red_amber 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +56 -22
- data/.yardopts +2 -0
- data/CHANGELOG.md +178 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +29 -30
- data/benchmark/basic.yml +7 -7
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -3
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +454 -85
- data/lib/red_amber/data_frame_combinable.rb +609 -115
- data/lib/red_amber/data_frame_displayable.rb +313 -34
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +623 -70
- data/lib/red_amber/data_frame_variable_operation.rb +452 -35
- data/lib/red_amber/group.rb +186 -22
- data/lib/red_amber/helper.rb +74 -14
- data/lib/red_amber/refinements.rb +26 -6
- data/lib/red_amber/subframes.rb +1101 -0
- data/lib/red_amber/vector.rb +362 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +506 -0
- data/lib/red_amber/vector_selectable.rb +265 -23
- data/lib/red_amber/vector_unary_element_wise.rb +529 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
@@ -3,22 +3,70 @@
|
|
3
3
|
require 'stringio'
|
4
4
|
|
5
5
|
module RedAmber
|
6
|
-
#
|
6
|
+
# Mix-in for the class DataFrame
|
7
7
|
module DataFrameDisplayable
|
8
|
+
# Activate refinements for class String
|
9
|
+
using RefineString
|
10
|
+
|
11
|
+
# Used internally to display table.
|
8
12
|
INDEX_KEY = :index_key_for_format_table
|
13
|
+
private_constant :INDEX_KEY
|
14
|
+
|
15
|
+
# rubocop:disable Layout/LineLength
|
9
16
|
|
10
|
-
|
17
|
+
# Show a preview of self as a string.
|
18
|
+
#
|
19
|
+
# @param width [Integer]
|
20
|
+
# maximum width of the result.
|
21
|
+
# @param head [Integer]
|
22
|
+
# number of records to show from head.
|
23
|
+
# @param tail [Integer]
|
24
|
+
# number of records to show at tail.
|
25
|
+
# @return [String]
|
26
|
+
# string representation of self.
|
27
|
+
# @example Show penguins dataset
|
28
|
+
# puts penguins.to_s
|
29
|
+
#
|
30
|
+
# # =>
|
31
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ... year
|
32
|
+
# <string> <string> <double> <double> <uint8> <uint16> ... <uint16>
|
33
|
+
# 0 Adelie Torgersen 39.1 18.7 181 3750 ... 2007
|
34
|
+
# 1 Adelie Torgersen 39.5 17.4 186 3800 ... 2007
|
35
|
+
# 2 Adelie Torgersen 40.3 18.0 195 3250 ... 2007
|
36
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) (nil) ... 2007
|
37
|
+
# 4 Adelie Torgersen 36.7 19.3 193 3450 ... 2007
|
38
|
+
# : : : : : : : ... :
|
39
|
+
# 340 Gentoo Biscoe 46.8 14.3 215 4850 ... 2009
|
40
|
+
# 341 Gentoo Biscoe 50.4 15.7 222 5750 ... 2009
|
41
|
+
# 342 Gentoo Biscoe 45.2 14.8 212 5200 ... 2009
|
42
|
+
# 343 Gentoo Biscoe 49.9 16.1 213 5400 ... 2009
|
43
|
+
#
|
44
|
+
def to_s(width: 90, head: 5, tail: 4)
|
11
45
|
return '' if empty?
|
12
46
|
|
13
|
-
format_table(width: width)
|
47
|
+
format_table(width: width, head: head, tail: tail)
|
14
48
|
end
|
15
49
|
|
16
|
-
# Show statistical summary by a new
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
50
|
+
# Show statistical summary by a new DataFrame.
|
51
|
+
#
|
52
|
+
# This method will make stats only for numeric columns.
|
53
|
+
# - NaNs are ignored.
|
54
|
+
# - `count` shows non-NaN counts.
|
55
|
+
#
|
56
|
+
# @return [DataFrame]
|
57
|
+
# a new dataframe.
|
58
|
+
# @example Statistical summary of penguins dataset
|
59
|
+
# puts penguins.summary.to_s
|
60
|
+
#
|
61
|
+
# # =>
|
62
|
+
# variables count mean std min 25% median 75% max
|
63
|
+
# <dictionary> <uint16> <double> <double> <double> <double> <double> <double> <double>
|
64
|
+
# 0 bill_length_mm 342 43.92 5.46 32.1 39.23 44.38 48.5 59.6
|
65
|
+
# 1 bill_depth_mm 342 17.15 1.97 13.1 15.6 17.32 18.7 21.5
|
66
|
+
# 2 flipper_length_mm 342 200.92 14.06 172.0 190.0 197.0 213.0 231.0
|
67
|
+
# 3 body_mass_g 342 4201.75 801.95 2700.0 3550.0 4031.5 4750.0 6300.0
|
68
|
+
# 4 year 344 2008.03 0.82 2007.0 2007.0 2008.0 2009.0 2009.0
|
20
69
|
#
|
21
|
-
# @return [DataFrame] a new dataframe.
|
22
70
|
def summary
|
23
71
|
num_keys = keys.select { |key| self[key].numeric? }
|
24
72
|
|
@@ -36,29 +84,223 @@ module RedAmber
|
|
36
84
|
end
|
37
85
|
alias_method :describe, :summary
|
38
86
|
|
87
|
+
# Show information of self.
|
88
|
+
#
|
89
|
+
# According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
|
90
|
+
# - If it is 'TDR', returns class name, shape, object id
|
91
|
+
# and transposed preview for up to 10 variables.
|
92
|
+
# - If it is 'TDRA', returns class name, shape, object id
|
93
|
+
# and transposed preview for all variables.
|
94
|
+
# - If it is 'MINIMUM', returns class name and shape.
|
95
|
+
# - If it is 'PLAIN', returns class name, shape and Table preview
|
96
|
+
# for up to 128 rows from the head, within a width of 128.
|
97
|
+
# - If it is 'TABLE' or otherwise, returns class name, shape, object id
|
98
|
+
# and Table preview for up to 20 rows from the head, within a width of 100.
|
99
|
+
# Default value of the ENV is 'Table'.
|
100
|
+
# @return [String]
|
101
|
+
# information of self.
|
102
|
+
# @example Default for ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table'
|
103
|
+
# puts df.inspect
|
104
|
+
#
|
105
|
+
# # =>
|
106
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c148>
|
107
|
+
# x y
|
108
|
+
# <uint8> <string>
|
109
|
+
# 0 1 A
|
110
|
+
# 1 2 B
|
111
|
+
# 2 3 C
|
112
|
+
#
|
113
|
+
# @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'TDR'
|
114
|
+
# puts df.inspect
|
115
|
+
#
|
116
|
+
# # =>
|
117
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c148>
|
118
|
+
# Vectors : 1 numeric, 1 string
|
119
|
+
# # key type level data_preview
|
120
|
+
# 0 :x uint8 3 [1, 2, 3]
|
121
|
+
# 1 :y string 3 ["A", "B", "C"]
|
122
|
+
#
|
123
|
+
# @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'Minimum'
|
124
|
+
# puts df.inspect
|
125
|
+
#
|
126
|
+
# # =>
|
127
|
+
# RedAmber::DataFrame : 3 x 2 Vectors
|
128
|
+
#
|
39
129
|
def inspect
|
40
130
|
mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
|
41
131
|
case mode.upcase
|
42
132
|
when 'TDR'
|
43
|
-
"#<#{shape_str(with_id: true)}>\n#{dataframe_info(
|
133
|
+
"#<#{shape_str(with_id: true)}>\n#{dataframe_info(10)}"
|
134
|
+
when 'TDRA'
|
135
|
+
"#<#{shape_str(with_id: true)}>\n#{dataframe_info(:all)}"
|
44
136
|
when 'MINIMUM'
|
45
137
|
shape_str
|
138
|
+
when 'PLAIN'
|
139
|
+
"#<#{shape_str}>\n#{to_s(width: 128, head: 128)}"
|
46
140
|
else
|
47
|
-
"#<#{shape_str(with_id: true)}>\n#{
|
141
|
+
"#<#{shape_str(with_id: true)}>\n#{to_s(width: 100, head: 20)}"
|
48
142
|
end
|
49
143
|
end
|
50
144
|
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
145
|
+
# Shows some information about self in a transposed style.
|
146
|
+
#
|
147
|
+
# @param limit [Integer, :all]
|
148
|
+
# maximum number of variables (columns) to show.
|
149
|
+
# Shows all variables (columns) if it is `:all`.
|
150
|
+
# @param tally [Integer]
|
151
|
+
# maximum level to use tally mode.
|
152
|
+
# Tally mode counts the occurrences of each element and shows as a hash
|
153
|
+
# with the elements as keys and the corresponding counts as values.
|
154
|
+
# @param elements [Integer]
|
155
|
+
# maximum number of elements to show values
|
156
|
+
# in each column.
|
157
|
+
# @return [nil]
|
158
|
+
# @example Default
|
159
|
+
# diamonds = diamonds.assign_left(:index) { indices }
|
160
|
+
# diamonds
|
161
|
+
#
|
162
|
+
# # =>
|
163
|
+
# #<RedAmber::DataFrame : 53940 x 11 Vectors, 0x0000000000035084>
|
164
|
+
# index carat cut color clarity depth table price x y z
|
165
|
+
# <uint16> <double> <string> <string> <string> <double> <double> <uint16> <double> <double> <double>
|
166
|
+
# 0 0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
|
167
|
+
# 1 1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
|
168
|
+
# 2 2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
|
169
|
+
# 3 3 0.29 Premium I VS2 62.4 58.0 334 4.2 4.23 2.63
|
170
|
+
# 4 4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
|
171
|
+
# 5 5 0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 2.48
|
172
|
+
# 6 6 0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47
|
173
|
+
# 7 7 0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53
|
174
|
+
# 8 8 0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49
|
175
|
+
# 9 9 0.23 Very Good H VS1 59.4 61.0 338 4.0 4.05 2.39
|
176
|
+
# 10 10 0.3 Good J SI1 64.0 55.0 339 4.25 4.28 2.73
|
177
|
+
# 11 11 0.23 Ideal J VS1 62.8 56.0 340 3.93 3.9 2.46
|
178
|
+
# 12 12 0.22 Premium F SI1 60.4 61.0 342 3.88 3.84 2.33
|
179
|
+
# 13 13 0.31 Ideal J SI2 62.2 54.0 344 4.35 4.37 2.71
|
180
|
+
# 14 14 0.2 Premium E SI2 60.2 62.0 345 3.79 3.75 2.27
|
181
|
+
# 15 15 0.32 Premium E I1 60.9 58.0 345 4.38 4.42 2.68
|
182
|
+
# 16 16 0.3 Ideal I SI2 62.0 54.0 348 4.31 4.34 2.68
|
183
|
+
# 17 17 0.3 Good J SI1 63.4 54.0 351 4.23 4.29 2.7
|
184
|
+
# 18 18 0.3 Good J SI1 63.8 56.0 351 4.23 4.26 2.71
|
185
|
+
# 19 19 0.3 Very Good J SI1 62.7 59.0 351 4.21 4.27 2.66
|
186
|
+
# : : : : : : : : : : : :
|
187
|
+
# 53936 53936 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
|
188
|
+
# 53937 53937 0.7 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
|
189
|
+
# 53938 53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
|
190
|
+
# 53939 53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64
|
191
|
+
#
|
192
|
+
# diamonds.tdr
|
193
|
+
#
|
194
|
+
# # =>
|
195
|
+
# RedAmber::DataFrame : 53940 x 11 Vectors
|
196
|
+
# Vectors : 8 numeric, 3 strings
|
197
|
+
# # key type level data_preview
|
198
|
+
# 0 :index uint16 53940 [0, 1, 2, 3, 4, ... ]
|
199
|
+
# 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
|
200
|
+
# 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
|
201
|
+
# 3 :color string 7 ["E", "E", "E", "I", "J", ... ]
|
202
|
+
# 4 :clarity string 8 ["SI2", "SI1", "VS1", "VS2", "SI2", ... ]
|
203
|
+
# 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
|
204
|
+
# 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
|
205
|
+
# 7 :price uint16 11602 [326, 326, 327, 334, 335, ... ]
|
206
|
+
# 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
|
207
|
+
# 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
|
208
|
+
# ... 1 more Vector ...
|
209
|
+
#
|
210
|
+
# @example Show all variables
|
211
|
+
# diamonds.tdr(:all)
|
212
|
+
#
|
213
|
+
# # =>
|
214
|
+
# RedAmber::DataFrame : 53940 x 11 Vectors
|
215
|
+
# Vectors : 8 numeric, 3 strings
|
216
|
+
# # key type level data_preview
|
217
|
+
# 0 :index uint16 53940 [0, 1, 2, 3, 4, ... ]
|
218
|
+
# 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
|
219
|
+
# 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
|
220
|
+
# 3 :color string 7 ["E", "E", "E", "I", "J", ... ]
|
221
|
+
# 4 :clarity string 8 ["SI2", "SI1", "VS1", "VS2", "SI2", ... ]
|
222
|
+
# 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
|
223
|
+
# 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
|
224
|
+
# 7 :price uint16 11602 [326, 326, 327, 334, 335, ... ]
|
225
|
+
# 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
|
226
|
+
# 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
|
227
|
+
# 10 :z double 375 [2.43, 2.31, 2.31, 2.63, 2.75, ... ]
|
228
|
+
#
|
229
|
+
# @example Use tally mode up to 8 levels
|
230
|
+
# diamonds.tdr(tally: 8)
|
231
|
+
#
|
232
|
+
# # =>
|
233
|
+
# RedAmber::DataFrame : 53940 x 11 Vectors
|
234
|
+
# Vectors : 8 numeric, 3 strings
|
235
|
+
# # key type level data_preview
|
236
|
+
# 0 :index uint16 53940 [0, 1, 2, 3, 4, ... ]
|
237
|
+
# 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
|
238
|
+
# 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
|
239
|
+
# 3 :color string 7 {"E"=>9797, "I"=>5422, "J"=>2808, "H"=>8304, "F"=>9542, "G"=>11292, "D"=>6775}
|
240
|
+
# 4 :clarity string 8 {"SI2"=>9194, "SI1"=>13065, "VS1"=>8171, "VS2"=>12258, "VVS2"=>5066, "VVS1"=>3655, "I1"=>741, "IF"=>1790}
|
241
|
+
# 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
|
242
|
+
# 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
|
243
|
+
# 7 :price uint16 11602 [326, 326, 327, 334, 335, ... ]
|
244
|
+
# 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
|
245
|
+
# 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
|
246
|
+
# ... 1 more Vector ...
|
247
|
+
#
|
248
|
+
# @example Increase elements to show
|
249
|
+
# diamonds.tdr(elements: 10)
|
250
|
+
#
|
251
|
+
# # =>
|
252
|
+
# RedAmber::DataFrame : 53940 x 11 Vectors
|
253
|
+
# Vectors : 8 numeric, 3 strings
|
254
|
+
# # key type level data_preview
|
255
|
+
# 0 :index uint16 53940 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... ]
|
256
|
+
# 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, ... ]
|
257
|
+
# 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
|
258
|
+
# 3 :color string 7 ["E", "E", "E", "I", "J", "J", "I", "H", "E", "H", ... ]
|
259
|
+
# 4 :clarity string 8 ["SI2", "SI1", "VS1", "VS2", "SI2", "VVS2", "VVS1", "SI1", "VS2", "VS1", ... ]
|
260
|
+
# 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, ... ]
|
261
|
+
# 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, 57.0, 57.0, 55.0, 61.0, 61.0, ... ]
|
262
|
+
# 7 :price uint16 11602 [326, 326, 327, 334, 335, 336, 336, 337, 337, 338, ... ]
|
263
|
+
# 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, 3.94, 3.95, 4.07, 3.87, 4.0, ... ]
|
264
|
+
# 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, ... ]
|
265
|
+
# ... 1 more Vector ...
|
266
|
+
#
|
54
267
|
def tdr(limit = 10, tally: 5, elements: 5)
|
55
268
|
puts tdr_str(limit, tally: tally, elements: elements)
|
56
269
|
end
|
270
|
+
alias_method :glimpse, :tdr
|
57
271
|
|
272
|
+
# Shortcut for `tdr(:all)`.
|
273
|
+
#
|
274
|
+
# @return (see #tdr)
|
275
|
+
#
|
276
|
+
def tdra
|
277
|
+
puts tdr_str(:all)
|
278
|
+
end
|
279
|
+
|
280
|
+
# rubocop:enable Layout/LineLength
|
281
|
+
|
282
|
+
# Returns some information about self in a transposed style by a string.
|
283
|
+
#
|
284
|
+
# @param (see #tdr)
|
285
|
+
# @option (see #tdr)
|
286
|
+
# @return [String] TDR style string.
|
287
|
+
#
|
58
288
|
def tdr_str(limit = 10, tally: 5, elements: 5)
|
59
289
|
"#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
|
60
290
|
end
|
61
291
|
|
292
|
+
# Returns html formatted text of self by IRuby::HTML.table.
|
293
|
+
#
|
294
|
+
# According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
|
295
|
+
# - If it is 'MINIMUM', returns shape by plain text.
|
296
|
+
# - If it is 'PLAIN', returns `#inspect` value by plain text.
|
297
|
+
# - If it is 'TDR', returns shape and transposed preview by plain text.
|
298
|
+
# - If it is 'TDRA', returns shape and transposed preview of all variables by plain text.
|
299
|
+
# - If it is 'TABLE' or otherwise, returns Table preview by html format.
|
300
|
+
# Default value of the ENV is 'TABLE'.
|
301
|
+
# @return [String]
|
302
|
+
# formatted string.
|
303
|
+
#
|
62
304
|
def to_iruby
|
63
305
|
require 'iruby'
|
64
306
|
return ['text/plain', '(empty DataFrame)'] if empty?
|
@@ -71,19 +313,39 @@ module RedAmber
|
|
71
313
|
['text/plain', shape_str]
|
72
314
|
when 'TDR'
|
73
315
|
size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
|
316
|
+
when 'TDRA'
|
317
|
+
['text/plain', tdr_str(:all)]
|
74
318
|
else # 'TABLE'
|
75
319
|
['text/html', html_table]
|
76
320
|
end
|
77
321
|
end
|
78
322
|
|
79
|
-
|
80
|
-
|
323
|
+
# Return class and shape of self by a String.
|
324
|
+
#
|
325
|
+
# @param with_id [true, false]
|
326
|
+
# show id if true.
|
327
|
+
# @return [String]
|
328
|
+
# shape string.
|
329
|
+
# @example Default (without id)
|
330
|
+
# penguins.shape_str
|
331
|
+
#
|
332
|
+
# # =>
|
333
|
+
# "RedAmber::DataFrame : 344 x 8 Vectors"
|
334
|
+
#
|
335
|
+
# @example With id
|
336
|
+
# penguins.shape_str(with_id: true)
|
337
|
+
#
|
338
|
+
# # =>
|
339
|
+
# "RedAmber::DataFrame : 344 x 8 Vectors, 0x0000000000003980"
|
340
|
+
#
|
81
341
|
def shape_str(with_id: false)
|
82
342
|
shape_info = empty? ? '(empty)' : "#{size} x #{n_keys} Vector#{pl(n_keys)}"
|
83
343
|
id = with_id ? format(', 0x%016x', object_id) : ''
|
84
344
|
"#{self.class} : #{shape_info}#{id}"
|
85
345
|
end
|
86
346
|
|
347
|
+
private # =====
|
348
|
+
|
87
349
|
def dataframe_info(limit, tally_level: 5, max_element: 5)
|
88
350
|
return '' if empty?
|
89
351
|
|
@@ -95,7 +357,7 @@ module RedAmber
|
|
95
357
|
quoted_keys = keys.map(&:inspect)
|
96
358
|
headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
|
97
359
|
data: 'data_preview' }
|
98
|
-
header_format = make_header_format(levels, headers, quoted_keys)
|
360
|
+
header_format = make_header_format(levels, headers, quoted_keys, limit)
|
99
361
|
|
100
362
|
sio = StringIO.new # output string buffer
|
101
363
|
sio.puts "Vector#{pl(n_keys)} : #{var_type_count(type_groups).join(', ')}"
|
@@ -125,9 +387,9 @@ module RedAmber
|
|
125
387
|
sio.string
|
126
388
|
end
|
127
389
|
|
128
|
-
def make_header_format(levels, headers, quoted_keys)
|
390
|
+
def make_header_format(levels, headers, quoted_keys, limit)
|
129
391
|
# find longest word to adjust width
|
130
|
-
w_idx = n_keys.to_s.size
|
392
|
+
w_idx = ([n_keys, limit].min - 1).to_s.size
|
131
393
|
w_key = [quoted_keys.map(&:size).max, headers[:key].size].max
|
132
394
|
w_type = [types.map(&:size).max, headers[:type].size].max
|
133
395
|
w_level = [levels.map { |l| l.to_s.size }.max, headers[:levels].size].max
|
@@ -156,10 +418,17 @@ module RedAmber
|
|
156
418
|
end
|
157
419
|
|
158
420
|
def shorthand(vector, size, max_element)
|
159
|
-
|
160
|
-
a
|
161
|
-
|
162
|
-
|
421
|
+
a = vector.to_a.take(max_element)
|
422
|
+
a.map! do |e|
|
423
|
+
if e.nil?
|
424
|
+
'nil'
|
425
|
+
elsif vector.temporal?
|
426
|
+
e.to_s.inspect
|
427
|
+
else
|
428
|
+
e.inspect
|
429
|
+
end
|
430
|
+
end
|
431
|
+
a << '... ' if size > max_element
|
163
432
|
"[#{a.join(', ')}]"
|
164
433
|
end
|
165
434
|
|
@@ -201,13 +470,13 @@ module RedAmber
|
|
201
470
|
df = df.assign do
|
202
471
|
vectors.each_with_object({}) do |v, assigner|
|
203
472
|
vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
|
204
|
-
|
473
|
+
.replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
|
205
474
|
assigner[v.key] =
|
206
475
|
original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
|
207
476
|
end
|
208
477
|
end
|
209
478
|
|
210
|
-
width_list = df.vectors.map { |v| v.to_a.map(&:
|
479
|
+
width_list = df.vectors.map { |v| v.to_a.map(&:width).max }
|
211
480
|
total_length = width_list[-1] # reserved for last column
|
212
481
|
|
213
482
|
formats = []
|
@@ -216,14 +485,13 @@ module RedAmber
|
|
216
485
|
w = width_list[i]
|
217
486
|
if total_length + w > width && i < df.n_keys - 1
|
218
487
|
row_ellipsis = i
|
219
|
-
formats <<
|
220
|
-
formats <<
|
488
|
+
formats << 3
|
489
|
+
formats << format_width(df.vectors[-1], original, width_list[-1])
|
221
490
|
break
|
222
491
|
end
|
223
|
-
formats <<
|
492
|
+
formats << format_width(v, original, w)
|
224
493
|
total_length += w
|
225
494
|
end
|
226
|
-
format_str = formats.join(' ')
|
227
495
|
|
228
496
|
str = StringIO.new
|
229
497
|
if row_ellipsis
|
@@ -232,22 +500,31 @@ module RedAmber
|
|
232
500
|
end
|
233
501
|
|
234
502
|
df.to_a.each do |row|
|
235
|
-
|
503
|
+
a =
|
504
|
+
row.zip(formats).map do |elem, format|
|
505
|
+
non_ascii_diff = elem.ascii_only? ? 0 : elem.width - elem.size
|
506
|
+
if format.negative?
|
507
|
+
elem.ljust(-format + non_ascii_diff)
|
508
|
+
else
|
509
|
+
elem.rjust(format + non_ascii_diff)
|
510
|
+
end
|
511
|
+
end
|
512
|
+
str.puts a.join(' ').rstrip
|
236
513
|
end
|
237
514
|
|
238
515
|
str.string
|
239
516
|
end
|
240
517
|
|
241
|
-
def
|
518
|
+
def format_width(vector, original, width)
|
242
519
|
if vector.key != INDEX_KEY && !original[vector.key].numeric?
|
243
|
-
|
520
|
+
-width
|
244
521
|
else
|
245
|
-
|
522
|
+
width
|
246
523
|
end
|
247
524
|
end
|
248
525
|
|
249
526
|
def html_table
|
250
|
-
reduced = size >
|
527
|
+
reduced = size > 10 ? self[0..5, -5..-1] : self
|
251
528
|
|
252
529
|
converted = reduced.assign do
|
253
530
|
vectors.select.with_object({}) do |vector, assigner|
|
@@ -267,12 +544,14 @@ module RedAmber
|
|
267
544
|
format('%g', element)
|
268
545
|
in Integer
|
269
546
|
format('%d', element)
|
547
|
+
else
|
548
|
+
element
|
270
549
|
end
|
271
550
|
end
|
272
551
|
end
|
273
552
|
end
|
274
553
|
|
275
|
-
html = IRuby::HTML.table(converted.to_h, maxrows:
|
554
|
+
html = IRuby::HTML.table(converted.to_h, maxrows: 10, maxcols: 15)
|
276
555
|
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
277
556
|
end
|
278
557
|
end
|
@@ -1,38 +1,141 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Mix-ins for the class DataFrame
|
5
5
|
module DataFrameIndexable
|
6
|
-
#
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
6
|
+
# Returns row index Vector.
|
7
|
+
#
|
8
|
+
# @overload indices
|
9
|
+
# return @indices as row indices (0...size).
|
10
|
+
#
|
11
|
+
# @return [Vector]
|
12
|
+
# a Vector of row indices.
|
13
|
+
# @example When `dataframe.size == 5`;
|
14
|
+
# dataframe.indices
|
15
|
+
#
|
16
|
+
# # =>
|
17
|
+
# #<RedAmber::Vector(:uint8, size=5):0x000000000000fb54>
|
18
|
+
# [0, 1, 2, 3, 4]
|
19
|
+
#
|
20
|
+
# @overload indices(start)
|
21
|
+
# return customized index Vector `(start..).take(size)`.
|
22
|
+
#
|
23
|
+
# @param start [#succ]
|
24
|
+
# starting element, which must respond to `#succ`.
|
25
|
+
# @return [Vector]
|
26
|
+
# a Vector of row indices.
|
27
|
+
# @example When `dataframe.size == 5`;
|
28
|
+
# dataframe.indices(1)
|
29
|
+
#
|
30
|
+
# # =>
|
31
|
+
# #<RedAmber::Vector(:uint8, size=5):0x000000000000fba4>
|
32
|
+
# [1, 2, 3, 4, 5]
|
33
|
+
#
|
34
|
+
# dataframe.indices('a')
|
35
|
+
# # =>
|
36
|
+
# #<RedAmber::Vector(:string, size=5):0x000000000000fbb8>
|
37
|
+
# ["a", "b", "c", "d", "e"]
|
38
|
+
#
|
39
|
+
def indices(start = 0)
|
40
|
+
if start == 0 # rubocop:disable Style/NumericPredicate
|
41
|
+
@indices ||= Vector.new(0...size)
|
42
|
+
else
|
43
|
+
Vector.new((start..).take(size))
|
44
|
+
end
|
13
45
|
end
|
46
|
+
alias_method :indexes, :indices
|
14
47
|
|
48
|
+
# Returns the row indices that sort self, as a Vector.
|
49
|
+
#
|
15
50
|
# @param sort_keys [Arrow::SortKey]
|
16
51
|
# :key, "key" or "+key" denotes ascending,
|
17
|
-
# "-key" denotes descending order
|
18
|
-
# @return [RedAmber::Vector]
|
52
|
+
# :"-key" or "-key" denotes descending order.
|
53
|
+
# @return [RedAmber::Vector]
|
54
|
+
# sorted indices in Vector.
|
55
|
+
# @example
|
56
|
+
# df
|
57
|
+
#
|
58
|
+
# # =>
|
59
|
+
# x y
|
60
|
+
# <uint8> <string>
|
61
|
+
# 0 3 B
|
62
|
+
# 1 5 A
|
63
|
+
# 2 1 B
|
64
|
+
# 3 4 A
|
65
|
+
# 4 2 C
|
66
|
+
#
|
67
|
+
# df.sort_indices('x')
|
68
|
+
#
|
69
|
+
# # =>
|
70
|
+
# #<RedAmber::Vector(:uint64, size=5):0x0000000000003854>
|
71
|
+
# [2, 4, 0, 3, 1]
|
72
|
+
#
|
19
73
|
def sort_indices(*sort_keys)
|
20
74
|
indices = @table.sort_indices(sort_keys.flatten)
|
21
75
|
Vector.create(indices)
|
22
76
|
end
|
23
77
|
|
24
|
-
#
|
78
|
+
# Sort the contents of self.
|
79
|
+
#
|
80
|
+
# @param sort_keys [Arrow::SortKey]
|
81
|
+
# :key, "key" or "+key" denotes ascending,
|
82
|
+
# :"-key" or "-key" denotes descending order.
|
83
|
+
# @return [RedAmber::DataFrame]
|
84
|
+
# sorted DataFrame.
|
85
|
+
# @example Sort by a key
|
86
|
+
# df
|
87
|
+
#
|
88
|
+
# # =>
|
89
|
+
# x y
|
90
|
+
# <uint8> <string>
|
91
|
+
# 0 3 B
|
92
|
+
# 1 5 A
|
93
|
+
# 2 1 B
|
94
|
+
# 3 4 A
|
95
|
+
# 4 2 C
|
96
|
+
#
|
97
|
+
# df.sort('y')
|
98
|
+
#
|
99
|
+
# # =>
|
100
|
+
# #<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000382c>
|
101
|
+
# x y
|
102
|
+
# <uint8> <string>
|
103
|
+
# 0 5 A
|
104
|
+
# 1 4 A
|
105
|
+
# 2 3 B
|
106
|
+
# 3 1 B
|
107
|
+
# 4 2 C
|
108
|
+
#
|
109
|
+
# @example Sort by two keys
|
110
|
+
# df.sort('y', 'x')
|
111
|
+
#
|
112
|
+
# # =>
|
113
|
+
# #<RedAmber::DataFrame : 5 x 2 Vectors, 0x0000000000003890>
|
114
|
+
# x y
|
115
|
+
# <uint8> <string>
|
116
|
+
# 0 4 A
|
117
|
+
# 1 5 A
|
118
|
+
# 2 1 B
|
119
|
+
# 3 3 B
|
120
|
+
# 4 2 C
|
121
|
+
#
|
122
|
+
# @example Sort in descending order
|
123
|
+
# df.sort('-x')
|
124
|
+
#
|
125
|
+
# # =>
|
126
|
+
# #<RedAmber::DataFrame : 5 x 2 Vectors, 0x0000000000003840>
|
127
|
+
# x y
|
128
|
+
# <uint8> <string>
|
129
|
+
# 0 5 A
|
130
|
+
# 1 4 A
|
131
|
+
# 2 3 B
|
132
|
+
# 3 2 C
|
133
|
+
# 4 1 B
|
134
|
+
#
|
25
135
|
def sort(*sort_keys)
|
26
136
|
indices = @table.sort_indices(sort_keys.flatten)
|
27
137
|
|
28
|
-
|
29
|
-
end
|
30
|
-
|
31
|
-
private
|
32
|
-
|
33
|
-
def new_dataframe_by(index_array)
|
34
|
-
t = Arrow::Function.find(:take).execute([@table, index_array]).value
|
35
|
-
DataFrame.create(t)
|
138
|
+
take(indices)
|
36
139
|
end
|
37
140
|
end
|
38
141
|
end
|