red_amber 0.3.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +56 -22
- data/.yardopts +2 -0
- data/CHANGELOG.md +178 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +29 -30
- data/benchmark/basic.yml +7 -7
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -3
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +454 -85
- data/lib/red_amber/data_frame_combinable.rb +609 -115
- data/lib/red_amber/data_frame_displayable.rb +313 -34
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +623 -70
- data/lib/red_amber/data_frame_variable_operation.rb +452 -35
- data/lib/red_amber/group.rb +186 -22
- data/lib/red_amber/helper.rb +74 -14
- data/lib/red_amber/refinements.rb +26 -6
- data/lib/red_amber/subframes.rb +1101 -0
- data/lib/red_amber/vector.rb +362 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +506 -0
- data/lib/red_amber/vector_selectable.rb +265 -23
- data/lib/red_amber/vector_unary_element_wise.rb +529 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
@@ -3,22 +3,70 @@
|
|
3
3
|
require 'stringio'
|
4
4
|
|
5
5
|
module RedAmber
|
6
|
-
#
|
6
|
+
# Mix-in for the class DataFrame
|
7
7
|
module DataFrameDisplayable
|
8
|
+
# Refine class String
|
9
|
+
using RefineString
|
10
|
+
|
11
|
+
# Used internally to display table.
|
8
12
|
INDEX_KEY = :index_key_for_format_table
|
13
|
+
private_constant :INDEX_KEY
|
14
|
+
|
15
|
+
# rubocop:disable Layout/LineLength
|
9
16
|
|
10
|
-
|
17
|
+
# Show a preview of self as a string.
|
18
|
+
#
|
19
|
+
# @param width [Integer]
|
20
|
+
# maximum size of result.
|
21
|
+
# @param head [Integer]
|
22
|
+
# number of records to show from head.
|
23
|
+
# @param tail [Integer]
|
24
|
+
# number of records to show at tail.
|
25
|
+
# @return [String]
|
26
|
+
# string representation of self.
|
27
|
+
# @example Show penguins dataset
|
28
|
+
# puts penguins.to_s
|
29
|
+
#
|
30
|
+
# # =>
|
31
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ... year
|
32
|
+
# <string> <string> <double> <double> <uint8> <uint16> ... <uint16>
|
33
|
+
# 0 Adelie Torgersen 39.1 18.7 181 3750 ... 2007
|
34
|
+
# 1 Adelie Torgersen 39.5 17.4 186 3800 ... 2007
|
35
|
+
# 2 Adelie Torgersen 40.3 18.0 195 3250 ... 2007
|
36
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) (nil) ... 2007
|
37
|
+
# 4 Adelie Torgersen 36.7 19.3 193 3450 ... 2007
|
38
|
+
# : : : : : : : ... :
|
39
|
+
# 340 Gentoo Biscoe 46.8 14.3 215 4850 ... 2009
|
40
|
+
# 341 Gentoo Biscoe 50.4 15.7 222 5750 ... 2009
|
41
|
+
# 342 Gentoo Biscoe 45.2 14.8 212 5200 ... 2009
|
42
|
+
# 343 Gentoo Biscoe 49.9 16.1 213 5400 ... 2009
|
43
|
+
#
|
44
|
+
def to_s(width: 90, head: 5, tail: 4)
|
11
45
|
return '' if empty?
|
12
46
|
|
13
|
-
format_table(width: width)
|
47
|
+
format_table(width: width, head: head, tail: tail)
|
14
48
|
end
|
15
49
|
|
16
|
-
# Show statistical summary by a new
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
50
|
+
# Show statistical summary by a new DataFrame.
|
51
|
+
#
|
52
|
+
# This method will make stats only for numeric columns.
|
53
|
+
# - NaNs are ignored.
|
54
|
+
# - `count` shows non-NaN counts.
|
55
|
+
#
|
56
|
+
# @return [DataFrame]
|
57
|
+
# a new dataframe.
|
58
|
+
# @example Statistical summary of penguins dataset
|
59
|
+
# puts penguins.summary.to_s
|
60
|
+
#
|
61
|
+
# # =>
|
62
|
+
# variables count mean std min 25% median 75% max
|
63
|
+
# <dictionary> <uint16> <double> <double> <double> <double> <double> <double> <double>
|
64
|
+
# 0 bill_length_mm 342 43.92 5.46 32.1 39.23 44.38 48.5 59.6
|
65
|
+
# 1 bill_depth_mm 342 17.15 1.97 13.1 15.6 17.32 18.7 21.5
|
66
|
+
# 2 flipper_length_mm 342 200.92 14.06 172.0 190.0 197.0 213.0 231.0
|
67
|
+
# 3 body_mass_g 342 4201.75 801.95 2700.0 3550.0 4031.5 4750.0 6300.0
|
68
|
+
# 4 year 344 2008.03 0.82 2007.0 2007.0 2008.0 2009.0 2009.0
|
20
69
|
#
|
21
|
-
# @return [DataFrame] a new dataframe.
|
22
70
|
def summary
|
23
71
|
num_keys = keys.select { |key| self[key].numeric? }
|
24
72
|
|
@@ -36,29 +84,223 @@ module RedAmber
|
|
36
84
|
end
|
37
85
|
alias_method :describe, :summary
|
38
86
|
|
87
|
+
# Show information of self.
|
88
|
+
#
|
89
|
+
# According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
|
90
|
+
# - If it is 'TDR', returns class name, shape, object id
|
91
|
+
# and transposed preview for up to 10 variables.
|
92
|
+
# - If it is 'TDRA', returns class name, shape, object id
|
93
|
+
# and transposed preview for all variables.
|
94
|
+
# - If it is 'MINIMUM', returns class name and shape.
|
95
|
+
# - If it is 'PLAIN', returns class name, shape and Table preview
|
96
|
+
# for up to 128 head rows within a width of 128 characters.
|
97
|
+
# - If it is 'TABLE' or otherwise, returns class name, shape, object id
|
98
|
+
# and Table preview for up to 20 head rows within a width of 100 characters.
|
99
|
+
# Default value of the ENV is 'Table'.
|
100
|
+
# @return [String]
|
101
|
+
# information of self.
|
102
|
+
# @example Default for ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table'
|
103
|
+
# puts df.inspect
|
104
|
+
#
|
105
|
+
# # =>
|
106
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c148>
|
107
|
+
# x y
|
108
|
+
# <uint8> <string>
|
109
|
+
# 0 1 A
|
110
|
+
# 1 2 B
|
111
|
+
# 2 3 C
|
112
|
+
#
|
113
|
+
# @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'TDR'
|
114
|
+
# puts df.inspect
|
115
|
+
#
|
116
|
+
# # =>
|
117
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c148>
|
118
|
+
# Vectors : 1 numeric, 1 string
|
119
|
+
# # key type level data_preview
|
120
|
+
# 0 :x uint8 3 [1, 2, 3]
|
121
|
+
# 1 :y string 3 ["A", "B", "C"]
|
122
|
+
#
|
123
|
+
# @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'Minimum'
|
124
|
+
# puts df.inspect
|
125
|
+
#
|
126
|
+
# # =>
|
127
|
+
# RedAmber::DataFrame : 3 x 2 Vectors
|
128
|
+
#
|
39
129
|
def inspect
|
40
130
|
mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
|
41
131
|
case mode.upcase
|
42
132
|
when 'TDR'
|
43
|
-
"#<#{shape_str(with_id: true)}>\n#{dataframe_info(
|
133
|
+
"#<#{shape_str(with_id: true)}>\n#{dataframe_info(10)}"
|
134
|
+
when 'TDRA'
|
135
|
+
"#<#{shape_str(with_id: true)}>\n#{dataframe_info(:all)}"
|
44
136
|
when 'MINIMUM'
|
45
137
|
shape_str
|
138
|
+
when 'PLAIN'
|
139
|
+
"#<#{shape_str}>\n#{to_s(width: 128, head: 128)}"
|
46
140
|
else
|
47
|
-
"#<#{shape_str(with_id: true)}>\n#{
|
141
|
+
"#<#{shape_str(with_id: true)}>\n#{to_s(width: 100, head: 20)}"
|
48
142
|
end
|
49
143
|
end
|
50
144
|
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
145
|
+
# Shows some information about self in a transposed style.
|
146
|
+
#
|
147
|
+
# @param limit [Integer, :all]
|
148
|
+
# maximum number of variables (columns) to show.
|
149
|
+
# Shows all variables (columns) if it is `:all`.
|
150
|
+
# @param tally [Integer]
|
151
|
+
# maximum level to use tally mode.
|
152
|
+
# Tally mode counts the occurrences of each element and shows as a hash
|
153
|
+
# with the elements as keys and the corresponding counts as values.
|
154
|
+
# @param elements [Integer]
|
155
|
+
# maximum number of elements to show values
|
156
|
+
# in each column.
|
157
|
+
# @return [nil]
|
158
|
+
# @example Default
|
159
|
+
# diamonds = diamonds.assign_left(:index) { indices }
|
160
|
+
# diamonds
|
161
|
+
#
|
162
|
+
# # =>
|
163
|
+
# #<RedAmber::DataFrame : 53940 x 11 Vectors, 0x0000000000035084>
|
164
|
+
# index carat cut color clarity depth table price x y z
|
165
|
+
# <uint16> <double> <string> <string> <string> <double> <double> <uint16> <double> <double> <double>
|
166
|
+
# 0 0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
|
167
|
+
# 1 1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
|
168
|
+
# 2 2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
|
169
|
+
# 3 3 0.29 Premium I VS2 62.4 58.0 334 4.2 4.23 2.63
|
170
|
+
# 4 4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
|
171
|
+
# 5 5 0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 2.48
|
172
|
+
# 6 6 0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47
|
173
|
+
# 7 7 0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53
|
174
|
+
# 8 8 0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49
|
175
|
+
# 9 9 0.23 Very Good H VS1 59.4 61.0 338 4.0 4.05 2.39
|
176
|
+
# 10 10 0.3 Good J SI1 64.0 55.0 339 4.25 4.28 2.73
|
177
|
+
# 11 11 0.23 Ideal J VS1 62.8 56.0 340 3.93 3.9 2.46
|
178
|
+
# 12 12 0.22 Premium F SI1 60.4 61.0 342 3.88 3.84 2.33
|
179
|
+
# 13 13 0.31 Ideal J SI2 62.2 54.0 344 4.35 4.37 2.71
|
180
|
+
# 14 14 0.2 Premium E SI2 60.2 62.0 345 3.79 3.75 2.27
|
181
|
+
# 15 15 0.32 Premium E I1 60.9 58.0 345 4.38 4.42 2.68
|
182
|
+
# 16 16 0.3 Ideal I SI2 62.0 54.0 348 4.31 4.34 2.68
|
183
|
+
# 17 17 0.3 Good J SI1 63.4 54.0 351 4.23 4.29 2.7
|
184
|
+
# 18 18 0.3 Good J SI1 63.8 56.0 351 4.23 4.26 2.71
|
185
|
+
# 19 19 0.3 Very Good J SI1 62.7 59.0 351 4.21 4.27 2.66
|
186
|
+
# : : : : : : : : : : : :
|
187
|
+
# 53936 53936 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
|
188
|
+
# 53937 53937 0.7 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
|
189
|
+
# 53938 53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
|
190
|
+
# 53939 53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64
|
191
|
+
#
|
192
|
+
# diamonds.tdr
|
193
|
+
#
|
194
|
+
# # =>
|
195
|
+
# RedAmber::DataFrame : 53940 x 11 Vectors
|
196
|
+
# Vectors : 8 numeric, 3 strings
|
197
|
+
# # key type level data_preview
|
198
|
+
# 0 :index uint16 53940 [0, 1, 2, 3, 4, ... ]
|
199
|
+
# 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
|
200
|
+
# 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
|
201
|
+
# 3 :color string 7 ["E", "E", "E", "I", "J", ... ]
|
202
|
+
# 4 :clarity string 8 ["SI2", "SI1", "VS1", "VS2", "SI2", ... ]
|
203
|
+
# 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
|
204
|
+
# 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
|
205
|
+
# 7 :price uint16 11602 [326, 326, 327, 334, 335, ... ]
|
206
|
+
# 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
|
207
|
+
# 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
|
208
|
+
# ... 1 more Vector ...
|
209
|
+
#
|
210
|
+
# @example Show all variables
|
211
|
+
# diamonds.tdr(:all)
|
212
|
+
#
|
213
|
+
# # =>
|
214
|
+
# RedAmber::DataFrame : 53940 x 11 Vectors
|
215
|
+
# Vectors : 8 numeric, 3 strings
|
216
|
+
# # key type level data_preview
|
217
|
+
# 0 :index uint16 53940 [0, 1, 2, 3, 4, ... ]
|
218
|
+
# 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
|
219
|
+
# 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
|
220
|
+
# 3 :color string 7 ["E", "E", "E", "I", "J", ... ]
|
221
|
+
# 4 :clarity string 8 ["SI2", "SI1", "VS1", "VS2", "SI2", ... ]
|
222
|
+
# 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
|
223
|
+
# 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
|
224
|
+
# 7 :price uint16 11602 [326, 326, 327, 334, 335, ... ]
|
225
|
+
# 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
|
226
|
+
# 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
|
227
|
+
# 10 :z double 375 [2.43, 2.31, 2.31, 2.63, 2.75, ... ]
|
228
|
+
#
|
229
|
+
# @example Use tally mode up to 8 levels
|
230
|
+
# diamonds.tdr(tally: 8)
|
231
|
+
#
|
232
|
+
# # =>
|
233
|
+
# RedAmber::DataFrame : 53940 x 11 Vectors
|
234
|
+
# Vectors : 8 numeric, 3 strings
|
235
|
+
# # key type level data_preview
|
236
|
+
# 0 :index uint16 53940 [0, 1, 2, 3, 4, ... ]
|
237
|
+
# 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
|
238
|
+
# 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
|
239
|
+
# 3 :color string 7 {"E"=>9797, "I"=>5422, "J"=>2808, "H"=>8304, "F"=>9542, "G"=>11292, "D"=>6775}
|
240
|
+
# 4 :clarity string 8 {"SI2"=>9194, "SI1"=>13065, "VS1"=>8171, "VS2"=>12258, "VVS2"=>5066, "VVS1"=>3655, "I1"=>741, "IF"=>1790}
|
241
|
+
# 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
|
242
|
+
# 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
|
243
|
+
# 7 :price uint16 11602 [326, 326, 327, 334, 335, ... ]
|
244
|
+
# 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
|
245
|
+
# 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
|
246
|
+
# ... 1 more Vector ...
|
247
|
+
#
|
248
|
+
# @example Increase elements to show
|
249
|
+
# diamonds.tdr(elements: 10)
|
250
|
+
#
|
251
|
+
# # =>
|
252
|
+
# RedAmber::DataFrame : 53940 x 11 Vectors
|
253
|
+
# Vectors : 8 numeric, 3 strings
|
254
|
+
# # key type level data_preview
|
255
|
+
# 0 :index uint16 53940 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... ]
|
256
|
+
# 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, ... ]
|
257
|
+
# 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
|
258
|
+
# 3 :color string 7 ["E", "E", "E", "I", "J", "J", "I", "H", "E", "H", ... ]
|
259
|
+
# 4 :clarity string 8 ["SI2", "SI1", "VS1", "VS2", "SI2", "VVS2", "VVS1", "SI1", "VS2", "VS1", ... ]
|
260
|
+
# 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, ... ]
|
261
|
+
# 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, 57.0, 57.0, 55.0, 61.0, 61.0, ... ]
|
262
|
+
# 7 :price uint16 11602 [326, 326, 327, 334, 335, 336, 336, 337, 337, 338, ... ]
|
263
|
+
# 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, 3.94, 3.95, 4.07, 3.87, 4.0, ... ]
|
264
|
+
# 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, ... ]
|
265
|
+
# ... 1 more Vector ...
|
266
|
+
#
|
54
267
|
def tdr(limit = 10, tally: 5, elements: 5)
|
55
268
|
puts tdr_str(limit, tally: tally, elements: elements)
|
56
269
|
end
|
270
|
+
alias_method :glimpse, :tdr
|
57
271
|
|
272
|
+
# Shortcut for `tdr(:all)`.
|
273
|
+
#
|
274
|
+
# @return (see #tdr)
|
275
|
+
#
|
276
|
+
def tdra
|
277
|
+
puts tdr_str(:all)
|
278
|
+
end
|
279
|
+
|
280
|
+
# rubocop:enable Layout/LineLength
|
281
|
+
|
282
|
+
# Returns some information about self in a transposed style by a string.
|
283
|
+
#
|
284
|
+
# @param (see #tdr)
|
285
|
+
# @option (see #tdr)
|
286
|
+
# @return [String] TDR style string.
|
287
|
+
#
|
58
288
|
def tdr_str(limit = 10, tally: 5, elements: 5)
|
59
289
|
"#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
|
60
290
|
end
|
61
291
|
|
292
|
+
# Returns html formatted text of self by IRuby::HTML.table.
|
293
|
+
#
|
294
|
+
# According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
|
295
|
+
# - If it is 'MINIMUM', returns shape by plain text.
|
296
|
+
# - If it is 'PLAIN', returns `#inspect` value by plain text.
|
297
|
+
# - If it is 'TDR', returns shape and transposed preview by plain text.
|
298
|
+
# - If it is 'TDRA', returns shape and transposed preview by plain text.
|
299
|
+
# - If it is 'TABLE' or otherwise, returns Table preview by html format.
|
300
|
+
# Default value of the ENV is 'TABLE'.
|
301
|
+
# @return [String]
|
302
|
+
# formatted string.
|
303
|
+
#
|
62
304
|
def to_iruby
|
63
305
|
require 'iruby'
|
64
306
|
return ['text/plain', '(empty DataFrame)'] if empty?
|
@@ -71,19 +313,39 @@ module RedAmber
|
|
71
313
|
['text/plain', shape_str]
|
72
314
|
when 'TDR'
|
73
315
|
size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
|
316
|
+
when 'TDRA'
|
317
|
+
['text/plain', tdr_str(:all)]
|
74
318
|
else # 'TABLE'
|
75
319
|
['text/html', html_table]
|
76
320
|
end
|
77
321
|
end
|
78
322
|
|
79
|
-
|
80
|
-
|
323
|
+
# Return class and shape of self by a String.
|
324
|
+
#
|
325
|
+
# @param with_id [true, false]
|
326
|
+
# show id if true.
|
327
|
+
# @return [String]
|
328
|
+
# shape string.
|
329
|
+
# @example Default (without id)
|
330
|
+
# penguins.shape_str
|
331
|
+
#
|
332
|
+
# # =>
|
333
|
+
# "RedAmber::DataFrame : 344 x 8 Vectors"
|
334
|
+
#
|
335
|
+
# @example With id
|
336
|
+
# penguins.shape_str(with_id: true)
|
337
|
+
#
|
338
|
+
# # =>
|
339
|
+
# "RedAmber::DataFrame : 344 x 8 Vectors, 0x0000000000003980"
|
340
|
+
#
|
81
341
|
def shape_str(with_id: false)
|
82
342
|
shape_info = empty? ? '(empty)' : "#{size} x #{n_keys} Vector#{pl(n_keys)}"
|
83
343
|
id = with_id ? format(', 0x%016x', object_id) : ''
|
84
344
|
"#{self.class} : #{shape_info}#{id}"
|
85
345
|
end
|
86
346
|
|
347
|
+
private # =====
|
348
|
+
|
87
349
|
def dataframe_info(limit, tally_level: 5, max_element: 5)
|
88
350
|
return '' if empty?
|
89
351
|
|
@@ -95,7 +357,7 @@ module RedAmber
|
|
95
357
|
quoted_keys = keys.map(&:inspect)
|
96
358
|
headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
|
97
359
|
data: 'data_preview' }
|
98
|
-
header_format = make_header_format(levels, headers, quoted_keys)
|
360
|
+
header_format = make_header_format(levels, headers, quoted_keys, limit)
|
99
361
|
|
100
362
|
sio = StringIO.new # output string buffer
|
101
363
|
sio.puts "Vector#{pl(n_keys)} : #{var_type_count(type_groups).join(', ')}"
|
@@ -125,9 +387,9 @@ module RedAmber
|
|
125
387
|
sio.string
|
126
388
|
end
|
127
389
|
|
128
|
-
def make_header_format(levels, headers, quoted_keys)
|
390
|
+
def make_header_format(levels, headers, quoted_keys, limit)
|
129
391
|
# find longest word to adjust width
|
130
|
-
w_idx = n_keys.to_s.size
|
392
|
+
w_idx = ([n_keys, limit].min - 1).to_s.size
|
131
393
|
w_key = [quoted_keys.map(&:size).max, headers[:key].size].max
|
132
394
|
w_type = [types.map(&:size).max, headers[:type].size].max
|
133
395
|
w_level = [levels.map { |l| l.to_s.size }.max, headers[:levels].size].max
|
@@ -156,10 +418,17 @@ module RedAmber
|
|
156
418
|
end
|
157
419
|
|
158
420
|
def shorthand(vector, size, max_element)
|
159
|
-
|
160
|
-
a
|
161
|
-
|
162
|
-
|
421
|
+
a = vector.to_a.take(max_element)
|
422
|
+
a.map! do |e|
|
423
|
+
if e.nil?
|
424
|
+
'nil'
|
425
|
+
elsif vector.temporal?
|
426
|
+
e.to_s.inspect
|
427
|
+
else
|
428
|
+
e.inspect
|
429
|
+
end
|
430
|
+
end
|
431
|
+
a << '... ' if size > max_element
|
163
432
|
"[#{a.join(', ')}]"
|
164
433
|
end
|
165
434
|
|
@@ -201,13 +470,13 @@ module RedAmber
|
|
201
470
|
df = df.assign do
|
202
471
|
vectors.each_with_object({}) do |v, assigner|
|
203
472
|
vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
|
204
|
-
|
473
|
+
.replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
|
205
474
|
assigner[v.key] =
|
206
475
|
original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
|
207
476
|
end
|
208
477
|
end
|
209
478
|
|
210
|
-
width_list = df.vectors.map { |v| v.to_a.map(&:
|
479
|
+
width_list = df.vectors.map { |v| v.to_a.map(&:width).max }
|
211
480
|
total_length = width_list[-1] # reserved for last column
|
212
481
|
|
213
482
|
formats = []
|
@@ -216,14 +485,13 @@ module RedAmber
|
|
216
485
|
w = width_list[i]
|
217
486
|
if total_length + w > width && i < df.n_keys - 1
|
218
487
|
row_ellipsis = i
|
219
|
-
formats <<
|
220
|
-
formats <<
|
488
|
+
formats << 3
|
489
|
+
formats << format_width(df.vectors[-1], original, width_list[-1])
|
221
490
|
break
|
222
491
|
end
|
223
|
-
formats <<
|
492
|
+
formats << format_width(v, original, w)
|
224
493
|
total_length += w
|
225
494
|
end
|
226
|
-
format_str = formats.join(' ')
|
227
495
|
|
228
496
|
str = StringIO.new
|
229
497
|
if row_ellipsis
|
@@ -232,22 +500,31 @@ module RedAmber
|
|
232
500
|
end
|
233
501
|
|
234
502
|
df.to_a.each do |row|
|
235
|
-
|
503
|
+
a =
|
504
|
+
row.zip(formats).map do |elem, format|
|
505
|
+
non_ascii_diff = elem.ascii_only? ? 0 : elem.width - elem.size
|
506
|
+
if format.negative?
|
507
|
+
elem.ljust(-format + non_ascii_diff)
|
508
|
+
else
|
509
|
+
elem.rjust(format + non_ascii_diff)
|
510
|
+
end
|
511
|
+
end
|
512
|
+
str.puts a.join(' ').rstrip
|
236
513
|
end
|
237
514
|
|
238
515
|
str.string
|
239
516
|
end
|
240
517
|
|
241
|
-
def
|
518
|
+
def format_width(vector, original, width)
|
242
519
|
if vector.key != INDEX_KEY && !original[vector.key].numeric?
|
243
|
-
|
520
|
+
-width
|
244
521
|
else
|
245
|
-
|
522
|
+
width
|
246
523
|
end
|
247
524
|
end
|
248
525
|
|
249
526
|
def html_table
|
250
|
-
reduced = size >
|
527
|
+
reduced = size > 10 ? self[0..5, -5..-1] : self
|
251
528
|
|
252
529
|
converted = reduced.assign do
|
253
530
|
vectors.select.with_object({}) do |vector, assigner|
|
@@ -267,12 +544,14 @@ module RedAmber
|
|
267
544
|
format('%g', element)
|
268
545
|
in Integer
|
269
546
|
format('%d', element)
|
547
|
+
else
|
548
|
+
element
|
270
549
|
end
|
271
550
|
end
|
272
551
|
end
|
273
552
|
end
|
274
553
|
|
275
|
-
html = IRuby::HTML.table(converted.to_h, maxrows:
|
554
|
+
html = IRuby::HTML.table(converted.to_h, maxrows: 10, maxcols: 15)
|
276
555
|
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
277
556
|
end
|
278
557
|
end
|
@@ -1,38 +1,141 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Mix-ins for the class DataFrame
|
5
5
|
module DataFrameIndexable
|
6
|
-
#
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
6
|
+
# Returns row index Vector.
|
7
|
+
#
|
8
|
+
# @overload indices
|
9
|
+
# return @indices as row indices (0...size).
|
10
|
+
#
|
11
|
+
# @return [Vector]
|
12
|
+
# a Vector of row indices.
|
13
|
+
# @example When `dataframe.size == 5`;
|
14
|
+
# dataframe.indices
|
15
|
+
#
|
16
|
+
# # =>
|
17
|
+
# #<RedAmber::Vector(:uint8, size=5):0x000000000000fb54>
|
18
|
+
# [0, 1, 2, 3, 4]
|
19
|
+
#
|
20
|
+
# @overload indices(start)
|
21
|
+
# return customized index Vector `(start..).take(size)`.
|
22
|
+
#
|
23
|
+
# @param start [#succ]
|
24
|
+
# starting element, which must respond to `#succ`.
|
25
|
+
# @return [Vector]
|
26
|
+
# a Vector of row indices.
|
27
|
+
# @example When `dataframe.size == 5`;
|
28
|
+
# dataframe.indices(1)
|
29
|
+
#
|
30
|
+
# # =>
|
31
|
+
# #<RedAmber::Vector(:uint8, size=5):0x000000000000fba4>
|
32
|
+
# [1, 2, 3, 4, 5]
|
33
|
+
#
|
34
|
+
# dataframe.indices('a')
|
35
|
+
# # =>
|
36
|
+
# #<RedAmber::Vector(:string, size=5):0x000000000000fbb8>
|
37
|
+
# ["a", "b", "c", "d", "e"]
|
38
|
+
#
|
39
|
+
def indices(start = 0)
|
40
|
+
if start == 0 # rubocop:disable Style/NumericPredicate
|
41
|
+
@indices ||= Vector.new(0...size)
|
42
|
+
else
|
43
|
+
Vector.new((start..).take(size))
|
44
|
+
end
|
13
45
|
end
|
46
|
+
alias_method :indexes, :indices
|
14
47
|
|
48
|
+
# Return sorted indexes of self by a Vector.
|
49
|
+
#
|
15
50
|
# @param sort_keys [Arrow::SortKey]
|
16
51
|
# :key, "key" or "+key" denotes ascending,
|
17
|
-
# "-key" denotes descending order
|
18
|
-
# @return [RedAmber::Vector]
|
52
|
+
# :"-key" or "-key" denotes descending order.
|
53
|
+
# @return [RedAmber::Vector]
|
54
|
+
# sorted indices in Vector.
|
55
|
+
# @example
|
56
|
+
# df
|
57
|
+
#
|
58
|
+
# # =>
|
59
|
+
# x y
|
60
|
+
# <uint8> <string>
|
61
|
+
# 0 3 B
|
62
|
+
# 1 5 A
|
63
|
+
# 2 1 B
|
64
|
+
# 3 4 A
|
65
|
+
# 4 2 C
|
66
|
+
#
|
67
|
+
# df.sort_indices('x')
|
68
|
+
#
|
69
|
+
# # =>
|
70
|
+
# #<RedAmber::Vector(:uint64, size=5):0x0000000000003854>
|
71
|
+
# [2, 4, 0, 3, 1]
|
72
|
+
#
|
19
73
|
def sort_indices(*sort_keys)
|
20
74
|
indices = @table.sort_indices(sort_keys.flatten)
|
21
75
|
Vector.create(indices)
|
22
76
|
end
|
23
77
|
|
24
|
-
#
|
78
|
+
# Sort the contents of self.
|
79
|
+
#
|
80
|
+
# @param sort_keys [Arrow::SortKey]
|
81
|
+
# :key, "key" or "+key" denotes ascending,
|
82
|
+
# :"-key" or "-key" denotes descending order.
|
83
|
+
# @return [RedAmber::DataFrame]
|
84
|
+
# sorted DataFrame.
|
85
|
+
# @example Sort by a key
|
86
|
+
# df
|
87
|
+
#
|
88
|
+
# # =>
|
89
|
+
# x y
|
90
|
+
# <uint8> <string>
|
91
|
+
# 0 3 B
|
92
|
+
# 1 5 A
|
93
|
+
# 2 1 B
|
94
|
+
# 3 4 A
|
95
|
+
# 4 2 C
|
96
|
+
#
|
97
|
+
# df.sort('y')
|
98
|
+
#
|
99
|
+
# # =>
|
100
|
+
# #<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000382c>
|
101
|
+
# x y
|
102
|
+
# <uint8> <string>
|
103
|
+
# 0 5 A
|
104
|
+
# 1 4 A
|
105
|
+
# 2 3 B
|
106
|
+
# 3 1 B
|
107
|
+
# 4 2 C
|
108
|
+
#
|
109
|
+
# @example Sort by two keys
|
110
|
+
# df.sort('y', 'x')
|
111
|
+
#
|
112
|
+
# # =>
|
113
|
+
# #<RedAmber::DataFrame : 5 x 2 Vectors, 0x0000000000003890>
|
114
|
+
# x y
|
115
|
+
# <uint8> <string>
|
116
|
+
# 0 4 A
|
117
|
+
# 1 5 A
|
118
|
+
# 2 1 B
|
119
|
+
# 3 3 B
|
120
|
+
# 4 2 C
|
121
|
+
#
|
122
|
+
# @example Sort in descending order
|
123
|
+
# df.sort('-x')
|
124
|
+
#
|
125
|
+
# # =>
|
126
|
+
# #<RedAmber::DataFrame : 5 x 2 Vectors, 0x0000000000003840>
|
127
|
+
# x y
|
128
|
+
# <uint8> <string>
|
129
|
+
# 0 5 A
|
130
|
+
# 1 4 A
|
131
|
+
# 2 3 B
|
132
|
+
# 3 2 C
|
133
|
+
# 4 1 B
|
134
|
+
#
|
25
135
|
def sort(*sort_keys)
|
26
136
|
indices = @table.sort_indices(sort_keys.flatten)
|
27
137
|
|
28
|
-
|
29
|
-
end
|
30
|
-
|
31
|
-
private
|
32
|
-
|
33
|
-
def new_dataframe_by(index_array)
|
34
|
-
t = Arrow::Function.find(:take).execute([@table, index_array]).value
|
35
|
-
DataFrame.create(t)
|
138
|
+
take(indices)
|
36
139
|
end
|
37
140
|
end
|
38
141
|
end
|