red_amber 0.3.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +56 -22
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +29 -30
  8. data/benchmark/basic.yml +7 -7
  9. data/benchmark/combine.yml +3 -3
  10. data/benchmark/dataframe.yml +15 -9
  11. data/benchmark/group.yml +6 -6
  12. data/benchmark/reshape.yml +6 -6
  13. data/benchmark/vector.yml +6 -3
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +454 -85
  20. data/lib/red_amber/data_frame_combinable.rb +609 -115
  21. data/lib/red_amber/data_frame_displayable.rb +313 -34
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +623 -70
  26. data/lib/red_amber/data_frame_variable_operation.rb +452 -35
  27. data/lib/red_amber/group.rb +186 -22
  28. data/lib/red_amber/helper.rb +74 -14
  29. data/lib/red_amber/refinements.rb +26 -6
  30. data/lib/red_amber/subframes.rb +1101 -0
  31. data/lib/red_amber/vector.rb +362 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +506 -0
  34. data/lib/red_amber/vector_selectable.rb +265 -23
  35. data/lib/red_amber/vector_unary_element_wise.rb +529 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -3,22 +3,70 @@
3
3
  require 'stringio'
4
4
 
5
5
  module RedAmber
6
- # mix-ins for the class DataFrame
6
+ # Mix-in for the class DataFrame
7
7
  module DataFrameDisplayable
8
+ # Refine class String
9
+ using RefineString
10
+
11
+ # Used internally to display table.
8
12
  INDEX_KEY = :index_key_for_format_table
13
+ private_constant :INDEX_KEY
14
+
15
+ # rubocop:disable Layout/LineLength
9
16
 
10
- def to_s(width: 80)
17
+ # Show a preview of self as a string.
18
+ #
19
+ # @param width [Integer]
20
+ # maximum size of result.
21
+ # @param head [Integer]
22
+ # number of records to show from head.
23
+ # @param tail [Integer]
24
+ # number of records to show at tail.
25
+ # @return [String]
26
+ # string representation of self.
27
+ # @example Show penguins dataset
28
+ # puts penguins.to_s
29
+ #
30
+ # # =>
31
+ # species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ... year
32
+ # <string> <string> <double> <double> <uint8> <uint16> ... <uint16>
33
+ # 0 Adelie Torgersen 39.1 18.7 181 3750 ... 2007
34
+ # 1 Adelie Torgersen 39.5 17.4 186 3800 ... 2007
35
+ # 2 Adelie Torgersen 40.3 18.0 195 3250 ... 2007
36
+ # 3 Adelie Torgersen (nil) (nil) (nil) (nil) ... 2007
37
+ # 4 Adelie Torgersen 36.7 19.3 193 3450 ... 2007
38
+ # : : : : : : : ... :
39
+ # 340 Gentoo Biscoe 46.8 14.3 215 4850 ... 2009
40
+ # 341 Gentoo Biscoe 50.4 15.7 222 5750 ... 2009
41
+ # 342 Gentoo Biscoe 45.2 14.8 212 5200 ... 2009
42
+ # 343 Gentoo Biscoe 49.9 16.1 213 5400 ... 2009
43
+ #
44
+ def to_s(width: 90, head: 5, tail: 4)
11
45
  return '' if empty?
12
46
 
13
- format_table(width: width)
47
+ format_table(width: width, head: head, tail: tail)
14
48
  end
15
49
 
16
- # Show statistical summary by a new DatFrame.
17
- # Make stats for numeric columns only.
18
- # NaNs are ignored.
19
- # Counts also show non-NaN counts.
50
+ # Show statistical summary by a new DataFrame.
51
+ #
52
+ # This method will make stats only for numeric columns.
53
+ # - NaNs are ignored.
54
+ # - `count` shows non-NaN counts.
55
+ #
56
+ # @return [DataFrame]
57
+ # a new dataframe.
58
+ # @example Statistical summary of penguins dataset
59
+ # puts penguins.summary.to_s
60
+ #
61
+ # # =>
62
+ # variables count mean std min 25% median 75% max
63
+ # <dictionary> <uint16> <double> <double> <double> <double> <double> <double> <double>
64
+ # 0 bill_length_mm 342 43.92 5.46 32.1 39.23 44.38 48.5 59.6
65
+ # 1 bill_depth_mm 342 17.15 1.97 13.1 15.6 17.32 18.7 21.5
66
+ # 2 flipper_length_mm 342 200.92 14.06 172.0 190.0 197.0 213.0 231.0
67
+ # 3 body_mass_g 342 4201.75 801.95 2700.0 3550.0 4031.5 4750.0 6300.0
68
+ # 4 year 344 2008.03 0.82 2007.0 2007.0 2008.0 2009.0 2009.0
20
69
  #
21
- # @return [DataFrame] a new dataframe.
22
70
  def summary
23
71
  num_keys = keys.select { |key| self[key].numeric? }
24
72
 
@@ -36,29 +84,223 @@ module RedAmber
36
84
  end
37
85
  alias_method :describe, :summary
38
86
 
87
+ # Show information of self.
88
+ #
89
+ # According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
90
+ # - If it is 'TDR', returns class name, shape, object id
91
+ # and transposed preview for up to 10 variables.
92
+ # - If it is 'TDRA', returns class name, shape, object id
93
+ # and transposed preview for all variables.
94
+ # - If it is 'MINIMUM', returns class name and shape.
95
+ # - If it is 'PLAIN', returns class name, shape and Table preview
96
+ # for up to 128 rows from head, within a width of 128 characters.
97
+ # - If it is 'TABLE' or otherwise, returns class name, shape, object id
98
+ # and Table preview for up to 20 rows from head, within a width of 100 characters.
99
+ # Default value of the ENV is 'Table'.
100
+ # @return [String]
101
+ # information of self.
102
+ # @example Default for ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table'
103
+ # puts df.inspect
104
+ #
105
+ # # =>
106
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c148>
107
+ # x y
108
+ # <uint8> <string>
109
+ # 0 1 A
110
+ # 1 2 B
111
+ # 2 3 C
112
+ #
113
+ # @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'TDR'
114
+ # puts df.inspect
115
+ #
116
+ # # =>
117
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c148>
118
+ # Vectors : 1 numeric, 1 string
119
+ # # key type level data_preview
120
+ # 0 :x uint8 3 [1, 2, 3]
121
+ # 1 :y string 3 ["A", "B", "C"]
122
+ #
123
+ # @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'Minimum'
124
+ # puts df.inspect
125
+ #
126
+ # # =>
127
+ # RedAmber::DataFrame : 3 x 2 Vectors
128
+ #
39
129
  def inspect
40
130
  mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
41
131
  case mode.upcase
42
132
  when 'TDR'
43
- "#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
133
+ "#<#{shape_str(with_id: true)}>\n#{dataframe_info(10)}"
134
+ when 'TDRA'
135
+ "#<#{shape_str(with_id: true)}>\n#{dataframe_info(:all)}"
44
136
  when 'MINIMUM'
45
137
  shape_str
138
+ when 'PLAIN'
139
+ "#<#{shape_str}>\n#{to_s(width: 128, head: 128)}"
46
140
  else
47
- "#<#{shape_str(with_id: true)}>\n#{self}"
141
+ "#<#{shape_str(with_id: true)}>\n#{to_s(width: 100, head: 20)}"
48
142
  end
49
143
  end
50
144
 
51
- # - limit: max num of Vectors to show
52
- # - tally: max level to use tally mode
53
- # - elements: max element to show values in each vector
145
+ # Shows some information about self in a transposed style.
146
+ #
147
+ # @param limit [Integer, :all]
148
+ # maximum number of variables (columns) to show.
149
+ # Shows all variables (columns) if it is `:all`.
150
+ # @param tally [Integer]
151
+ # maximum level to use tally mode.
152
+ # Tally mode counts the occurrences of each element and shows as a hash
153
+ # with the elements as keys and the corresponding counts as values.
154
+ # @param elements [Integer]
155
+ # maximum number of elements to show values
156
+ # in each column.
157
+ # @return [nil]
158
+ # @example Default
159
+ # diamonds = diamonds.assign_left(:index) { indices }
160
+ # diamonds
161
+ #
162
+ # # =>
163
+ # #<RedAmber::DataFrame : 53940 x 11 Vectors, 0x0000000000035084>
164
+ # index carat cut color clarity depth table price x y z
165
+ # <uint16> <double> <string> <string> <string> <double> <double> <uint16> <double> <double> <double>
166
+ # 0 0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
167
+ # 1 1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
168
+ # 2 2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
169
+ # 3 3 0.29 Premium I VS2 62.4 58.0 334 4.2 4.23 2.63
170
+ # 4 4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
171
+ # 5 5 0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 2.48
172
+ # 6 6 0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47
173
+ # 7 7 0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53
174
+ # 8 8 0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49
175
+ # 9 9 0.23 Very Good H VS1 59.4 61.0 338 4.0 4.05 2.39
176
+ # 10 10 0.3 Good J SI1 64.0 55.0 339 4.25 4.28 2.73
177
+ # 11 11 0.23 Ideal J VS1 62.8 56.0 340 3.93 3.9 2.46
178
+ # 12 12 0.22 Premium F SI1 60.4 61.0 342 3.88 3.84 2.33
179
+ # 13 13 0.31 Ideal J SI2 62.2 54.0 344 4.35 4.37 2.71
180
+ # 14 14 0.2 Premium E SI2 60.2 62.0 345 3.79 3.75 2.27
181
+ # 15 15 0.32 Premium E I1 60.9 58.0 345 4.38 4.42 2.68
182
+ # 16 16 0.3 Ideal I SI2 62.0 54.0 348 4.31 4.34 2.68
183
+ # 17 17 0.3 Good J SI1 63.4 54.0 351 4.23 4.29 2.7
184
+ # 18 18 0.3 Good J SI1 63.8 56.0 351 4.23 4.26 2.71
185
+ # 19 19 0.3 Very Good J SI1 62.7 59.0 351 4.21 4.27 2.66
186
+ # : : : : : : : : : : : :
187
+ # 53936 53936 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
188
+ # 53937 53937 0.7 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
189
+ # 53938 53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
190
+ # 53939 53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64
191
+ #
192
+ # diamonds.tdr
193
+ #
194
+ # # =>
195
+ # RedAmber::DataFrame : 53940 x 11 Vectors
196
+ # Vectors : 8 numeric, 3 strings
197
+ # # key type level data_preview
198
+ # 0 :index uint16 53940 [0, 1, 2, 3, 4, ... ]
199
+ # 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
200
+ # 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
201
+ # 3 :color string 7 ["E", "E", "E", "I", "J", ... ]
202
+ # 4 :clarity string 8 ["SI2", "SI1", "VS1", "VS2", "SI2", ... ]
203
+ # 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
204
+ # 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
205
+ # 7 :price uint16 11602 [326, 326, 327, 334, 335, ... ]
206
+ # 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
207
+ # 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
208
+ # ... 1 more Vector ...
209
+ #
210
+ # @example Show all variables
211
+ # diamonds.tdr(:all)
212
+ #
213
+ # # =>
214
+ # RedAmber::DataFrame : 53940 x 11 Vectors
215
+ # Vectors : 8 numeric, 3 strings
216
+ # # key type level data_preview
217
+ # 0 :index uint16 53940 [0, 1, 2, 3, 4, ... ]
218
+ # 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
219
+ # 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
220
+ # 3 :color string 7 ["E", "E", "E", "I", "J", ... ]
221
+ # 4 :clarity string 8 ["SI2", "SI1", "VS1", "VS2", "SI2", ... ]
222
+ # 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
223
+ # 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
224
+ # 7 :price uint16 11602 [326, 326, 327, 334, 335, ... ]
225
+ # 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
226
+ # 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
227
+ # 10 :z double 375 [2.43, 2.31, 2.31, 2.63, 2.75, ... ]
228
+ #
229
+ # @example Use tally mode up to 8 levels
230
+ # diamonds.tdr(tally: 8)
231
+ #
232
+ # # =>
233
+ # RedAmber::DataFrame : 53940 x 11 Vectors
234
+ # Vectors : 8 numeric, 3 strings
235
+ # # key type level data_preview
236
+ # 0 :index uint16 53940 [0, 1, 2, 3, 4, ... ]
237
+ # 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
238
+ # 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
239
+ # 3 :color string 7 {"E"=>9797, "I"=>5422, "J"=>2808, "H"=>8304, "F"=>9542, "G"=>11292, "D"=>6775}
240
+ # 4 :clarity string 8 {"SI2"=>9194, "SI1"=>13065, "VS1"=>8171, "VS2"=>12258, "VVS2"=>5066, "VVS1"=>3655, "I1"=>741, "IF"=>1790}
241
+ # 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
242
+ # 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
243
+ # 7 :price uint16 11602 [326, 326, 327, 334, 335, ... ]
244
+ # 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
245
+ # 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
246
+ # ... 1 more Vector ...
247
+ #
248
+ # @example Increase elements to show
249
+ # diamonds.tdr(elements: 10)
250
+ #
251
+ # # =>
252
+ # RedAmber::DataFrame : 53940 x 11 Vectors
253
+ # Vectors : 8 numeric, 3 strings
254
+ # # key type level data_preview
255
+ # 0 :index uint16 53940 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... ]
256
+ # 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, ... ]
257
+ # 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
258
+ # 3 :color string 7 ["E", "E", "E", "I", "J", "J", "I", "H", "E", "H", ... ]
259
+ # 4 :clarity string 8 ["SI2", "SI1", "VS1", "VS2", "SI2", "VVS2", "VVS1", "SI1", "VS2", "VS1", ... ]
260
+ # 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, ... ]
261
+ # 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, 57.0, 57.0, 55.0, 61.0, 61.0, ... ]
262
+ # 7 :price uint16 11602 [326, 326, 327, 334, 335, 336, 336, 337, 337, 338, ... ]
263
+ # 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, 3.94, 3.95, 4.07, 3.87, 4.0, ... ]
264
+ # 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, ... ]
265
+ # ... 1 more Vector ...
266
+ #
54
267
  def tdr(limit = 10, tally: 5, elements: 5)
55
268
  puts tdr_str(limit, tally: tally, elements: elements)
56
269
  end
270
+ alias_method :glimpse, :tdr
57
271
 
272
+ # Shortcut for `tdr(:all)`.
273
+ #
274
+ # @return (see #tdr)
275
+ #
276
+ def tdra
277
+ puts tdr_str(:all)
278
+ end
279
+
280
+ # rubocop:enable Layout/LineLength
281
+
282
+ # Returns some information about self in a transposed style by a string.
283
+ #
284
+ # @param (see #tdr)
285
+ # @option (see #tdr)
286
+ # @return [String] TDR style string.
287
+ #
58
288
  def tdr_str(limit = 10, tally: 5, elements: 5)
59
289
  "#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
60
290
  end
61
291
 
292
+ # Returns html formatted text of self by IRuby::HTML.table.
293
+ #
294
+ # According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
295
+ # - If it is 'MINIMUM', returns shape by plain text.
296
+ # - If it is 'PLAIN', returns `#inspect` value by plain text.
297
+ # - If it is 'TDR', returns shape and transposed preview by plain text.
298
+ # - If it is 'TDRA', returns shape and transposed preview of all variables by plain text.
299
+ # - If it is 'TABLE' or otherwise, returns Table preview by html format.
300
+ # Default value of the ENV is 'TABLE'.
301
+ # @return [String]
302
+ # formatted string.
303
+ #
62
304
  def to_iruby
63
305
  require 'iruby'
64
306
  return ['text/plain', '(empty DataFrame)'] if empty?
@@ -71,19 +313,39 @@ module RedAmber
71
313
  ['text/plain', shape_str]
72
314
  when 'TDR'
73
315
  size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
316
+ when 'TDRA'
317
+ ['text/plain', tdr_str(:all)]
74
318
  else # 'TABLE'
75
319
  ['text/html', html_table]
76
320
  end
77
321
  end
78
322
 
79
- private # =====
80
-
323
+ # Return class and shape of self by a String.
324
+ #
325
+ # @param with_id [true, false]
326
+ # show id if true.
327
+ # @return [String]
328
+ # shape string.
329
+ # @example Default (without id)
330
+ # penguins.shape_str
331
+ #
332
+ # # =>
333
+ # "RedAmber::DataFrame : 344 x 8 Vectors"
334
+ #
335
+ # @example With id
336
+ # penguins.shape_str(with_id: true)
337
+ #
338
+ # # =>
339
+ # "RedAmber::DataFrame : 344 x 8 Vectors, 0x0000000000003980"
340
+ #
81
341
  def shape_str(with_id: false)
82
342
  shape_info = empty? ? '(empty)' : "#{size} x #{n_keys} Vector#{pl(n_keys)}"
83
343
  id = with_id ? format(', 0x%016x', object_id) : ''
84
344
  "#{self.class} : #{shape_info}#{id}"
85
345
  end
86
346
 
347
+ private # =====
348
+
87
349
  def dataframe_info(limit, tally_level: 5, max_element: 5)
88
350
  return '' if empty?
89
351
 
@@ -95,7 +357,7 @@ module RedAmber
95
357
  quoted_keys = keys.map(&:inspect)
96
358
  headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
97
359
  data: 'data_preview' }
98
- header_format = make_header_format(levels, headers, quoted_keys)
360
+ header_format = make_header_format(levels, headers, quoted_keys, limit)
99
361
 
100
362
  sio = StringIO.new # output string buffer
101
363
  sio.puts "Vector#{pl(n_keys)} : #{var_type_count(type_groups).join(', ')}"
@@ -125,9 +387,9 @@ module RedAmber
125
387
  sio.string
126
388
  end
127
389
 
128
- def make_header_format(levels, headers, quoted_keys)
390
+ def make_header_format(levels, headers, quoted_keys, limit)
129
391
  # find longest word to adjust width
130
- w_idx = n_keys.to_s.size
392
+ w_idx = ([n_keys, limit].min - 1).to_s.size
131
393
  w_key = [quoted_keys.map(&:size).max, headers[:key].size].max
132
394
  w_type = [types.map(&:size).max, headers[:type].size].max
133
395
  w_level = [levels.map { |l| l.to_s.size }.max, headers[:levels].size].max
@@ -156,10 +418,17 @@ module RedAmber
156
418
  end
157
419
 
158
420
  def shorthand(vector, size, max_element)
159
- max = vector.temporal? ? 2 : max_element
160
- a = vector.to_a.take(max)
161
- a.map! { |e| e.nil? ? 'nil' : e.inspect }
162
- a << '... ' if size > max
421
+ a = vector.to_a.take(max_element)
422
+ a.map! do |e|
423
+ if e.nil?
424
+ 'nil'
425
+ elsif vector.temporal?
426
+ e.to_s.inspect
427
+ else
428
+ e.inspect
429
+ end
430
+ end
431
+ a << '... ' if size > max_element
163
432
  "[#{a.join(', ')}]"
164
433
  end
165
434
 
@@ -201,13 +470,13 @@ module RedAmber
201
470
  df = df.assign do
202
471
  vectors.each_with_object({}) do |v, assigner|
203
472
  vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
204
- .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
473
+ .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
205
474
  assigner[v.key] =
206
475
  original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
207
476
  end
208
477
  end
209
478
 
210
- width_list = df.vectors.map { |v| v.to_a.map(&:length).max }
479
+ width_list = df.vectors.map { |v| v.to_a.map(&:width).max }
211
480
  total_length = width_list[-1] # reserved for last column
212
481
 
213
482
  formats = []
@@ -216,14 +485,13 @@ module RedAmber
216
485
  w = width_list[i]
217
486
  if total_length + w > width && i < df.n_keys - 1
218
487
  row_ellipsis = i
219
- formats << '%3s'
220
- formats << format_for_column(df.vectors[-1], original, width_list[-1])
488
+ formats << 3
489
+ formats << format_width(df.vectors[-1], original, width_list[-1])
221
490
  break
222
491
  end
223
- formats << format_for_column(v, original, w)
492
+ formats << format_width(v, original, w)
224
493
  total_length += w
225
494
  end
226
- format_str = formats.join(' ')
227
495
 
228
496
  str = StringIO.new
229
497
  if row_ellipsis
@@ -232,22 +500,31 @@ module RedAmber
232
500
  end
233
501
 
234
502
  df.to_a.each do |row|
235
- str.puts format(format_str, *row).rstrip
503
+ a =
504
+ row.zip(formats).map do |elem, format|
505
+ non_ascii_diff = elem.ascii_only? ? 0 : elem.width - elem.size
506
+ if format.negative?
507
+ elem.ljust(-format + non_ascii_diff)
508
+ else
509
+ elem.rjust(format + non_ascii_diff)
510
+ end
511
+ end
512
+ str.puts a.join(' ').rstrip
236
513
  end
237
514
 
238
515
  str.string
239
516
  end
240
517
 
241
- def format_for_column(vector, original, width)
518
+ def format_width(vector, original, width)
242
519
  if vector.key != INDEX_KEY && !original[vector.key].numeric?
243
- "%-#{width}s"
520
+ -width
244
521
  else
245
- "%#{width}s"
522
+ width
246
523
  end
247
524
  end
248
525
 
249
526
  def html_table
250
- reduced = size > 8 ? self[0..4, -4..-1] : self
527
+ reduced = size > 10 ? self[0..5, -5..-1] : self
251
528
 
252
529
  converted = reduced.assign do
253
530
  vectors.select.with_object({}) do |vector, assigner|
@@ -267,12 +544,14 @@ module RedAmber
267
544
  format('%g', element)
268
545
  in Integer
269
546
  format('%d', element)
547
+ else
548
+ element
270
549
  end
271
550
  end
272
551
  end
273
552
  end
274
553
 
275
- html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
554
+ html = IRuby::HTML.table(converted.to_h, maxrows: 10, maxcols: 15)
276
555
  "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
277
556
  end
278
557
  end
@@ -1,38 +1,141 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- # mix-ins for the class DataFrame
4
+ # Mix-ins for the class DataFrame
5
5
  module DataFrameIndexable
6
- # Common method
7
- def map_indices(*indices)
8
- return self if indices.empty?
9
-
10
- indices = indices[0].data if indices[0].is_a?(Vector)
11
-
12
- new_dataframe_by(indices)
6
+ # Returns row index Vector.
7
+ #
8
+ # @overload indices
9
+ # return @indices as row indices (0...size).
10
+ #
11
+ # @return [Vector]
12
+ # a Vector of row indices.
13
+ # @example When `dataframe.size == 5`;
14
+ # dataframe.indices
15
+ #
16
+ # # =>
17
+ # #<RedAmber::Vector(:uint8, size=5):0x000000000000fb54>
18
+ # [0, 1, 2, 3, 4]
19
+ #
20
+ # @overload indices(start)
21
+ # return customized index Vector `(start..).take(size)`.
22
+ #
23
+ # @param start [#succ]
24
+ # element of start which have `#succ` method.
25
+ # @return [Vector]
26
+ # a Vector of row indices.
27
+ # @example When `dataframe.size == 5`;
28
+ # dataframe.indices(1)
29
+ #
30
+ # # =>
31
+ # #<RedAmber::Vector(:uint8, size=5):0x000000000000fba4>
32
+ # [1, 2, 3, 4, 5]
33
+ #
34
+ # dataframe.indices('a')
35
+ # # =>
36
+ # #<RedAmber::Vector(:string, size=5):0x000000000000fbb8>
37
+ # ["a", "b", "c", "d", "e"]
38
+ #
39
+ def indices(start = 0)
40
+ if start == 0 # rubocop:disable Style/NumericPredicate
41
+ @indices ||= Vector.new(0...size)
42
+ else
43
+ Vector.new((start..).take(size))
44
+ end
13
45
  end
46
+ alias_method :indexes, :indices
14
47
 
48
+ # Return sorted indexes of self by a Vector.
49
+ #
15
50
  # @param sort_keys [Arrow::SortKey]
16
51
  # :key, "key" or "+key" denotes ascending,
17
- # "-key" denotes descending order
18
- # @return [RedAmber::Vector] Sorted indices in Vector
52
+ # :"-key" or "-key" denotes descending order.
53
+ # @return [RedAmber::Vector]
54
+ # sorted indices in Vector.
55
+ # @example
56
+ # df
57
+ #
58
+ # # =>
59
+ # x y
60
+ # <uint8> <string>
61
+ # 0 3 B
62
+ # 1 5 A
63
+ # 2 1 B
64
+ # 3 4 A
65
+ # 4 2 C
66
+ #
67
+ # df.sort_indices('x')
68
+ #
69
+ # # =>
70
+ # #<RedAmber::Vector(:uint64, size=5):0x0000000000003854>
71
+ # [2, 4, 0, 3, 1]
72
+ #
19
73
  def sort_indices(*sort_keys)
20
74
  indices = @table.sort_indices(sort_keys.flatten)
21
75
  Vector.create(indices)
22
76
  end
23
77
 
24
- # @return [RedAmber::DataFrame] Sorted DataFrame
78
+ # Sort the contents of self.
79
+ #
80
+ # @param sort_keys [Arrow::SortKey]
81
+ # :key, "key" or "+key" denotes ascending,
82
+ # :"-key" or "-key" denotes descending order.
83
+ # @return [RedAmber::DataFrame]
84
+ # sorted DataFrame.
85
+ # @example Sort by a key
86
+ # df
87
+ #
88
+ # # =>
89
+ # x y
90
+ # <uint8> <string>
91
+ # 0 3 B
92
+ # 1 5 A
93
+ # 2 1 B
94
+ # 3 4 A
95
+ # 4 2 C
96
+ #
97
+ # df.sort('y')
98
+ #
99
+ # # =>
100
+ # #<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000382c>
101
+ # x y
102
+ # <uint8> <string>
103
+ # 0 5 A
104
+ # 1 4 A
105
+ # 2 3 B
106
+ # 3 1 B
107
+ # 4 2 C
108
+ #
109
+ # @example Sort by two keys
110
+ # df.sort('y', 'x')
111
+ #
112
+ # # =>
113
+ # #<RedAmber::DataFrame : 5 x 2 Vectors, 0x0000000000003890>
114
+ # x y
115
+ # <uint8> <string>
116
+ # 0 4 A
117
+ # 1 5 A
118
+ # 2 1 B
119
+ # 3 3 B
120
+ # 4 2 C
121
+ #
122
+ # @example Sort in descending order
123
+ # df.sort('-x')
124
+ #
125
+ # # =>
126
+ # #<RedAmber::DataFrame : 5 x 2 Vectors, 0x0000000000003840>
127
+ # x y
128
+ # <uint8> <string>
129
+ # 0 5 A
130
+ # 1 4 A
131
+ # 2 3 B
132
+ # 3 2 C
133
+ # 4 1 B
134
+ #
25
135
  def sort(*sort_keys)
26
136
  indices = @table.sort_indices(sort_keys.flatten)
27
137
 
28
- new_dataframe_by(indices)
29
- end
30
-
31
- private
32
-
33
- def new_dataframe_by(index_array)
34
- t = Arrow::Function.find(:take).execute([@table, index_array]).value
35
- DataFrame.create(t)
138
+ take(indices)
36
139
  end
37
140
  end
38
141
  end