red_amber 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +56 -22
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +29 -30
  8. data/benchmark/basic.yml +7 -7
  9. data/benchmark/combine.yml +3 -3
  10. data/benchmark/dataframe.yml +15 -9
  11. data/benchmark/group.yml +6 -6
  12. data/benchmark/reshape.yml +6 -6
  13. data/benchmark/vector.yml +6 -3
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +454 -85
  20. data/lib/red_amber/data_frame_combinable.rb +609 -115
  21. data/lib/red_amber/data_frame_displayable.rb +313 -34
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +623 -70
  26. data/lib/red_amber/data_frame_variable_operation.rb +452 -35
  27. data/lib/red_amber/group.rb +186 -22
  28. data/lib/red_amber/helper.rb +74 -14
  29. data/lib/red_amber/refinements.rb +26 -6
  30. data/lib/red_amber/subframes.rb +1101 -0
  31. data/lib/red_amber/vector.rb +362 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +506 -0
  34. data/lib/red_amber/vector_selectable.rb +265 -23
  35. data/lib/red_amber/vector_unary_element_wise.rb +529 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -3,22 +3,70 @@
3
3
  require 'stringio'
4
4
 
5
5
  module RedAmber
6
- # mix-ins for the class DataFrame
6
+ # Mix-in for the class DataFrame
7
7
  module DataFrameDisplayable
8
+ # Refine class String
9
+ using RefineString
10
+
11
+ # Used internally to display table.
8
12
  INDEX_KEY = :index_key_for_format_table
13
+ private_constant :INDEX_KEY
14
+
15
+ # rubocop:disable Layout/LineLength
9
16
 
10
- def to_s(width: 80)
17
+ # Show a preview of self as a string.
18
+ #
19
+ # @param width [Integer]
20
+ # maximum size of result.
21
+ # @param head [Integer]
22
+ # number of records to show from head.
23
+ # @param tail [Integer]
24
+ # number of records to show at tail.
25
+ # @return [String]
26
+ # string representation of self.
27
+ # @example Show penguins dataset
28
+ # puts penguins.to_s
29
+ #
30
+ # # =>
31
+ # species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ... year
32
+ # <string> <string> <double> <double> <uint8> <uint16> ... <uint16>
33
+ # 0 Adelie Torgersen 39.1 18.7 181 3750 ... 2007
34
+ # 1 Adelie Torgersen 39.5 17.4 186 3800 ... 2007
35
+ # 2 Adelie Torgersen 40.3 18.0 195 3250 ... 2007
36
+ # 3 Adelie Torgersen (nil) (nil) (nil) (nil) ... 2007
37
+ # 4 Adelie Torgersen 36.7 19.3 193 3450 ... 2007
38
+ # : : : : : : : ... :
39
+ # 340 Gentoo Biscoe 46.8 14.3 215 4850 ... 2009
40
+ # 341 Gentoo Biscoe 50.4 15.7 222 5750 ... 2009
41
+ # 342 Gentoo Biscoe 45.2 14.8 212 5200 ... 2009
42
+ # 343 Gentoo Biscoe 49.9 16.1 213 5400 ... 2009
43
+ #
44
+ def to_s(width: 90, head: 5, tail: 4)
11
45
  return '' if empty?
12
46
 
13
- format_table(width: width)
47
+ format_table(width: width, head: head, tail: tail)
14
48
  end
15
49
 
16
- # Show statistical summary by a new DatFrame.
17
- # Make stats for numeric columns only.
18
- # NaNs are ignored.
19
- # Counts also show non-NaN counts.
50
+ # Show statistical summary by a new DataFrame.
51
+ #
52
+ # This method will make stats only for numeric columns.
53
+ # - NaNs are ignored.
54
+ # - `count` shows non-NaN counts.
55
+ #
56
+ # @return [DataFrame]
57
+ # a new dataframe.
58
+ # @example Statistical summary of penguins dataset
59
+ # puts penguins.summary.to_s
60
+ #
61
+ # # =>
62
+ # variables count mean std min 25% median 75% max
63
+ # <dictionary> <uint16> <double> <double> <double> <double> <double> <double> <double>
64
+ # 0 bill_length_mm 342 43.92 5.46 32.1 39.23 44.38 48.5 59.6
65
+ # 1 bill_depth_mm 342 17.15 1.97 13.1 15.6 17.32 18.7 21.5
66
+ # 2 flipper_length_mm 342 200.92 14.06 172.0 190.0 197.0 213.0 231.0
67
+ # 3 body_mass_g 342 4201.75 801.95 2700.0 3550.0 4031.5 4750.0 6300.0
68
+ # 4 year 344 2008.03 0.82 2007.0 2007.0 2008.0 2009.0 2009.0
20
69
  #
21
- # @return [DataFrame] a new dataframe.
22
70
  def summary
23
71
  num_keys = keys.select { |key| self[key].numeric? }
24
72
 
@@ -36,29 +84,223 @@ module RedAmber
36
84
  end
37
85
  alias_method :describe, :summary
38
86
 
87
+ # Show information of self.
88
+ #
89
+ # According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
90
+ # - If it is 'TDR', returns class name, shape, object id
91
+ # and transposed preview for up to 10 variables.
92
+ # - If it is 'TDRA', returns class name, shape, object id
93
+ # and transposed preview for all variables.
94
+ # - If it is 'MINIMUM', returns class name and shape.
95
+ # - If it is 'PLAIN', returns class name, shape and Table preview
96
+ # for up to 128 head rows, within a width of 128 characters.
97
+ # - If it is 'TABLE' or otherwise, returns class name, shape, object id
98
+ # and Table preview for up to 20 head rows, within a width of 100 characters.
99
+ # Default value of the ENV is 'Table'.
100
+ # @return [String]
101
+ # information of self.
102
+ # @example Default for ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table'
103
+ # puts df.inspect
104
+ #
105
+ # # =>
106
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c148>
107
+ # x y
108
+ # <uint8> <string>
109
+ # 0 1 A
110
+ # 1 2 B
111
+ # 2 3 C
112
+ #
113
+ # @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'TDR'
114
+ # puts df.inspect
115
+ #
116
+ # # =>
117
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c148>
118
+ # Vectors : 1 numeric, 1 string
119
+ # # key type level data_preview
120
+ # 0 :x uint8 3 [1, 2, 3]
121
+ # 1 :y string 3 ["A", "B", "C"]
122
+ #
123
+ # @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'Minimum'
124
+ # puts df.inspect
125
+ #
126
+ # # =>
127
+ # RedAmber::DataFrame : 3 x 2 Vectors
128
+ #
39
129
  def inspect
40
130
  mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
41
131
  case mode.upcase
42
132
  when 'TDR'
43
- "#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
133
+ "#<#{shape_str(with_id: true)}>\n#{dataframe_info(10)}"
134
+ when 'TDRA'
135
+ "#<#{shape_str(with_id: true)}>\n#{dataframe_info(:all)}"
44
136
  when 'MINIMUM'
45
137
  shape_str
138
+ when 'PLAIN'
139
+ "#<#{shape_str}>\n#{to_s(width: 128, head: 128)}"
46
140
  else
47
- "#<#{shape_str(with_id: true)}>\n#{self}"
141
+ "#<#{shape_str(with_id: true)}>\n#{to_s(width: 100, head: 20)}"
48
142
  end
49
143
  end
50
144
 
51
- # - limit: max num of Vectors to show
52
- # - tally: max level to use tally mode
53
- # - elements: max element to show values in each vector
145
+ # Shows some information about self in a transposed style.
146
+ #
147
+ # @param limit [Integer, :all]
148
+ # maximum number of variables (columns) to show.
149
+ # Shows all variables (columns) if it is `:all`.
150
+ # @param tally [Integer]
151
+ # maximum level to use tally mode.
152
+ # Tally mode counts the occurrences of each element and shows as a hash
153
+ # with the elements as keys and the corresponding counts as values.
154
+ # @param elements [Integer]
155
+ # maximum number of elements to show values
156
+ # in each column.
157
+ # @return [nil]
158
+ # @example Default
159
+ # diamonds = diamonds.assign_left(:index) { indices }
160
+ # diamonds
161
+ #
162
+ # # =>
163
+ # #<RedAmber::DataFrame : 53940 x 11 Vectors, 0x0000000000035084>
164
+ # index carat cut color clarity depth table price x y z
165
+ # <uint16> <double> <string> <string> <string> <double> <double> <uint16> <double> <double> <double>
166
+ # 0 0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
167
+ # 1 1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
168
+ # 2 2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
169
+ # 3 3 0.29 Premium I VS2 62.4 58.0 334 4.2 4.23 2.63
170
+ # 4 4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
171
+ # 5 5 0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 2.48
172
+ # 6 6 0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47
173
+ # 7 7 0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53
174
+ # 8 8 0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49
175
+ # 9 9 0.23 Very Good H VS1 59.4 61.0 338 4.0 4.05 2.39
176
+ # 10 10 0.3 Good J SI1 64.0 55.0 339 4.25 4.28 2.73
177
+ # 11 11 0.23 Ideal J VS1 62.8 56.0 340 3.93 3.9 2.46
178
+ # 12 12 0.22 Premium F SI1 60.4 61.0 342 3.88 3.84 2.33
179
+ # 13 13 0.31 Ideal J SI2 62.2 54.0 344 4.35 4.37 2.71
180
+ # 14 14 0.2 Premium E SI2 60.2 62.0 345 3.79 3.75 2.27
181
+ # 15 15 0.32 Premium E I1 60.9 58.0 345 4.38 4.42 2.68
182
+ # 16 16 0.3 Ideal I SI2 62.0 54.0 348 4.31 4.34 2.68
183
+ # 17 17 0.3 Good J SI1 63.4 54.0 351 4.23 4.29 2.7
184
+ # 18 18 0.3 Good J SI1 63.8 56.0 351 4.23 4.26 2.71
185
+ # 19 19 0.3 Very Good J SI1 62.7 59.0 351 4.21 4.27 2.66
186
+ # : : : : : : : : : : : :
187
+ # 53936 53936 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
188
+ # 53937 53937 0.7 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
189
+ # 53938 53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
190
+ # 53939 53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64
191
+ #
192
+ # diamonds.tdr
193
+ #
194
+ # # =>
195
+ # RedAmber::DataFrame : 53940 x 11 Vectors
196
+ # Vectors : 8 numeric, 3 strings
197
+ # # key type level data_preview
198
+ # 0 :index uint16 53940 [0, 1, 2, 3, 4, ... ]
199
+ # 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
200
+ # 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
201
+ # 3 :color string 7 ["E", "E", "E", "I", "J", ... ]
202
+ # 4 :clarity string 8 ["SI2", "SI1", "VS1", "VS2", "SI2", ... ]
203
+ # 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
204
+ # 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
205
+ # 7 :price uint16 11602 [326, 326, 327, 334, 335, ... ]
206
+ # 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
207
+ # 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
208
+ # ... 1 more Vector ...
209
+ #
210
+ # @example Show all variables
211
+ # diamonds.tdr(:all)
212
+ #
213
+ # # =>
214
+ # RedAmber::DataFrame : 53940 x 11 Vectors
215
+ # Vectors : 8 numeric, 3 strings
216
+ # # key type level data_preview
217
+ # 0 :index uint16 53940 [0, 1, 2, 3, 4, ... ]
218
+ # 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
219
+ # 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
220
+ # 3 :color string 7 ["E", "E", "E", "I", "J", ... ]
221
+ # 4 :clarity string 8 ["SI2", "SI1", "VS1", "VS2", "SI2", ... ]
222
+ # 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
223
+ # 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
224
+ # 7 :price uint16 11602 [326, 326, 327, 334, 335, ... ]
225
+ # 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
226
+ # 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
227
+ # 10 :z double 375 [2.43, 2.31, 2.31, 2.63, 2.75, ... ]
228
+ #
229
+ # @example Use tally mode up to 8 levels
230
+ # diamonds.tdr(tally: 8)
231
+ #
232
+ # # =>
233
+ # RedAmber::DataFrame : 53940 x 11 Vectors
234
+ # Vectors : 8 numeric, 3 strings
235
+ # # key type level data_preview
236
+ # 0 :index uint16 53940 [0, 1, 2, 3, 4, ... ]
237
+ # 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
238
+ # 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
239
+ # 3 :color string 7 {"E"=>9797, "I"=>5422, "J"=>2808, "H"=>8304, "F"=>9542, "G"=>11292, "D"=>6775}
240
+ # 4 :clarity string 8 {"SI2"=>9194, "SI1"=>13065, "VS1"=>8171, "VS2"=>12258, "VVS2"=>5066, "VVS1"=>3655, "I1"=>741, "IF"=>1790}
241
+ # 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
242
+ # 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
243
+ # 7 :price uint16 11602 [326, 326, 327, 334, 335, ... ]
244
+ # 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
245
+ # 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
246
+ # ... 1 more Vector ...
247
+ #
248
+ # @example Increase elements to show
249
+ # diamonds.tdr(elements: 10)
250
+ #
251
+ # # =>
252
+ # RedAmber::DataFrame : 53940 x 11 Vectors
253
+ # Vectors : 8 numeric, 3 strings
254
+ # # key type level data_preview
255
+ # 0 :index uint16 53940 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... ]
256
+ # 1 :carat double 273 [0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, ... ]
257
+ # 2 :cut string 5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
258
+ # 3 :color string 7 ["E", "E", "E", "I", "J", "J", "I", "H", "E", "H", ... ]
259
+ # 4 :clarity string 8 ["SI2", "SI1", "VS1", "VS2", "SI2", "VVS2", "VVS1", "SI1", "VS2", "VS1", ... ]
260
+ # 5 :depth double 184 [61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, ... ]
261
+ # 6 :table double 127 [55.0, 61.0, 65.0, 58.0, 58.0, 57.0, 57.0, 55.0, 61.0, 61.0, ... ]
262
+ # 7 :price uint16 11602 [326, 326, 327, 334, 335, 336, 336, 337, 337, 338, ... ]
263
+ # 8 :x double 554 [3.95, 3.89, 4.05, 4.2, 4.34, 3.94, 3.95, 4.07, 3.87, 4.0, ... ]
264
+ # 9 :y double 552 [3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, ... ]
265
+ # ... 1 more Vector ...
266
+ #
54
267
  def tdr(limit = 10, tally: 5, elements: 5)
55
268
  puts tdr_str(limit, tally: tally, elements: elements)
56
269
  end
270
+ alias_method :glimpse, :tdr
57
271
 
272
+ # Shortcut for `tdr(:all)`.
273
+ #
274
+ # @return (see #tdr)
275
+ #
276
+ def tdra
277
+ puts tdr_str(:all)
278
+ end
279
+
280
+ # rubocop:enable Layout/LineLength
281
+
282
+ # Returns some information about self in a transposed style by a string.
283
+ #
284
+ # @param (see #tdr)
285
+ # @option (see #tdr)
286
+ # @return [String] TDR style string.
287
+ #
58
288
  def tdr_str(limit = 10, tally: 5, elements: 5)
59
289
  "#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
60
290
  end
61
291
 
292
+ # Returns html formatted text of self by IRuby::HTML.table.
293
+ #
294
+ # According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
295
+ # - If it is 'MINIMUM', returns shape by plain text.
296
+ # - If it is 'PLAIN', returns `#inspect` value by plain text.
297
+ # - If it is 'TDR', returns shape and transposed preview by plain text.
298
+ # - If it is 'TDRA', returns shape and transposed preview by plain text.
299
+ # - If it is 'TABLE' or otherwise, returns Table preview by html format.
300
+ # Default value of the ENV is 'TABLE'.
301
+ # @return [String]
302
+ # formatted string.
303
+ #
62
304
  def to_iruby
63
305
  require 'iruby'
64
306
  return ['text/plain', '(empty DataFrame)'] if empty?
@@ -71,19 +313,39 @@ module RedAmber
71
313
  ['text/plain', shape_str]
72
314
  when 'TDR'
73
315
  size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
316
+ when 'TDRA'
317
+ ['text/plain', tdr_str(:all)]
74
318
  else # 'TABLE'
75
319
  ['text/html', html_table]
76
320
  end
77
321
  end
78
322
 
79
- private # =====
80
-
323
+ # Return class and shape of self by a String.
324
+ #
325
+ # @param with_id [true, false]
326
+ # show id if true.
327
+ # @return [String]
328
+ # shape string.
329
+ # @example Default (without id)
330
+ # penguins.shape_str
331
+ #
332
+ # # =>
333
+ # "RedAmber::DataFrame : 344 x 8 Vectors"
334
+ #
335
+ # @example With id
336
+ # penguins.shape_str(with_id: true)
337
+ #
338
+ # # =>
339
+ # "RedAmber::DataFrame : 344 x 8 Vectors, 0x0000000000003980"
340
+ #
81
341
  def shape_str(with_id: false)
82
342
  shape_info = empty? ? '(empty)' : "#{size} x #{n_keys} Vector#{pl(n_keys)}"
83
343
  id = with_id ? format(', 0x%016x', object_id) : ''
84
344
  "#{self.class} : #{shape_info}#{id}"
85
345
  end
86
346
 
347
+ private # =====
348
+
87
349
  def dataframe_info(limit, tally_level: 5, max_element: 5)
88
350
  return '' if empty?
89
351
 
@@ -95,7 +357,7 @@ module RedAmber
95
357
  quoted_keys = keys.map(&:inspect)
96
358
  headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
97
359
  data: 'data_preview' }
98
- header_format = make_header_format(levels, headers, quoted_keys)
360
+ header_format = make_header_format(levels, headers, quoted_keys, limit)
99
361
 
100
362
  sio = StringIO.new # output string buffer
101
363
  sio.puts "Vector#{pl(n_keys)} : #{var_type_count(type_groups).join(', ')}"
@@ -125,9 +387,9 @@ module RedAmber
125
387
  sio.string
126
388
  end
127
389
 
128
- def make_header_format(levels, headers, quoted_keys)
390
+ def make_header_format(levels, headers, quoted_keys, limit)
129
391
  # find longest word to adjust width
130
- w_idx = n_keys.to_s.size
392
+ w_idx = ([n_keys, limit].min - 1).to_s.size
131
393
  w_key = [quoted_keys.map(&:size).max, headers[:key].size].max
132
394
  w_type = [types.map(&:size).max, headers[:type].size].max
133
395
  w_level = [levels.map { |l| l.to_s.size }.max, headers[:levels].size].max
@@ -156,10 +418,17 @@ module RedAmber
156
418
  end
157
419
 
158
420
  def shorthand(vector, size, max_element)
159
- max = vector.temporal? ? 2 : max_element
160
- a = vector.to_a.take(max)
161
- a.map! { |e| e.nil? ? 'nil' : e.inspect }
162
- a << '... ' if size > max
421
+ a = vector.to_a.take(max_element)
422
+ a.map! do |e|
423
+ if e.nil?
424
+ 'nil'
425
+ elsif vector.temporal?
426
+ e.to_s.inspect
427
+ else
428
+ e.inspect
429
+ end
430
+ end
431
+ a << '... ' if size > max_element
163
432
  "[#{a.join(', ')}]"
164
433
  end
165
434
 
@@ -201,13 +470,13 @@ module RedAmber
201
470
  df = df.assign do
202
471
  vectors.each_with_object({}) do |v, assigner|
203
472
  vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
204
- .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
473
+ .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
205
474
  assigner[v.key] =
206
475
  original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
207
476
  end
208
477
  end
209
478
 
210
- width_list = df.vectors.map { |v| v.to_a.map(&:length).max }
479
+ width_list = df.vectors.map { |v| v.to_a.map(&:width).max }
211
480
  total_length = width_list[-1] # reserved for last column
212
481
 
213
482
  formats = []
@@ -216,14 +485,13 @@ module RedAmber
216
485
  w = width_list[i]
217
486
  if total_length + w > width && i < df.n_keys - 1
218
487
  row_ellipsis = i
219
- formats << '%3s'
220
- formats << format_for_column(df.vectors[-1], original, width_list[-1])
488
+ formats << 3
489
+ formats << format_width(df.vectors[-1], original, width_list[-1])
221
490
  break
222
491
  end
223
- formats << format_for_column(v, original, w)
492
+ formats << format_width(v, original, w)
224
493
  total_length += w
225
494
  end
226
- format_str = formats.join(' ')
227
495
 
228
496
  str = StringIO.new
229
497
  if row_ellipsis
@@ -232,22 +500,31 @@ module RedAmber
232
500
  end
233
501
 
234
502
  df.to_a.each do |row|
235
- str.puts format(format_str, *row).rstrip
503
+ a =
504
+ row.zip(formats).map do |elem, format|
505
+ non_ascii_diff = elem.ascii_only? ? 0 : elem.width - elem.size
506
+ if format.negative?
507
+ elem.ljust(-format + non_ascii_diff)
508
+ else
509
+ elem.rjust(format + non_ascii_diff)
510
+ end
511
+ end
512
+ str.puts a.join(' ').rstrip
236
513
  end
237
514
 
238
515
  str.string
239
516
  end
240
517
 
241
- def format_for_column(vector, original, width)
518
+ def format_width(vector, original, width)
242
519
  if vector.key != INDEX_KEY && !original[vector.key].numeric?
243
- "%-#{width}s"
520
+ -width
244
521
  else
245
- "%#{width}s"
522
+ width
246
523
  end
247
524
  end
248
525
 
249
526
  def html_table
250
- reduced = size > 8 ? self[0..4, -4..-1] : self
527
+ reduced = size > 10 ? self[0..5, -5..-1] : self
251
528
 
252
529
  converted = reduced.assign do
253
530
  vectors.select.with_object({}) do |vector, assigner|
@@ -267,12 +544,14 @@ module RedAmber
267
544
  format('%g', element)
268
545
  in Integer
269
546
  format('%d', element)
547
+ else
548
+ element
270
549
  end
271
550
  end
272
551
  end
273
552
  end
274
553
 
275
- html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
554
+ html = IRuby::HTML.table(converted.to_h, maxrows: 10, maxcols: 15)
276
555
  "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
277
556
  end
278
557
  end
@@ -1,38 +1,141 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- # mix-ins for the class DataFrame
4
+ # Mix-ins for the class DataFrame
5
5
  module DataFrameIndexable
6
- # Common method
7
- def map_indices(*indices)
8
- return self if indices.empty?
9
-
10
- indices = indices[0].data if indices[0].is_a?(Vector)
11
-
12
- new_dataframe_by(indices)
6
+ # Returns row index Vector.
7
+ #
8
+ # @overload indices
9
+ # return @indices as row indices (0...size).
10
+ #
11
+ # @return [Vector]
12
+ # a Vector of row indices.
13
+ # @example When `dataframe.size == 5`;
14
+ # dataframe.indices
15
+ #
16
+ # # =>
17
+ # #<RedAmber::Vector(:uint8, size=5):0x000000000000fb54>
18
+ # [0, 1, 2, 3, 4]
19
+ #
20
+ # @overload indices(start)
21
+ # return customized index Vector `(start..).take(size)`.
22
+ #
23
+ # @param start [#succ]
24
+ # the start element, which must respond to `#succ`.
25
+ # @return [Vector]
26
+ # a Vector of row indices.
27
+ # @example When `dataframe.size == 5`;
28
+ # dataframe.indices(1)
29
+ #
30
+ # # =>
31
+ # #<RedAmber::Vector(:uint8, size=5):0x000000000000fba4>
32
+ # [1, 2, 3, 4, 5]
33
+ #
34
+ # dataframe.indices('a')
35
+ # # =>
36
+ # #<RedAmber::Vector(:string, size=5):0x000000000000fbb8>
37
+ # ["a", "b", "c", "d", "e"]
38
+ #
39
+ def indices(start = 0)
40
+ if start == 0 # rubocop:disable Style/NumericPredicate
41
+ @indices ||= Vector.new(0...size)
42
+ else
43
+ Vector.new((start..).take(size))
44
+ end
13
45
  end
46
+ alias_method :indexes, :indices
14
47
 
48
+ # Return sorted indexes of self by a Vector.
49
+ #
15
50
  # @param sort_keys [Arrow::SortKey]
16
51
  # :key, "key" or "+key" denotes ascending,
17
- # "-key" denotes descending order
18
- # @return [RedAmber::Vector] Sorted indices in Vector
52
+ # :"-key" or "-key" denotes descending order.
53
+ # @return [RedAmber::Vector]
54
+ # sorted indices in Vector.
55
+ # @example
56
+ # df
57
+ #
58
+ # # =>
59
+ # x y
60
+ # <uint8> <string>
61
+ # 0 3 B
62
+ # 1 5 A
63
+ # 2 1 B
64
+ # 3 4 A
65
+ # 4 2 C
66
+ #
67
+ # df.sort_indices('x')
68
+ #
69
+ # # =>
70
+ # #<RedAmber::Vector(:uint64, size=5):0x0000000000003854>
71
+ # [2, 4, 0, 3, 1]
72
+ #
19
73
  def sort_indices(*sort_keys)
20
74
  indices = @table.sort_indices(sort_keys.flatten)
21
75
  Vector.create(indices)
22
76
  end
23
77
 
24
- # @return [RedAmber::DataFrame] Sorted DataFrame
78
+ # Sort the contents of self.
79
+ #
80
+ # @param sort_keys [Arrow::SortKey]
81
+ # :key, "key" or "+key" denotes ascending,
82
+ # :"-key" or "-key" denotes descending order.
83
+ # @return [RedAmber::DataFrame]
84
+ # sorted DataFrame.
85
+ # @example Sort by a key
86
+ # df
87
+ #
88
+ # # =>
89
+ # x y
90
+ # <uint8> <string>
91
+ # 0 3 B
92
+ # 1 5 A
93
+ # 2 1 B
94
+ # 3 4 A
95
+ # 4 2 C
96
+ #
97
+ # df.sort('y')
98
+ #
99
+ # # =>
100
+ # #<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000382c>
101
+ # x y
102
+ # <uint8> <string>
103
+ # 0 5 A
104
+ # 1 4 A
105
+ # 2 3 B
106
+ # 3 1 B
107
+ # 4 2 C
108
+ #
109
+ # @example Sort by two keys
110
+ # df.sort('y', 'x')
111
+ #
112
+ # # =>
113
+ # #<RedAmber::DataFrame : 5 x 2 Vectors, 0x0000000000003890>
114
+ # x y
115
+ # <uint8> <string>
116
+ # 0 4 A
117
+ # 1 5 A
118
+ # 2 1 B
119
+ # 3 3 B
120
+ # 4 2 C
121
+ #
122
+ # @example Sort in descending order
123
+ # df.sort('-x')
124
+ #
125
+ # # =>
126
+ # #<RedAmber::DataFrame : 5 x 2 Vectors, 0x0000000000003840>
127
+ # x y
128
+ # <uint8> <string>
129
+ # 0 5 A
130
+ # 1 4 A
131
+ # 2 3 B
132
+ # 3 2 C
133
+ # 4 1 B
134
+ #
25
135
  def sort(*sort_keys)
26
136
  indices = @table.sort_indices(sort_keys.flatten)
27
137
 
28
- new_dataframe_by(indices)
29
- end
30
-
31
- private
32
-
33
- def new_dataframe_by(index_array)
34
- t = Arrow::Function.find(:take).execute([@table, index_array]).value
35
- DataFrame.create(t)
138
+ take(indices)
36
139
  end
37
140
  end
38
141
  end