red_amber 0.2.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +5 -0
  3. data/CHANGELOG.md +125 -0
  4. data/README.md +86 -269
  5. data/doc/DataFrame.md +427 -281
  6. data/doc/Vector.md +35 -54
  7. data/doc/image/basic_verbs.png +0 -0
  8. data/doc/image/dataframe/assign.png +0 -0
  9. data/doc/image/dataframe/assign_operation.png +0 -0
  10. data/doc/image/dataframe/drop.png +0 -0
  11. data/doc/image/dataframe/pick.png +0 -0
  12. data/doc/image/dataframe/pick_operation.png +0 -0
  13. data/doc/image/dataframe/remove.png +0 -0
  14. data/doc/image/dataframe/rename.png +0 -0
  15. data/doc/image/dataframe/rename_operation.png +0 -0
  16. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  17. data/doc/image/dataframe/slice.png +0 -0
  18. data/doc/image/dataframe/slice_operation.png +0 -0
  19. data/doc/image/dataframe_model.png +0 -0
  20. data/doc/image/group_operation.png +0 -0
  21. data/doc/image/replace-if_then.png +0 -0
  22. data/doc/image/reshaping_dataframe.png +0 -0
  23. data/doc/image/screenshot.png +0 -0
  24. data/doc/image/vector/binary_element_wise.png +0 -0
  25. data/doc/image/vector/unary_aggregation.png +0 -0
  26. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  27. data/doc/image/vector/unary_element_wise.png +0 -0
  28. data/lib/red_amber/data_frame.rb +33 -41
  29. data/lib/red_amber/data_frame_displayable.rb +59 -6
  30. data/lib/red_amber/data_frame_loadsave.rb +36 -0
  31. data/lib/red_amber/data_frame_reshaping.rb +12 -10
  32. data/lib/red_amber/data_frame_selectable.rb +53 -9
  33. data/lib/red_amber/data_frame_variable_operation.rb +57 -20
  34. data/lib/red_amber/group.rb +5 -3
  35. data/lib/red_amber/helper.rb +20 -18
  36. data/lib/red_amber/vector.rb +50 -31
  37. data/lib/red_amber/vector_functions.rb +21 -24
  38. data/lib/red_amber/vector_selectable.rb +18 -9
  39. data/lib/red_amber/vector_updatable.rb +6 -3
  40. data/lib/red_amber/version.rb +1 -1
  41. data/lib/red_amber.rb +1 -0
  42. metadata +13 -3
  43. data/doc/examples_of_red_amber.ipynb +0 -6783
data/doc/Vector.md CHANGED
@@ -7,7 +7,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
7
7
  ### Create from a column in a DataFrame
8
8
 
9
9
  ```ruby
10
- df = RedAmber::DataFrame.new(x: [1, 2, 3])
10
+ df = DataFrame.new(x: [1, 2, 3])
11
11
  df[:x]
12
12
  # =>
13
13
  #<RedAmber::Vector(:uint8, size=3):0x000000000000f4ec>
@@ -17,13 +17,13 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
17
17
  ### New from an Array
18
18
 
19
19
  ```ruby
20
- vector = RedAmber::Vector.new([1, 2, 3])
20
+ vector = Vector.new([1, 2, 3])
21
21
  # or
22
- vector = RedAmber::Vector.new(1, 2, 3)
22
+ vector = Vector.new(1, 2, 3)
23
23
  # or
24
- vector = RedAmber::Vector.new(1..3)
24
+ vector = Vector.new(1..3)
25
25
  # or
26
- vector = RedAmber::Vector.new(Arrow::Array([1, 2, 3])
26
+ vector = Vector.new(Arrow::Array.new([1, 2, 3]))
27
27
 
28
28
  # =>
29
29
  #<RedAmber::Vector(:uint8, size=3):0x000000000000f514>
@@ -61,7 +61,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
61
61
 
62
62
  ### `type_class`
63
63
 
64
- ### `each`
64
+ ### `each`, `map`, `collect`
65
65
 
66
66
  If block is not given, returns Enumerator.
67
67
 
@@ -78,7 +78,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
78
78
  - `limit` sets size limit to display a long array.
79
79
 
80
80
  ```ruby
81
- vector = RedAmber::Vector.new((1..50).to_a)
81
+ vector = Vector.new((1..50).to_a)
82
82
  # =>
83
83
  #<RedAmber::Vector(:uint8, size=50):0x000000000000f528>
84
84
  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ... ]
@@ -95,8 +95,8 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
95
95
  - Negative index is also OK like the Ruby's primitive Array.
96
96
 
97
97
  ```ruby
98
- array = RedAmber::Vector.new(%w[A B C D E])
99
- indices = RedAmber::Vector.new([0.1, -0.5, -5.1])
98
+ array = Vector.new(%w[A B C D E])
99
+ indices = Vector.new([0.1, -0.5, -5.1])
100
100
  array.take(indices)
101
101
  # or
102
102
  array[indices]
@@ -106,7 +106,7 @@ array[indices]
106
106
  ["A", "E", "A"]
107
107
  ```
108
108
 
109
- ### `filter(booleans)`, `[](booleans)`
109
+ ### `filter(booleans)`, `select(booleans)`, `[](booleans)`
110
110
 
111
111
  - Acceptable class for booleans:
112
112
  - An array of true, false, or nil
@@ -114,7 +114,7 @@ array[indices]
114
114
  - Arrow::BooleanArray
115
115
 
116
116
  ```ruby
117
- array = RedAmber::Vector.new(%w[A B C D E])
117
+ array = Vector.new(%w[A B C D E])
118
118
  booleans = [true, false, nil, false, true]
119
119
  array.filter(booleans)
120
120
  # or
@@ -124,6 +124,7 @@ array[booleans]
124
124
  #<RedAmber::Vector(:string, size=2):0x000000000000f21c>
125
125
  ["A", "E"]
126
126
  ```
127
+ `filter` and `select` also accept a block.
127
128
 
128
129
  ## Functions
129
130
 
@@ -158,7 +159,7 @@ Options can be used as follows.
158
159
  See the [document of C++ function](https://arrow.apache.org/docs/cpp/compute.html) for detail.
159
160
 
160
161
  ```ruby
161
- double = RedAmber::Vector.new([1, 0/0.0, -1/0.0, 1/0.0, nil, ""])
162
+ double = Vector.new([1, 0/0.0, -1/0.0, 1/0.0, nil, ""])
162
163
  #=>
163
164
  #<RedAmber::Vector(:double, size=6):0x000000000000f910>
164
165
  [1.0, NaN, -Infinity, Infinity, nil, 0.0]
@@ -168,7 +169,7 @@ double.count(mode: :only_valid) #=> 5, default
168
169
  double.count(mode: :only_null) #=> 1
169
170
  double.count(mode: :all) #=> 6
170
171
 
171
- boolean = RedAmber::Vector.new([true, true, nil])
172
+ boolean = Vector.new([true, true, nil])
172
173
  #=>
173
174
  #<RedAmber::Vector(:boolean, size=3):0x000000000000f924>
174
175
  [true, true, nil]
@@ -187,8 +188,8 @@ boolean.all(skip_nulls: false) #=> false
187
188
  | ✓ `-@` | | ✓ | | |as `-vector`|
188
189
  | ✓ `negate` | | ✓ | | |`-@` |
189
190
  | ✓ `abs` | | ✓ | | | |
190
- |[ ]`acos` | | [ ] | | | |
191
- |[ ]`asin` | | [ ] | | | |
191
+ | `acos` | || | | |
192
+ | `asin` | || | | |
192
193
  | ✓ `atan` | | ✓ | | | |
193
194
  | ✓ `bit_wise_not`| | (✓) | | |integer only|
194
195
  | ✓ `ceil` | | ✓ | | | |
@@ -197,10 +198,10 @@ boolean.all(skip_nulls: false) #=> false
197
198
  | ✓`fill_nil_forward` | ✓ | ✓ | ✓ | | |
198
199
  | ✓ `floor` | | ✓ | | | |
199
200
  | ✓ `invert` | ✓ | | | |`!`, alias `not`|
200
- |[ ]`ln` | | [ ] | | | |
201
- |[ ]`log10` | | [ ] | | | |
202
- |[ ]`log1p` | | [ ] | | | |
203
- |[ ]`log2` | | [ ] | | | |
201
+ | `ln` | || | | |
202
+ | `log10` | || | | |
203
+ | `log1p` | || | |Compute natural log of (1+x)|
204
+ | `log2` | || | | |
204
205
  | ✓ `round` | | ✓ | | ✓ Round (:mode, :n_digits)| |
205
206
  | ✓ `round_to_multiple`| | ✓ | | ✓ RoundToMultiple :mode, :multiple| multiple must be an Arrow::Scalar|
206
207
  | ✓ `sign` | | ✓ | | | |
@@ -215,7 +216,7 @@ Examples of options for `#round`;
215
216
  - `round_mode` Specify rounding mode.
216
217
 
217
218
  ```ruby
218
- double = RedAmber::Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
219
+ double = Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
219
220
  # => [15.15, 2.5, 3.5, -4.5, -5.5]
220
221
  double.round
221
222
  # => [15.0, 2.0, 4.0, -4.0, -6.0]
@@ -267,7 +268,7 @@ double.round(n_digits: -1)
267
268
  | ✓ `is_valid` | ✓ | ✓ | ✓ | | |
268
269
  | ✓ `less` | ✓ | ✓ | ✓ | |`<`, alias `lt`|
269
270
  | ✓ `less_equal` | ✓ | ✓ | ✓ | |`<=`, alias `le`|
270
- |[ ]`logb` | | [ ] | | | |
271
+ | `logb` | || | |logb(b) Compute base `b` logarithm|
271
272
  |[ ]`mod` | | [ ] | | | `%` |
272
273
  | ✓ `multiply` | | ✓ | | | `*` |
273
274
  | ✓ `not_equal` | ✓ | ✓ | ✓ | |`!=`, alias `ne`|
@@ -283,8 +284,6 @@ double.round(n_digits: -1)
283
284
 
284
285
  Returns a new array with distinct elements.
285
286
 
286
- (Not impremented functions)
287
-
288
287
  ### `tally` and `value_counts`
289
288
 
290
289
  Compute counts of unique elements and return a Hash.
@@ -295,7 +294,7 @@ double.round(n_digits: -1)
295
294
  array = [0.0/0, Float::NAN]
296
295
  array.tally #=> {NaN=>1, NaN=>1}
297
296
 
298
- vector = RedAmber::Vector.new(array)
297
+ vector = Vector.new(array)
299
298
  vector.tally #=> {NaN=>2}
300
299
  vector.value_counts #=> {NaN=>2}
301
300
  ```
@@ -309,19 +308,10 @@ double.round(n_digits: -1)
309
308
 
310
309
  ### `sort_indexes`, `sort_indices`, `array_sort_indices`
311
310
 
312
- ### [ ] `sort`, `sort_by`
313
- ### [ ] argmin, argmax
314
- ### [ ] (array functions)
315
- ### [ ] (strings functions)
316
- ### [ ] (temporal functions)
317
- ### [ ] (conditional functions)
318
- ### [ ] (index functions)
319
- ### [ ] (other functions)
320
-
321
311
  ## Coerce
322
312
 
323
313
  ```ruby
324
- vector = RedAmber::Vector.new(1,2,3)
314
+ vector = Vector.new(1,2,3)
325
315
  # =>
326
316
  #<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
327
317
  [1, 2, 3]
@@ -351,12 +341,13 @@ vector * -1
351
341
  - Accepts Scalar, Range of Integer, Vector, Array, Arrow::Array as a specifier
352
342
  - Accepts Scalar, Vector, Array and Arrow::Array as a replacer.
353
343
  - Boolean specifiers specify the position of replacer in true.
344
+ - If booleans.any is false, no replacement happens and self is returned.
354
345
  - Index specifiers specify the position of replacer in indices.
355
346
  - replacer specifies the values to be replaced.
356
347
  - The number of true in booleans must be equal to the length of replacer
357
348
 
358
349
  ```ruby
359
- vector = RedAmber::Vector.new([1, 2, 3])
350
+ vector = Vector.new([1, 2, 3])
360
351
  booleans = [true, false, true]
361
352
  replacer = [4, 5]
362
353
  vector.replace(booleans, replacer)
@@ -390,7 +381,7 @@ vector.replace(booleans, replacer)
390
381
  ```ruby
391
382
  booleans = [true, false, nil]
392
383
  replacer = -1
393
- vec.replace(booleans, replacer)
384
+ vector.replace(booleans, replacer)
394
385
  =>
395
386
  #<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
396
387
  [-1, 2, nil]
@@ -401,17 +392,7 @@ vec.replace(booleans, replacer)
401
392
  ```ruby
402
393
  booleans = [true, false, true]
403
394
  replacer = [nil]
404
- vec.replace(booleans, replacer)
405
- =>
406
- #<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
407
- [nil, 2, nil]
408
- ```
409
-
410
- - If no replacer specified, it is same as to specify nil.
411
-
412
- ```ruby
413
- booleans = [true, false, true]
414
- vec.replace(booleans)
395
+ vector.replace(booleans, replacer)
415
396
  =>
416
397
  #<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
417
398
  [nil, 2, nil]
@@ -420,7 +401,7 @@ vec.replace(booleans)
420
401
  - An example to replace 'NA' to nil.
421
402
 
422
403
  ```ruby
423
- vector = RedAmber::Vector.new(['A', 'B', 'NA'])
404
+ vector = Vector.new(['A', 'B', 'NA'])
424
405
  vector.replace(vector == 'NA', nil)
425
406
  # =>
426
407
  #<RedAmber::Vector(:string, size=3):0x000000000000f8ac>
@@ -432,7 +413,7 @@ vector.replace(vector == 'NA', nil)
432
413
  Specified indices are used 'as sorted'. Position in indices and replacer may not have correspondence.
433
414
 
434
415
  ```ruby
435
- vector = RedAmber::Vector.new([1, 2, 3])
416
+ vector = Vector.new([1, 2, 3])
436
417
  indices = [2, 1]
437
418
  replacer = [4, 5]
438
419
  vector.replace(indices, replacer)
@@ -448,7 +429,7 @@ Propagate the last valid observation forward (or backward).
448
429
  Or preserve nil if all previous values are nil or at the end.
449
430
 
450
431
  ```ruby
451
- integer = RedAmber::Vector.new([0, 1, nil, 3, nil])
432
+ integer = Vector.new([0, 1, nil, 3, nil])
452
433
  integer.fill_nil_forward
453
434
  # =>
454
435
  #<RedAmber::Vector(:uint8, size=5):0x000000000000f960>
@@ -470,7 +451,7 @@ Choose values based on self. Self must be a boolean Vector.
470
451
  This example will normalize negative indices to positive ones.
471
452
 
472
453
  ```ruby
473
- indices = RedAmber::Vector.new([1, -1, 3, -4])
454
+ indices = Vector.new([1, -1, 3, -4])
474
455
  array_size = 10
475
456
  normalized_indices = (indices < 0).if_else(indices + array_size, indices)
476
457
 
@@ -485,7 +466,7 @@ For each element in self, return true if it is found in given `values`, false ot
485
466
  By default, nulls are matched against the value set. (This will be changed in SetLookupOptions: not implemented.)
486
467
 
487
468
  ```ruby
488
- vector = RedAmber::Vector.new %W[A B C D]
469
+ vector = Vector.new %W[A B C D]
489
470
  values = ['A', 'C', 'X']
490
471
  vector.is_in(values)
491
472
 
@@ -497,7 +478,7 @@ vector.is_in(values)
497
478
  `values` are casted to the same Class of Vector.
498
479
 
499
480
  ```ruby
500
- vector = RedAmber::Vector.new([1, 2, 255])
481
+ vector = Vector.new([1, 2, 255])
501
482
  vector.is_in(1, -1)
502
483
 
503
484
  # =>
@@ -510,7 +491,7 @@ vector.is_in(1, -1)
510
491
  Shift vector's values by specified `amount`. Shifted space is filled by value `fill`.
511
492
 
512
493
  ```ruby
513
- vector = RedAmber::Vector.new([1, 2, 3, 4, 5])
494
+ vector = Vector.new([1, 2, 3, 4, 5])
514
495
  vector.shift
515
496
 
516
497
  # =>
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -7,6 +7,7 @@ module RedAmber
7
7
  # mix-in
8
8
  include DataFrameDisplayable
9
9
  include DataFrameIndexable
10
+ include DataFrameLoadSave
10
11
  include DataFrameReshaping
11
12
  include DataFrameSelectable
12
13
  include DataFrameVariableOperation
@@ -37,6 +38,13 @@ module RedAmber
37
38
  # DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
38
39
  # returns empty DataFrame
39
40
  @table = Arrow::Table.new({}, [])
41
+ in [->(x) { x.respond_to?(:to_arrow) } => arrowable]
42
+ table = arrowable.to_arrow
43
+ unless table.is_a?(Arrow::Table)
44
+ raise DataFrameTypeError,
45
+ "to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
46
+ end
47
+ @table = table
40
48
  in [Arrow::Table => table]
41
49
  @table = table
42
50
  in [DataFrame => dataframe]
@@ -52,10 +60,9 @@ module RedAmber
52
60
  @table = Arrow::Table.new(*args)
53
61
  end
54
62
  name_unnamed_keys
55
- end
56
63
 
57
- def self.load(path, options = {})
58
- DataFrame.new(Arrow::Table.load(path, options))
64
+ duplicated_keys = keys.tally.select { |_k, v| v > 1 }.keys
65
+ raise DataFrameArgumentError, "duplicate keys: #{duplicated_keys}" unless duplicated_keys.empty?
59
66
  end
60
67
 
61
68
  attr_reader :table
@@ -64,10 +71,6 @@ module RedAmber
64
71
  @table
65
72
  end
66
73
 
67
- def save(output, options = {})
68
- @table.save(output, options)
69
- end
70
-
71
74
  # Returns the number of rows.
72
75
  #
73
76
  # @return [Integer] Number of rows.
@@ -159,12 +162,19 @@ module RedAmber
159
162
  @vectors || @vectors = init_instance_vars(:vectors)
160
163
  end
161
164
 
162
- # Returns row indices (0...size) in an Array.
165
+ # Returns row indices (start...(size+start)) in an Array.
163
166
  #
167
+ # @param start [Object]
168
+ # Object which has a #succ method.
164
169
  # @return [Array]
165
- # An Array of all indices of rows.
166
- def indices
167
- (0...size).to_a
170
+ # An Array of indices of the row.
171
+ # @example
172
+ # (when self.size == 5)
173
+ # - indices #=> [0, 1, 2, 3, 4]
174
+ # - indices(1) #=> [1, 2, 3, 4, 5]
175
+ # - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
176
+ def indices(start = 0)
177
+ (start..).take(size)
168
178
  end
169
179
  alias_method :indexes, :indices
170
180
 
@@ -208,23 +218,24 @@ module RedAmber
208
218
  Rover::DataFrame.new(to_h)
209
219
  end
210
220
 
211
- def to_iruby
212
- require 'iruby'
213
- return ['text/plain', '(empty DataFrame)'] if empty?
214
-
215
- if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
216
- size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
217
- else
218
- ['text/html', html_table]
219
- end
220
- end
221
-
222
221
  def group(*group_keys, &block)
223
222
  g = Group.new(self, group_keys)
224
223
  g = g.summarize(&block) if block
225
224
  g
226
225
  end
227
226
 
227
+ def method_missing(name, *args, &block)
228
+ return v(name) if args.empty?
229
+
230
+ super
231
+ end
232
+
233
+ def respond_to_missing?(name, include_private)
234
+ return true if key?(name)
235
+
236
+ super
237
+ end
238
+
228
239
  private
229
240
 
230
241
  # initialize @variable, @keys, @vectors and return one of them
@@ -241,25 +252,6 @@ module RedAmber
241
252
  ary[%i[variables keys vectors].index(var)]
242
253
  end
243
254
 
244
- def html_table
245
- reduced = size > 8 ? self[0..4, -4..-1] : self
246
-
247
- converted = reduced.assign do
248
- vectors.select.with_object({}) do |vector, assigner|
249
- if vector.has_nil?
250
- assigner[vector.key] = vector.to_a.map do |e|
251
- e = e.nil? ? '<i>(nil)</i>' : e.to_s # nil
252
- e = '""' if e.empty? # empty string
253
- e.sub(/(\s+)/, '"\1"') # blank spaces
254
- end
255
- end
256
- end
257
- end
258
-
259
- html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
260
- "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
261
- end
262
-
263
255
  def name_unnamed_keys
264
256
  return unless @table[:'']
265
257
 
@@ -37,8 +37,12 @@ module RedAmber
37
37
  alias_method :describe, :summary
38
38
 
39
39
  def inspect
40
- if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
40
+ mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
41
+ case mode.upcase
42
+ when 'TDR'
41
43
  "#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
44
+ when 'MINIMUM'
45
+ shape_str
42
46
  else
43
47
  "#<#{shape_str(with_id: true)}>\n#{self}"
44
48
  end
@@ -55,6 +59,23 @@ module RedAmber
55
59
  "#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
56
60
  end
57
61
 
62
+ def to_iruby
63
+ require 'iruby'
64
+ return ['text/plain', '(empty DataFrame)'] if empty?
65
+
66
+ mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
67
+ case mode.upcase
68
+ when 'PLAIN'
69
+ ['text/plain', inspect]
70
+ when 'MINIMUM'
71
+ ['text/plain', shape_str]
72
+ when 'TDR'
73
+ size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
74
+ else # 'TABLE'
75
+ ['text/html', html_table]
76
+ end
77
+ end
78
+
58
79
  private # =====
59
80
 
60
81
  def shape_str(with_id: false)
@@ -98,7 +119,7 @@ module RedAmber
98
119
  else
99
120
  [shorthand(vector, size, max_element)]
100
121
  end
101
- sio.printf header_format, i + 1, key, type, data_tally.size, a.join(', ')
122
+ sio.printf header_format, i, key, type, data_tally.size, a.join(', ')
102
123
  end
103
124
  sio.string
104
125
  end
@@ -154,9 +175,9 @@ module RedAmber
154
175
 
155
176
  def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
156
177
  original = self
157
- indices = size > head + tail ? [*0...head, *(size - tail)...size] : [*0...size]
178
+ indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
158
179
  df = slice(indices).assign do
159
- assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
180
+ assigner = { INDEX_KEY => indices.map(&:to_s) }
160
181
  vectors.each_with_object(assigner) do |v, a|
161
182
  a[v.key] = v.to_a.map do |e|
162
183
  if e.nil?
@@ -173,12 +194,12 @@ module RedAmber
173
194
  end
174
195
 
175
196
  df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
176
- df = size > head + tail ? df[0, 0, 0...head, 0, -tail..-1] : df[0, 0, 0..-1]
197
+ df = size > head + tail ? df[0, 0, 0..head, -tail..-1] : df[0, 0, 0..-1]
177
198
  df = df.assign do
178
199
  vectors.each_with_object({}) do |v, assigner|
179
200
  vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
180
201
  .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
181
- assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
202
+ assigner[v.key] = original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
182
203
  end
183
204
  end
184
205
 
@@ -220,5 +241,37 @@ module RedAmber
220
241
  "%#{width}s"
221
242
  end
222
243
  end
244
+
245
+ def html_table
246
+ reduced = size > 8 ? self[0..4, -4..-1] : self
247
+
248
+ converted = reduced.assign do
249
+ vectors.select.with_object({}) do |vector, assigner|
250
+ assigner[vector.key] = vector.map do |element|
251
+ case element
252
+ in TrueClass
253
+ '<i>(true)</i>'
254
+ in FalseClass
255
+ '<i>(false)</i>'
256
+ in NilClass
257
+ '<i>(nil)</i>'
258
+ in ''
259
+ '""'
260
+ in String
261
+ element.sub(/^(\s+)$/, '"\1"') # blank spaces
262
+ in Float
263
+ format('%g', element)
264
+ in Integer
265
+ format('%d', element)
266
+ else
267
+ element
268
+ end
269
+ end
270
+ end
271
+ end
272
+
273
+ html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
274
+ "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
275
+ end
223
276
  end
224
277
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedAmber
4
+ # mix-ins for the class DataFrame
5
+ module DataFrameLoadSave
6
+ # Enable `self.load` as class method of DataFrame
7
+ def self.included(klass)
8
+ klass.extend ClassMethods
9
+ end
10
+
11
+ # Enable `self.load` as class method of DataFrame
12
+ module ClassMethods
13
+ # Load DataFrame via Arrow::Table.load
14
+ def load(path, options = {})
15
+ DataFrame.new(Arrow::Table.load(path, options))
16
+ end
17
+ end
18
+
19
+ # Save DataFrame
20
+ def save(output, options = {})
21
+ @table.save(output, options)
22
+ end
23
+
24
+ # Save and reload to cast automatically
25
+ # Via a temporary tsv format file by default
26
+ #
27
+ # experimental feature
28
+ def auto_cast(format: :tsv)
29
+ return self if empty?
30
+
31
+ tempfile = Arrow::ResizableBuffer.new(1024)
32
+ save(tempfile, format: format)
33
+ DataFrame.load(tempfile, format: format)
34
+ end
35
+ end
36
+ end
@@ -5,20 +5,21 @@ module RedAmber
5
5
  module DataFrameReshaping
6
6
  # Transpose a wide DataFrame.
7
7
  #
8
- # @param key [Symbol, FalseClass] key of the index column
8
+ # @param key [Symbol] key of the index column
9
9
  # to transpose into keys.
10
- # If it is false, keys[0] is used.
11
- # @param new_key [Symbol, FalseClass] key name of transposed index column.
12
- # If it is false, :name is used. If it already exists, :name1.succ is used.
10
+ # If it is not specified, keys[0] is used.
11
+ # @param name [Symbol] key name of transposed index column.
12
+ # If it is not specified, :NAME is used. If it already exists, :NAME1 or :NAME1.succ is used.
13
13
  # @return [DataFrame] transposed DataFrame
14
- def transpose(key: keys.first, new_key: :name)
15
- raise DataFrameArgumentError, "Not include: #{key}" unless keys.include?(key)
14
+ def transpose(key: keys.first, name: :NAME)
15
+ raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
16
16
 
17
17
  # Find unused name
18
18
  new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
19
- new_key = (:name1..).find { |k| !new_keys.include?(k) } if new_keys.include?(new_key)
19
+ name = (:NAME1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
20
20
 
21
- hash = { new_key => (keys - [key]) }
21
+ names = (keys - [key]).map { |x| x&.to_s }
22
+ hash = { name => names }
22
23
  i = keys.index(key)
23
24
  each_row do |h|
24
25
  k = h.values[i]
@@ -33,7 +34,7 @@ module RedAmber
33
34
  # @param name [Symbol, String] key of the column which comes **from key names**.
34
35
  # @param value [Symbol, String] key of the column which comes **from values**.
35
36
  # @return [DataFrame] long DataFrame.
36
- def to_long(*keep_keys, name: :name, value: :value)
37
+ def to_long(*keep_keys, name: :NAME, value: :VALUE)
37
38
  not_included = keep_keys - keys
38
39
  raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
39
40
 
@@ -55,6 +56,7 @@ module RedAmber
55
56
  end
56
57
  end
57
58
  end
59
+ hash[name] = hash[name].map { |x| x&.to_s }
58
60
  DataFrame.new(hash)
59
61
  end
60
62
 
@@ -63,7 +65,7 @@ module RedAmber
63
65
  # @param name [Symbol, String] key of the column which will be expanded **to key names**.
64
66
  # @param value [Symbol, String] key of the column which will be expanded **to values**.
65
67
  # @return [DataFrame] wide DataFrame.
66
- def to_wide(name: :name, value: :value)
68
+ def to_wide(name: :NAME, value: :VALUE)
67
69
  name = name.to_sym
68
70
  raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
69
71