red_amber 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +5 -0
  3. data/CHANGELOG.md +125 -0
  4. data/README.md +86 -269
  5. data/doc/DataFrame.md +427 -281
  6. data/doc/Vector.md +35 -54
  7. data/doc/image/basic_verbs.png +0 -0
  8. data/doc/image/dataframe/assign.png +0 -0
  9. data/doc/image/dataframe/assign_operation.png +0 -0
  10. data/doc/image/dataframe/drop.png +0 -0
  11. data/doc/image/dataframe/pick.png +0 -0
  12. data/doc/image/dataframe/pick_operation.png +0 -0
  13. data/doc/image/dataframe/remove.png +0 -0
  14. data/doc/image/dataframe/rename.png +0 -0
  15. data/doc/image/dataframe/rename_operation.png +0 -0
  16. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  17. data/doc/image/dataframe/slice.png +0 -0
  18. data/doc/image/dataframe/slice_operation.png +0 -0
  19. data/doc/image/dataframe_model.png +0 -0
  20. data/doc/image/group_operation.png +0 -0
  21. data/doc/image/replace-if_then.png +0 -0
  22. data/doc/image/reshaping_dataframe.png +0 -0
  23. data/doc/image/screenshot.png +0 -0
  24. data/doc/image/vector/binary_element_wise.png +0 -0
  25. data/doc/image/vector/unary_aggregation.png +0 -0
  26. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  27. data/doc/image/vector/unary_element_wise.png +0 -0
  28. data/lib/red_amber/data_frame.rb +33 -41
  29. data/lib/red_amber/data_frame_displayable.rb +59 -6
  30. data/lib/red_amber/data_frame_loadsave.rb +36 -0
  31. data/lib/red_amber/data_frame_reshaping.rb +12 -10
  32. data/lib/red_amber/data_frame_selectable.rb +53 -9
  33. data/lib/red_amber/data_frame_variable_operation.rb +57 -20
  34. data/lib/red_amber/group.rb +5 -3
  35. data/lib/red_amber/helper.rb +20 -18
  36. data/lib/red_amber/vector.rb +50 -31
  37. data/lib/red_amber/vector_functions.rb +21 -24
  38. data/lib/red_amber/vector_selectable.rb +18 -9
  39. data/lib/red_amber/vector_updatable.rb +6 -3
  40. data/lib/red_amber/version.rb +1 -1
  41. data/lib/red_amber.rb +1 -0
  42. metadata +13 -3
  43. data/doc/examples_of_red_amber.ipynb +0 -6783
data/doc/Vector.md CHANGED
@@ -7,7 +7,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
7
7
  ### Create from a column in a DataFrame
8
8
 
9
9
  ```ruby
10
- df = RedAmber::DataFrame.new(x: [1, 2, 3])
10
+ df = DataFrame.new(x: [1, 2, 3])
11
11
  df[:x]
12
12
  # =>
13
13
  #<RedAmber::Vector(:uint8, size=3):0x000000000000f4ec>
@@ -17,13 +17,13 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
17
17
  ### New from an Array
18
18
 
19
19
  ```ruby
20
- vector = RedAmber::Vector.new([1, 2, 3])
20
+ vector = Vector.new([1, 2, 3])
21
21
  # or
22
- vector = RedAmber::Vector.new(1, 2, 3)
22
+ vector = Vector.new(1, 2, 3)
23
23
  # or
24
- vector = RedAmber::Vector.new(1..3)
24
+ vector = Vector.new(1..3)
25
25
  # or
26
- vector = RedAmber::Vector.new(Arrow::Array([1, 2, 3])
26
+ vector = Vector.new(Arrow::Array.new([1, 2, 3]))
27
27
 
28
28
  # =>
29
29
  #<RedAmber::Vector(:uint8, size=3):0x000000000000f514>
@@ -61,7 +61,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
61
61
 
62
62
  ### `type_class`
63
63
 
64
- ### `each`
64
+ ### `each`, `map`, `collect`
65
65
 
66
66
  If block is not given, returns Enumerator.
67
67
 
@@ -78,7 +78,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
78
78
  - `limit` sets size limit to display a long array.
79
79
 
80
80
  ```ruby
81
- vector = RedAmber::Vector.new((1..50).to_a)
81
+ vector = Vector.new((1..50).to_a)
82
82
  # =>
83
83
  #<RedAmber::Vector(:uint8, size=50):0x000000000000f528>
84
84
  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ... ]
@@ -95,8 +95,8 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
95
95
  - Negative index is also OK like the Ruby's primitive Array.
96
96
 
97
97
  ```ruby
98
- array = RedAmber::Vector.new(%w[A B C D E])
99
- indices = RedAmber::Vector.new([0.1, -0.5, -5.1])
98
+ array = Vector.new(%w[A B C D E])
99
+ indices = Vector.new([0.1, -0.5, -5.1])
100
100
  array.take(indices)
101
101
  # or
102
102
  array[indices]
@@ -106,7 +106,7 @@ array[indices]
106
106
  ["A", "E", "A"]
107
107
  ```
108
108
 
109
- ### `filter(booleans)`, `[](booleans)`
109
+ ### `filter(booleans)`, `select(booleans)`, `[](booleans)`
110
110
 
111
111
  - Acceptable class for booleans:
112
112
  - An array of true, false, or nil
@@ -114,7 +114,7 @@ array[indices]
114
114
  - Arrow::BooleanArray
115
115
 
116
116
  ```ruby
117
- array = RedAmber::Vector.new(%w[A B C D E])
117
+ array = Vector.new(%w[A B C D E])
118
118
  booleans = [true, false, nil, false, true]
119
119
  array.filter(booleans)
120
120
  # or
@@ -124,6 +124,7 @@ array[booleans]
124
124
  #<RedAmber::Vector(:string, size=2):0x000000000000f21c>
125
125
  ["A", "E"]
126
126
  ```
127
+ `filter` and `select` also accept a block.
127
128
 
128
129
  ## Functions
129
130
 
@@ -158,7 +159,7 @@ Options can be used as follows.
158
159
  See the [document of C++ function](https://arrow.apache.org/docs/cpp/compute.html) for detail.
159
160
 
160
161
  ```ruby
161
- double = RedAmber::Vector.new([1, 0/0.0, -1/0.0, 1/0.0, nil, ""])
162
+ double = Vector.new([1, 0/0.0, -1/0.0, 1/0.0, nil, ""])
162
163
  #=>
163
164
  #<RedAmber::Vector(:double, size=6):0x000000000000f910>
164
165
  [1.0, NaN, -Infinity, Infinity, nil, 0.0]
@@ -168,7 +169,7 @@ double.count(mode: :only_valid) #=> 5, default
168
169
  double.count(mode: :only_null) #=> 1
169
170
  double.count(mode: :all) #=> 6
170
171
 
171
- boolean = RedAmber::Vector.new([true, true, nil])
172
+ boolean = Vector.new([true, true, nil])
172
173
  #=>
173
174
  #<RedAmber::Vector(:boolean, size=3):0x000000000000f924>
174
175
  [true, true, nil]
@@ -187,8 +188,8 @@ boolean.all(skip_nulls: false) #=> false
187
188
  | ✓ `-@` | | ✓ | | |as `-vector`|
188
189
  | ✓ `negate` | | ✓ | | |`-@` |
189
190
  | ✓ `abs` | | ✓ | | | |
190
- |[ ]`acos` | | [ ] | | | |
191
- |[ ]`asin` | | [ ] | | | |
191
+ | `acos` | || | | |
192
+ | `asin` | || | | |
192
193
  | ✓ `atan` | | ✓ | | | |
193
194
  | ✓ `bit_wise_not`| | (✓) | | |integer only|
194
195
  | ✓ `ceil` | | ✓ | | | |
@@ -197,10 +198,10 @@ boolean.all(skip_nulls: false) #=> false
197
198
  | ✓`fill_nil_forward` | ✓ | ✓ | ✓ | | |
198
199
  | ✓ `floor` | | ✓ | | | |
199
200
  | ✓ `invert` | ✓ | | | |`!`, alias `not`|
200
- |[ ]`ln` | | [ ] | | | |
201
- |[ ]`log10` | | [ ] | | | |
202
- |[ ]`log1p` | | [ ] | | | |
203
- |[ ]`log2` | | [ ] | | | |
201
+ | `ln` | || | | |
202
+ | `log10` | || | | |
203
+ | `log1p` | || | |Compute natural log of (1+x)|
204
+ | `log2` | || | | |
204
205
  | ✓ `round` | | ✓ | | ✓ Round (:mode, :n_digits)| |
205
206
  | ✓ `round_to_multiple`| | ✓ | | ✓ RoundToMultiple :mode, :multiple| multiple must be an Arrow::Scalar|
206
207
  | ✓ `sign` | | ✓ | | | |
@@ -215,7 +216,7 @@ Examples of options for `#round`;
215
216
  - `round_mode` Specify rounding mode.
216
217
 
217
218
  ```ruby
218
- double = RedAmber::Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
219
+ double = Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
219
220
  # => [15.15, 2.5, 3.5, -4.5, -5.5]
220
221
  double.round
221
222
  # => [15.0, 2.0, 4.0, -4.0, -6.0]
@@ -267,7 +268,7 @@ double.round(n_digits: -1)
267
268
  | ✓ `is_valid` | ✓ | ✓ | ✓ | | |
268
269
  | ✓ `less` | ✓ | ✓ | ✓ | |`<`, alias `lt`|
269
270
  | ✓ `less_equal` | ✓ | ✓ | ✓ | |`<=`, alias `le`|
270
- |[ ]`logb` | | [ ] | | | |
271
+ | `logb` | || | |logb(b) Compute base `b` logarithm|
271
272
  |[ ]`mod` | | [ ] | | | `%` |
272
273
  | ✓ `multiply` | | ✓ | | | `*` |
273
274
  | ✓ `not_equal` | ✓ | ✓ | ✓ | |`!=`, alias `ne`|
@@ -283,8 +284,6 @@ double.round(n_digits: -1)
283
284
 
284
285
  Returns a new array with distinct elements.
285
286
 
286
- (Not impremented functions)
287
-
288
287
  ### `tally` and `value_counts`
289
288
 
290
289
  Compute counts of unique elements and return a Hash.
@@ -295,7 +294,7 @@ double.round(n_digits: -1)
295
294
  array = [0.0/0, Float::NAN]
296
295
  array.tally #=> {NaN=>1, NaN=>1}
297
296
 
298
- vector = RedAmber::Vector.new(array)
297
+ vector = Vector.new(array)
299
298
  vector.tally #=> {NaN=>2}
300
299
  vector.value_counts #=> {NaN=>2}
301
300
  ```
@@ -309,19 +308,10 @@ double.round(n_digits: -1)
309
308
 
310
309
  ### `sort_indexes`, `sort_indices`, `array_sort_indices`
311
310
 
312
- ### [ ] `sort`, `sort_by`
313
- ### [ ] argmin, argmax
314
- ### [ ] (array functions)
315
- ### [ ] (strings functions)
316
- ### [ ] (temporal functions)
317
- ### [ ] (conditional functions)
318
- ### [ ] (index functions)
319
- ### [ ] (other functions)
320
-
321
311
  ## Coerce
322
312
 
323
313
  ```ruby
324
- vector = RedAmber::Vector.new(1,2,3)
314
+ vector = Vector.new(1,2,3)
325
315
  # =>
326
316
  #<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
327
317
  [1, 2, 3]
@@ -351,12 +341,13 @@ vector * -1
351
341
  - Accepts Scalar, Range of Integer, Vector, Array, Arrow::Array as a specifier
352
342
  - Accepts Scalar, Vector, Array and Arrow::Array as a replacer.
353
343
  - Boolean specifiers specify the position of replacer in true.
344
+ - If `booleans.any` is false, no replacement happens and self is returned.
354
345
  - Index specifiers specify the position of replacer in indices.
355
346
  - replacer specifies the values to be replaced.
356
347
  - The number of true in booleans must be equal to the length of replacer
357
348
 
358
349
  ```ruby
359
- vector = RedAmber::Vector.new([1, 2, 3])
350
+ vector = Vector.new([1, 2, 3])
360
351
  booleans = [true, false, true]
361
352
  replacer = [4, 5]
362
353
  vector.replace(booleans, replacer)
@@ -390,7 +381,7 @@ vector.replace(booleans, replacer)
390
381
  ```ruby
391
382
  booleans = [true, false, nil]
392
383
  replacer = -1
393
- vec.replace(booleans, replacer)
384
+ vector.replace(booleans, replacer)
394
385
  =>
395
386
  #<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
396
387
  [-1, 2, nil]
@@ -401,17 +392,7 @@ vec.replace(booleans, replacer)
401
392
  ```ruby
402
393
  booleans = [true, false, true]
403
394
  replacer = [nil]
404
- vec.replace(booleans, replacer)
405
- =>
406
- #<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
407
- [nil, 2, nil]
408
- ```
409
-
410
- - If no replacer specified, it is same as to specify nil.
411
-
412
- ```ruby
413
- booleans = [true, false, true]
414
- vec.replace(booleans)
395
+ vector.replace(booleans, replacer)
415
396
  =>
416
397
  #<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
417
398
  [nil, 2, nil]
@@ -420,7 +401,7 @@ vec.replace(booleans)
420
401
  - An example to replace 'NA' to nil.
421
402
 
422
403
  ```ruby
423
- vector = RedAmber::Vector.new(['A', 'B', 'NA'])
404
+ vector = Vector.new(['A', 'B', 'NA'])
424
405
  vector.replace(vector == 'NA', nil)
425
406
  # =>
426
407
  #<RedAmber::Vector(:string, size=3):0x000000000000f8ac>
@@ -432,7 +413,7 @@ vector.replace(vector == 'NA', nil)
432
413
  Specified indices are used 'as sorted'. Position in indices and replacer may not have correspondence.
433
414
 
434
415
  ```ruby
435
- vector = RedAmber::Vector.new([1, 2, 3])
416
+ vector = Vector.new([1, 2, 3])
436
417
  indices = [2, 1]
437
418
  replacer = [4, 5]
438
419
  vector.replace(indices, replacer)
@@ -448,7 +429,7 @@ Propagate the last valid observation forward (or backward).
448
429
  Or preserve nil if all previous values are nil or at the end.
449
430
 
450
431
  ```ruby
451
- integer = RedAmber::Vector.new([0, 1, nil, 3, nil])
432
+ integer = Vector.new([0, 1, nil, 3, nil])
452
433
  integer.fill_nil_forward
453
434
  # =>
454
435
  #<RedAmber::Vector(:uint8, size=5):0x000000000000f960>
@@ -470,7 +451,7 @@ Choose values based on self. Self must be a boolean Vector.
470
451
  This example will normalize negative indices to positive ones.
471
452
 
472
453
  ```ruby
473
- indices = RedAmber::Vector.new([1, -1, 3, -4])
454
+ indices = Vector.new([1, -1, 3, -4])
474
455
  array_size = 10
475
456
  normalized_indices = (indices < 0).if_else(indices + array_size, indices)
476
457
 
@@ -485,7 +466,7 @@ For each element in self, return true if it is found in given `values`, false ot
485
466
  By default, nulls are matched against the value set. (This will be changed in SetLookupOptions: not implemented.)
486
467
 
487
468
  ```ruby
488
- vector = RedAmber::Vector.new %W[A B C D]
469
+ vector = Vector.new %W[A B C D]
489
470
  values = ['A', 'C', 'X']
490
471
  vector.is_in(values)
491
472
 
@@ -497,7 +478,7 @@ vector.is_in(values)
497
478
  `values` are casted to the same Class of Vector.
498
479
 
499
480
  ```ruby
500
- vector = RedAmber::Vector.new([1, 2, 255])
481
+ vector = Vector.new([1, 2, 255])
501
482
  vector.is_in(1, -1)
502
483
 
503
484
  # =>
@@ -510,7 +491,7 @@ vector.is_in(1, -1)
510
491
  Shift vector's values by specified `amount`. Shifted space is filled by value `fill`.
511
492
 
512
493
  ```ruby
513
- vector = RedAmber::Vector.new([1, 2, 3, 4, 5])
494
+ vector = Vector.new([1, 2, 3, 4, 5])
514
495
  vector.shift
515
496
 
516
497
  # =>
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -7,6 +7,7 @@ module RedAmber
7
7
  # mix-in
8
8
  include DataFrameDisplayable
9
9
  include DataFrameIndexable
10
+ include DataFrameLoadSave
10
11
  include DataFrameReshaping
11
12
  include DataFrameSelectable
12
13
  include DataFrameVariableOperation
@@ -37,6 +38,13 @@ module RedAmber
37
38
  # DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
38
39
  # returns empty DataFrame
39
40
  @table = Arrow::Table.new({}, [])
41
+ in [->(x) { x.respond_to?(:to_arrow) } => arrowable]
42
+ table = arrowable.to_arrow
43
+ unless table.is_a?(Arrow::Table)
44
+ raise DataFrameTypeError,
45
+ "to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
46
+ end
47
+ @table = table
40
48
  in [Arrow::Table => table]
41
49
  @table = table
42
50
  in [DataFrame => dataframe]
@@ -52,10 +60,9 @@ module RedAmber
52
60
  @table = Arrow::Table.new(*args)
53
61
  end
54
62
  name_unnamed_keys
55
- end
56
63
 
57
- def self.load(path, options = {})
58
- DataFrame.new(Arrow::Table.load(path, options))
64
+ duplicated_keys = keys.tally.select { |_k, v| v > 1 }.keys
65
+ raise DataFrameArgumentError, "duplicate keys: #{duplicated_keys}" unless duplicated_keys.empty?
59
66
  end
60
67
 
61
68
  attr_reader :table
@@ -64,10 +71,6 @@ module RedAmber
64
71
  @table
65
72
  end
66
73
 
67
- def save(output, options = {})
68
- @table.save(output, options)
69
- end
70
-
71
74
  # Returns the number of rows.
72
75
  #
73
76
  # @return [Integer] Number of rows.
@@ -159,12 +162,19 @@ module RedAmber
159
162
  @vectors || @vectors = init_instance_vars(:vectors)
160
163
  end
161
164
 
162
- # Returns row indices (0...size) in an Array.
165
+ # Returns row indices (start...(size+start)) in an Array.
163
166
  #
167
+ # @param start [Object]
168
+ # Object which has a #succ method.
164
169
  # @return [Array]
165
- # An Array of all indices of rows.
166
- def indices
167
- (0...size).to_a
170
+ # An Array of indices of the row.
171
+ # @example
172
+ # (when self.size == 5)
173
+ # - indices #=> [0, 1, 2, 3, 4]
174
+ # - indices(1) #=> [1, 2, 3, 4, 5]
175
+ # - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
176
+ def indices(start = 0)
177
+ (start..).take(size)
168
178
  end
169
179
  alias_method :indexes, :indices
170
180
 
@@ -208,23 +218,24 @@ module RedAmber
208
218
  Rover::DataFrame.new(to_h)
209
219
  end
210
220
 
211
- def to_iruby
212
- require 'iruby'
213
- return ['text/plain', '(empty DataFrame)'] if empty?
214
-
215
- if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
216
- size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
217
- else
218
- ['text/html', html_table]
219
- end
220
- end
221
-
222
221
  def group(*group_keys, &block)
223
222
  g = Group.new(self, group_keys)
224
223
  g = g.summarize(&block) if block
225
224
  g
226
225
  end
227
226
 
227
+ def method_missing(name, *args, &block)
228
+ return v(name) if args.empty?
229
+
230
+ super
231
+ end
232
+
233
+ def respond_to_missing?(name, include_private)
234
+ return true if key?(name)
235
+
236
+ super
237
+ end
238
+
228
239
  private
229
240
 
230
241
  # initialize @variable, @keys, @vectors and return one of them
@@ -241,25 +252,6 @@ module RedAmber
241
252
  ary[%i[variables keys vectors].index(var)]
242
253
  end
243
254
 
244
- def html_table
245
- reduced = size > 8 ? self[0..4, -4..-1] : self
246
-
247
- converted = reduced.assign do
248
- vectors.select.with_object({}) do |vector, assigner|
249
- if vector.has_nil?
250
- assigner[vector.key] = vector.to_a.map do |e|
251
- e = e.nil? ? '<i>(nil)</i>' : e.to_s # nil
252
- e = '""' if e.empty? # empty string
253
- e.sub(/(\s+)/, '"\1"') # blank spaces
254
- end
255
- end
256
- end
257
- end
258
-
259
- html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
260
- "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
261
- end
262
-
263
255
  def name_unnamed_keys
264
256
  return unless @table[:'']
265
257
 
@@ -37,8 +37,12 @@ module RedAmber
37
37
  alias_method :describe, :summary
38
38
 
39
39
  def inspect
40
- if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
40
+ mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
41
+ case mode.upcase
42
+ when 'TDR'
41
43
  "#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
44
+ when 'MINIMUM'
45
+ shape_str
42
46
  else
43
47
  "#<#{shape_str(with_id: true)}>\n#{self}"
44
48
  end
@@ -55,6 +59,23 @@ module RedAmber
55
59
  "#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
56
60
  end
57
61
 
62
+ def to_iruby
63
+ require 'iruby'
64
+ return ['text/plain', '(empty DataFrame)'] if empty?
65
+
66
+ mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
67
+ case mode.upcase
68
+ when 'PLAIN'
69
+ ['text/plain', inspect]
70
+ when 'MINIMUM'
71
+ ['text/plain', shape_str]
72
+ when 'TDR'
73
+ size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
74
+ else # 'TABLE'
75
+ ['text/html', html_table]
76
+ end
77
+ end
78
+
58
79
  private # =====
59
80
 
60
81
  def shape_str(with_id: false)
@@ -98,7 +119,7 @@ module RedAmber
98
119
  else
99
120
  [shorthand(vector, size, max_element)]
100
121
  end
101
- sio.printf header_format, i + 1, key, type, data_tally.size, a.join(', ')
122
+ sio.printf header_format, i, key, type, data_tally.size, a.join(', ')
102
123
  end
103
124
  sio.string
104
125
  end
@@ -154,9 +175,9 @@ module RedAmber
154
175
 
155
176
  def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
156
177
  original = self
157
- indices = size > head + tail ? [*0...head, *(size - tail)...size] : [*0...size]
178
+ indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
158
179
  df = slice(indices).assign do
159
- assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
180
+ assigner = { INDEX_KEY => indices.map(&:to_s) }
160
181
  vectors.each_with_object(assigner) do |v, a|
161
182
  a[v.key] = v.to_a.map do |e|
162
183
  if e.nil?
@@ -173,12 +194,12 @@ module RedAmber
173
194
  end
174
195
 
175
196
  df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
176
- df = size > head + tail ? df[0, 0, 0...head, 0, -tail..-1] : df[0, 0, 0..-1]
197
+ df = size > head + tail ? df[0, 0, 0..head, -tail..-1] : df[0, 0, 0..-1]
177
198
  df = df.assign do
178
199
  vectors.each_with_object({}) do |v, assigner|
179
200
  vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
180
201
  .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
181
- assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
202
+ assigner[v.key] = original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
182
203
  end
183
204
  end
184
205
 
@@ -220,5 +241,37 @@ module RedAmber
220
241
  "%#{width}s"
221
242
  end
222
243
  end
244
+
245
+ def html_table
246
+ reduced = size > 8 ? self[0..4, -4..-1] : self
247
+
248
+ converted = reduced.assign do
249
+ vectors.select.with_object({}) do |vector, assigner|
250
+ assigner[vector.key] = vector.map do |element|
251
+ case element
252
+ in TrueClass
253
+ '<i>(true)</i>'
254
+ in FalseClass
255
+ '<i>(false)</i>'
256
+ in NilClass
257
+ '<i>(nil)</i>'
258
+ in ''
259
+ '""'
260
+ in String
261
+ element.sub(/^(\s+)$/, '"\1"') # blank spaces
262
+ in Float
263
+ format('%g', element)
264
+ in Integer
265
+ format('%d', element)
266
+ else
267
+ element
268
+ end
269
+ end
270
+ end
271
+ end
272
+
273
+ html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
274
+ "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
275
+ end
223
276
  end
224
277
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedAmber
4
+ # mix-ins for the class DataFrame
5
+ module DataFrameLoadSave
6
+ # Enable `self.load` as class method of DataFrame
7
+ def self.included(klass)
8
+ klass.extend ClassMethods
9
+ end
10
+
11
+ # Enable `self.load` as class method of DataFrame
12
+ module ClassMethods
13
+ # Load DataFrame via Arrow::Table.load
14
+ def load(path, options = {})
15
+ DataFrame.new(Arrow::Table.load(path, options))
16
+ end
17
+ end
18
+
19
+ # Save DataFrame
20
+ def save(output, options = {})
21
+ @table.save(output, options)
22
+ end
23
+
24
+ # Save and reload to cast automatically
25
+ # By default, goes through a temporary buffer in tsv format
26
+ #
27
+ # experimental feature
28
+ def auto_cast(format: :tsv)
29
+ return self if empty?
30
+
31
+ tempfile = Arrow::ResizableBuffer.new(1024)
32
+ save(tempfile, format: format)
33
+ DataFrame.load(tempfile, format: format)
34
+ end
35
+ end
36
+ end
@@ -5,20 +5,21 @@ module RedAmber
5
5
  module DataFrameReshaping
6
6
  # Transpose a wide DataFrame.
7
7
  #
8
- # @param key [Symbol, FalseClass] key of the index column
8
+ # @param key [Symbol] key of the index column
9
9
  # to transpose into keys.
10
- # If it is false, keys[0] is used.
11
- # @param new_key [Symbol, FalseClass] key name of transposed index column.
12
- # If it is false, :name is used. If it already exists, :name1.succ is used.
10
+ # If it is not specified, keys[0] is used.
11
+ # @param name [Symbol] key name of transposed index column.
12
+ # If it is not specified, :NAME is used. If it already exists, :NAME1 or :NAME1.succ is used.
13
13
  # @return [DataFrame] transposed DataFrame
14
- def transpose(key: keys.first, new_key: :name)
15
- raise DataFrameArgumentError, "Not include: #{key}" unless keys.include?(key)
14
+ def transpose(key: keys.first, name: :NAME)
15
+ raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
16
16
 
17
17
  # Find unused name
18
18
  new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
19
- new_key = (:name1..).find { |k| !new_keys.include?(k) } if new_keys.include?(new_key)
19
+ name = (:NAME1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
20
20
 
21
- hash = { new_key => (keys - [key]) }
21
+ names = (keys - [key]).map { |x| x&.to_s }
22
+ hash = { name => names }
22
23
  i = keys.index(key)
23
24
  each_row do |h|
24
25
  k = h.values[i]
@@ -33,7 +34,7 @@ module RedAmber
33
34
  # @param name [Symbol, String] key of the column which comes **from key names**.
34
35
  # @param value [Symbol, String] key of the column which comes **from values**.
35
36
  # @return [DataFrame] long DataFrame.
36
- def to_long(*keep_keys, name: :name, value: :value)
37
+ def to_long(*keep_keys, name: :NAME, value: :VALUE)
37
38
  not_included = keep_keys - keys
38
39
  raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
39
40
 
@@ -55,6 +56,7 @@ module RedAmber
55
56
  end
56
57
  end
57
58
  end
59
+ hash[name] = hash[name].map { |x| x&.to_s }
58
60
  DataFrame.new(hash)
59
61
  end
60
62
 
@@ -63,7 +65,7 @@ module RedAmber
63
65
  # @param name [Symbol, String] key of the column which will be expanded **to key names**.
64
66
  # @param value [Symbol, String] key of the column which will be expanded **to values**.
65
67
  # @return [DataFrame] wide DataFrame.
66
- def to_wide(name: :name, value: :value)
68
+ def to_wide(name: :NAME, value: :VALUE)
67
69
  name = name.to_sym
68
70
  raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
69
71