red_amber 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +3 -0
  3. data/CHANGELOG.md +69 -2
  4. data/README.md +83 -280
  5. data/doc/DataFrame.md +279 -265
  6. data/doc/Vector.md +28 -36
  7. data/doc/image/basic_verbs.png +0 -0
  8. data/doc/image/dataframe/assign.png +0 -0
  9. data/doc/image/dataframe/assign_operation.png +0 -0
  10. data/doc/image/dataframe/drop.png +0 -0
  11. data/doc/image/dataframe/pick.png +0 -0
  12. data/doc/image/dataframe/pick_operation.png +0 -0
  13. data/doc/image/dataframe/remove.png +0 -0
  14. data/doc/image/dataframe/rename.png +0 -0
  15. data/doc/image/dataframe/rename_operation.png +0 -0
  16. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  17. data/doc/image/dataframe/slice.png +0 -0
  18. data/doc/image/dataframe/slice_operation.png +0 -0
  19. data/doc/image/dataframe_model.png +0 -0
  20. data/doc/image/group_operation.png +0 -0
  21. data/doc/image/replace-if_then.png +0 -0
  22. data/doc/image/reshaping_dataframe.png +0 -0
  23. data/doc/image/screenshot.png +0 -0
  24. data/doc/image/vector/binary_element_wise.png +0 -0
  25. data/doc/image/vector/unary_aggregation.png +0 -0
  26. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  27. data/doc/image/vector/unary_element_wise.png +0 -0
  28. data/lib/red_amber/data_frame.rb +10 -37
  29. data/lib/red_amber/data_frame_displayable.rb +56 -3
  30. data/lib/red_amber/data_frame_loadsave.rb +36 -0
  31. data/lib/red_amber/data_frame_reshaping.rb +8 -6
  32. data/lib/red_amber/data_frame_variable_operation.rb +25 -19
  33. data/lib/red_amber/group.rb +5 -3
  34. data/lib/red_amber/helper.rb +20 -18
  35. data/lib/red_amber/vector.rb +49 -30
  36. data/lib/red_amber/vector_selectable.rb +9 -1
  37. data/lib/red_amber/vector_updatable.rb +6 -3
  38. data/lib/red_amber/version.rb +1 -1
  39. data/lib/red_amber.rb +1 -0
  40. metadata +13 -3
  41. data/doc/examples_of_red_amber.ipynb +0 -8979
data/doc/Vector.md CHANGED
@@ -7,7 +7,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
7
7
  ### Create from a column in a DataFrame
8
8
 
9
9
  ```ruby
10
- df = RedAmber::DataFrame.new(x: [1, 2, 3])
10
+ df = DataFrame.new(x: [1, 2, 3])
11
11
  df[:x]
12
12
  # =>
13
13
  #<RedAmber::Vector(:uint8, size=3):0x000000000000f4ec>
@@ -17,13 +17,13 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
17
17
  ### New from an Array
18
18
 
19
19
  ```ruby
20
- vector = RedAmber::Vector.new([1, 2, 3])
20
+ vector = Vector.new([1, 2, 3])
21
21
  # or
22
- vector = RedAmber::Vector.new(1, 2, 3)
22
+ vector = Vector.new(1, 2, 3)
23
23
  # or
24
- vector = RedAmber::Vector.new(1..3)
24
+ vector = Vector.new(1..3)
25
25
  # or
26
- vector = RedAmber::Vector.new(Arrow::Array([1, 2, 3])
26
+ vector = Vector.new(Arrow::Array.new([1, 2, 3]))
27
27
 
28
28
  # =>
29
29
  #<RedAmber::Vector(:uint8, size=3):0x000000000000f514>
@@ -61,7 +61,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
61
61
 
62
62
  ### `type_class`
63
63
 
64
- ### `each`
64
+ ### `each`, `map`, `collect`
65
65
 
66
66
  If block is not given, returns Enumerator.
67
67
 
@@ -78,7 +78,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
78
78
  - `limit` sets size limit to display a long array.
79
79
 
80
80
  ```ruby
81
- vector = RedAmber::Vector.new((1..50).to_a)
81
+ vector = Vector.new((1..50).to_a)
82
82
  # =>
83
83
  #<RedAmber::Vector(:uint8, size=50):0x000000000000f528>
84
84
  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ... ]
@@ -95,8 +95,8 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
95
95
  - Negative index is also OK like the Ruby's primitive Array.
96
96
 
97
97
  ```ruby
98
- array = RedAmber::Vector.new(%w[A B C D E])
99
- indices = RedAmber::Vector.new([0.1, -0.5, -5.1])
98
+ array = Vector.new(%w[A B C D E])
99
+ indices = Vector.new([0.1, -0.5, -5.1])
100
100
  array.take(indices)
101
101
  # or
102
102
  array[indices]
@@ -106,7 +106,7 @@ array[indices]
106
106
  ["A", "E", "A"]
107
107
  ```
108
108
 
109
- ### `filter(booleans)`, `[](booleans)`
109
+ ### `filter(booleans)`, `select(booleans)`, `[](booleans)`
110
110
 
111
111
  - Acceptable class for booleans:
112
112
  - An array of true, false, or nil
@@ -114,7 +114,7 @@ array[indices]
114
114
  - Arrow::BooleanArray
115
115
 
116
116
  ```ruby
117
- array = RedAmber::Vector.new(%w[A B C D E])
117
+ array = Vector.new(%w[A B C D E])
118
118
  booleans = [true, false, nil, false, true]
119
119
  array.filter(booleans)
120
120
  # or
@@ -124,6 +124,7 @@ array[booleans]
124
124
  #<RedAmber::Vector(:string, size=2):0x000000000000f21c>
125
125
  ["A", "E"]
126
126
  ```
127
+ `filter` and `select` also accept a block.
127
128
 
128
129
  ## Functions
129
130
 
@@ -158,7 +159,7 @@ Options can be used as follows.
158
159
  See the [document of C++ function](https://arrow.apache.org/docs/cpp/compute.html) for detail.
159
160
 
160
161
  ```ruby
161
- double = RedAmber::Vector.new([1, 0/0.0, -1/0.0, 1/0.0, nil, ""])
162
+ double = Vector.new([1, 0/0.0, -1/0.0, 1/0.0, nil, ""])
162
163
  #=>
163
164
  #<RedAmber::Vector(:double, size=6):0x000000000000f910>
164
165
  [1.0, NaN, -Infinity, Infinity, nil, 0.0]
@@ -168,7 +169,7 @@ double.count(mode: :only_valid) #=> 5, default
168
169
  double.count(mode: :only_null) #=> 1
169
170
  double.count(mode: :all) #=> 6
170
171
 
171
- boolean = RedAmber::Vector.new([true, true, nil])
172
+ boolean = Vector.new([true, true, nil])
172
173
  #=>
173
174
  #<RedAmber::Vector(:boolean, size=3):0x000000000000f924>
174
175
  [true, true, nil]
@@ -215,7 +216,7 @@ Examples of options for `#round`;
215
216
  - `round_mode` Specify rounding mode.
216
217
 
217
218
  ```ruby
218
- double = RedAmber::Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
219
+ double = Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
219
220
  # => [15.15, 2.5, 3.5, -4.5, -5.5]
220
221
  double.round
221
222
  # => [15.0, 2.0, 4.0, -4.0, -6.0]
@@ -293,7 +294,7 @@ double.round(n_digits: -1)
293
294
  array = [0.0/0, Float::NAN]
294
295
  array.tally #=> {NaN=>1, NaN=>1}
295
296
 
296
- vector = RedAmber::Vector.new(array)
297
+ vector = Vector.new(array)
297
298
  vector.tally #=> {NaN=>2}
298
299
  vector.value_counts #=> {NaN=>2}
299
300
  ```
@@ -310,7 +311,7 @@ double.round(n_digits: -1)
310
311
  ## Coerce
311
312
 
312
313
  ```ruby
313
- vector = RedAmber::Vector.new(1,2,3)
314
+ vector = Vector.new(1,2,3)
314
315
  # =>
315
316
  #<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
316
317
  [1, 2, 3]
@@ -340,12 +341,13 @@ vector * -1
340
341
  - Accepts Scalar, Range of Integer, Vector, Array, Arrow::Array as a specifier
341
342
  - Accepts Scalar, Vector, Array and Arrow::Array as a replacer.
342
343
  - Boolean specifiers specify the position of replacer in true.
344
+ - If `booleans.any` is false, no replacement happens and self is returned.
343
345
  - Index specifiers specify the position of replacer in indices.
344
346
  - replacer specifies the values to be replaced.
345
347
  - The number of true in booleans must be equal to the length of replacer
346
348
 
347
349
  ```ruby
348
- vector = RedAmber::Vector.new([1, 2, 3])
350
+ vector = Vector.new([1, 2, 3])
349
351
  booleans = [true, false, true]
350
352
  replacer = [4, 5]
351
353
  vector.replace(booleans, replacer)
@@ -379,7 +381,7 @@ vector.replace(booleans, replacer)
379
381
  ```ruby
380
382
  booleans = [true, false, nil]
381
383
  replacer = -1
382
- vec.replace(booleans, replacer)
384
+ vector.replace(booleans, replacer)
383
385
  =>
384
386
  #<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
385
387
  [-1, 2, nil]
@@ -390,17 +392,7 @@ vec.replace(booleans, replacer)
390
392
  ```ruby
391
393
  booleans = [true, false, true]
392
394
  replacer = [nil]
393
- vec.replace(booleans, replacer)
394
- =>
395
- #<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
396
- [nil, 2, nil]
397
- ```
398
-
399
- - If no replacer specified, it is same as to specify nil.
400
-
401
- ```ruby
402
- booleans = [true, false, true]
403
- vec.replace(booleans)
395
+ vector.replace(booleans, replacer)
404
396
  =>
405
397
  #<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
406
398
  [nil, 2, nil]
@@ -409,7 +401,7 @@ vec.replace(booleans)
409
401
  - An example to replace 'NA' to nil.
410
402
 
411
403
  ```ruby
412
- vector = RedAmber::Vector.new(['A', 'B', 'NA'])
404
+ vector = Vector.new(['A', 'B', 'NA'])
413
405
  vector.replace(vector == 'NA', nil)
414
406
  # =>
415
407
  #<RedAmber::Vector(:string, size=3):0x000000000000f8ac>
@@ -421,7 +413,7 @@ vector.replace(vector == 'NA', nil)
421
413
  Specified indices are used 'as sorted'. Position in indices and replacer may not have correspondence.
422
414
 
423
415
  ```ruby
424
- vector = RedAmber::Vector.new([1, 2, 3])
416
+ vector = Vector.new([1, 2, 3])
425
417
  indices = [2, 1]
426
418
  replacer = [4, 5]
427
419
  vector.replace(indices, replacer)
@@ -437,7 +429,7 @@ Propagate the last valid observation forward (or backward).
437
429
  Or preserve nil if all previous values are nil or at the end.
438
430
 
439
431
  ```ruby
440
- integer = RedAmber::Vector.new([0, 1, nil, 3, nil])
432
+ integer = Vector.new([0, 1, nil, 3, nil])
441
433
  integer.fill_nil_forward
442
434
  # =>
443
435
  #<RedAmber::Vector(:uint8, size=5):0x000000000000f960>
@@ -459,7 +451,7 @@ Choose values based on self. Self must be a boolean Vector.
459
451
  This example will normalize negative indices to positive ones.
460
452
 
461
453
  ```ruby
462
- indices = RedAmber::Vector.new([1, -1, 3, -4])
454
+ indices = Vector.new([1, -1, 3, -4])
463
455
  array_size = 10
464
456
  normalized_indices = (indices < 0).if_else(indices + array_size, indices)
465
457
 
@@ -474,7 +466,7 @@ For each element in self, return true if it is found in given `values`, false ot
474
466
  By default, nulls are matched against the value set. (This will be changed in SetLookupOptions: not implemented.)
475
467
 
476
468
  ```ruby
477
- vector = RedAmber::Vector.new %W[A B C D]
469
+ vector = Vector.new %W[A B C D]
478
470
  values = ['A', 'C', 'X']
479
471
  vector.is_in(values)
480
472
 
@@ -486,7 +478,7 @@ vector.is_in(values)
486
478
  `values` are cast to the same Class of Vector.
487
479
 
488
480
  ```ruby
489
- vector = RedAmber::Vector.new([1, 2, 255])
481
+ vector = Vector.new([1, 2, 255])
490
482
  vector.is_in(1, -1)
491
483
 
492
484
  # =>
@@ -499,7 +491,7 @@ vector.is_in(1, -1)
499
491
  Shift vector's values by specified `amount`. Shifted space is filled by value `fill`.
500
492
 
501
493
  ```ruby
502
- vector = RedAmber::Vector.new([1, 2, 3, 4, 5])
494
+ vector = Vector.new([1, 2, 3, 4, 5])
503
495
  vector.shift
504
496
 
505
497
  # =>
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -7,6 +7,7 @@ module RedAmber
7
7
  # mix-in
8
8
  include DataFrameDisplayable
9
9
  include DataFrameIndexable
10
+ include DataFrameLoadSave
10
11
  include DataFrameReshaping
11
12
  include DataFrameSelectable
12
13
  include DataFrameVariableOperation
@@ -37,6 +38,13 @@ module RedAmber
37
38
  # DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
38
39
  # returns empty DataFrame
39
40
  @table = Arrow::Table.new({}, [])
41
+ in [->(x) { x.respond_to?(:to_arrow) } => arrowable]
42
+ table = arrowable.to_arrow
43
+ unless table.is_a?(Arrow::Table)
44
+ raise DataFrameTypeError,
45
+ "to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
46
+ end
47
+ @table = table
40
48
  in [Arrow::Table => table]
41
49
  @table = table
42
50
  in [DataFrame => dataframe]
@@ -52,10 +60,9 @@ module RedAmber
52
60
  @table = Arrow::Table.new(*args)
53
61
  end
54
62
  name_unnamed_keys
55
- end
56
63
 
57
- def self.load(path, options = {})
58
- DataFrame.new(Arrow::Table.load(path, options))
64
+ duplicated_keys = keys.tally.select { |_k, v| v > 1 }.keys
65
+ raise DataFrameArgumentError, "duplicate keys: #{duplicated_keys}" unless duplicated_keys.empty?
59
66
  end
60
67
 
61
68
  attr_reader :table
@@ -64,10 +71,6 @@ module RedAmber
64
71
  @table
65
72
  end
66
73
 
67
- def save(output, options = {})
68
- @table.save(output, options)
69
- end
70
-
71
74
  # Returns the number of rows.
72
75
  #
73
76
  # @return [Integer] Number of rows.
@@ -215,17 +218,6 @@ module RedAmber
215
218
  Rover::DataFrame.new(to_h)
216
219
  end
217
220
 
218
- def to_iruby
219
- require 'iruby'
220
- return ['text/plain', '(empty DataFrame)'] if empty?
221
-
222
- if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
223
- size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
224
- else
225
- ['text/html', html_table]
226
- end
227
- end
228
-
229
221
  def group(*group_keys, &block)
230
222
  g = Group.new(self, group_keys)
231
223
  g = g.summarize(&block) if block
@@ -260,25 +252,6 @@ module RedAmber
260
252
  ary[%i[variables keys vectors].index(var)]
261
253
  end
262
254
 
263
- def html_table
264
- reduced = size > 8 ? self[0..4, -4..-1] : self
265
-
266
- converted = reduced.assign do
267
- vectors.select.with_object({}) do |vector, assigner|
268
- if vector.has_nil?
269
- assigner[vector.key] = vector.to_a.map do |e|
270
- e = e.nil? ? '<i>(nil)</i>' : e.to_s # nil
271
- e = '""' if e.empty? # empty string
272
- e.sub(/(\s+)/, '"\1"') # blank spaces
273
- end
274
- end
275
- end
276
- end
277
-
278
- html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
279
- "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
280
- end
281
-
282
255
  def name_unnamed_keys
283
256
  return unless @table[:'']
284
257
 
@@ -37,8 +37,12 @@ module RedAmber
37
37
  alias_method :describe, :summary
38
38
 
39
39
  def inspect
40
- if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
40
+ mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
41
+ case mode.upcase
42
+ when 'TDR'
41
43
  "#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
44
+ when 'MINIMUM'
45
+ shape_str
42
46
  else
43
47
  "#<#{shape_str(with_id: true)}>\n#{self}"
44
48
  end
@@ -55,6 +59,23 @@ module RedAmber
55
59
  "#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
56
60
  end
57
61
 
62
+ def to_iruby
63
+ require 'iruby'
64
+ return ['text/plain', '(empty DataFrame)'] if empty?
65
+
66
+ mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
67
+ case mode.upcase
68
+ when 'PLAIN'
69
+ ['text/plain', inspect]
70
+ when 'MINIMUM'
71
+ ['text/plain', shape_str]
72
+ when 'TDR'
73
+ size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
74
+ else # 'TABLE'
75
+ ['text/html', html_table]
76
+ end
77
+ end
78
+
58
79
  private # =====
59
80
 
60
81
  def shape_str(with_id: false)
@@ -98,7 +119,7 @@ module RedAmber
98
119
  else
99
120
  [shorthand(vector, size, max_element)]
100
121
  end
101
- sio.printf header_format, i + 1, key, type, data_tally.size, a.join(', ')
122
+ sio.printf header_format, i, key, type, data_tally.size, a.join(', ')
102
123
  end
103
124
  sio.string
104
125
  end
@@ -156,7 +177,7 @@ module RedAmber
156
177
  original = self
157
178
  indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
158
179
  df = slice(indices).assign do
159
- assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
180
+ assigner = { INDEX_KEY => indices.map(&:to_s) }
160
181
  vectors.each_with_object(assigner) do |v, a|
161
182
  a[v.key] = v.to_a.map do |e|
162
183
  if e.nil?
@@ -220,5 +241,37 @@ module RedAmber
220
241
  "%#{width}s"
221
242
  end
222
243
  end
244
+
245
+ def html_table
246
+ reduced = size > 8 ? self[0..4, -4..-1] : self
247
+
248
+ converted = reduced.assign do
249
+ vectors.select.with_object({}) do |vector, assigner|
250
+ assigner[vector.key] = vector.map do |element|
251
+ case element
252
+ in TrueClass
253
+ '<i>(true)</i>'
254
+ in FalseClass
255
+ '<i>(false)</i>'
256
+ in NilClass
257
+ '<i>(nil)</i>'
258
+ in ''
259
+ '""'
260
+ in String
261
+ element.sub(/^(\s+)$/, '"\1"') # blank spaces
262
+ in Float
263
+ format('%g', element)
264
+ in Integer
265
+ format('%d', element)
266
+ else
267
+ element
268
+ end
269
+ end
270
+ end
271
+ end
272
+
273
+ html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
274
+ "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
275
+ end
223
276
  end
224
277
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedAmber
4
+ # mix-ins for the class DataFrame
5
+ module DataFrameLoadSave
6
+ # Enable `self.load` as class method of DataFrame
7
+ def self.included(klass)
8
+ klass.extend ClassMethods
9
+ end
10
+
11
+ # Enable `self.load` as class method of DataFrame
12
+ module ClassMethods
13
+ # Load DataFrame via Arrow::Table.load
14
+ def load(path, options = {})
15
+ DataFrame.new(Arrow::Table.load(path, options))
16
+ end
17
+ end
18
+
19
+ # Save DataFrame
20
+ def save(output, options = {})
21
+ @table.save(output, options)
22
+ end
23
+
24
+ # Save and reload to cast automatically
25
+ # Via tsv format file temporarily as default
26
+ #
27
+ # experimental feature
28
+ def auto_cast(format: :tsv)
29
+ return self if empty?
30
+
31
+ tempfile = Arrow::ResizableBuffer.new(1024)
32
+ save(tempfile, format: format)
33
+ DataFrame.load(tempfile, format: format)
34
+ end
35
+ end
36
+ end
@@ -9,16 +9,17 @@ module RedAmber
9
9
  # to transepose into keys.
10
10
  # If it is not specified, keys[0] is used.
11
11
  # @param name [Symbol] key name of transposed index column.
12
- # If it is not specified, :N is used. If it already exists, :N1 or :N1.succ is used.
12
+ # If it is not specified, :NAME is used. If it already exists, :NAME1 or :NAME1.succ is used.
13
13
  # @return [DataFrame] transposed DataFrame
14
- def transpose(key: keys.first, name: :N)
14
+ def transpose(key: keys.first, name: :NAME)
15
15
  raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
16
16
 
17
17
  # Find unused name
18
18
  new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
19
- name = (:N1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
19
+ name = (:NAME1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
20
20
 
21
- hash = { name => (keys - [key]) }
21
+ names = (keys - [key]).map { |x| x&.to_s }
22
+ hash = { name => names }
22
23
  i = keys.index(key)
23
24
  each_row do |h|
24
25
  k = h.values[i]
@@ -33,7 +34,7 @@ module RedAmber
33
34
  # @param name [Symbol, String] key of the column which comes **from key names**.
34
35
  # @param value [Symbol, String] key of the column which comes **from values**.
35
36
  # @return [DataFrame] long DataFrame.
36
- def to_long(*keep_keys, name: :N, value: :V)
37
+ def to_long(*keep_keys, name: :NAME, value: :VALUE)
37
38
  not_included = keep_keys - keys
38
39
  raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
39
40
 
@@ -55,6 +56,7 @@ module RedAmber
55
56
  end
56
57
  end
57
58
  end
59
+ hash[name] = hash[name].map { |x| x&.to_s }
58
60
  DataFrame.new(hash)
59
61
  end
60
62
 
@@ -63,7 +65,7 @@ module RedAmber
63
65
  # @param name [Symbol, String] key of the column which will be expanded **to key names**.
64
66
  # @param value [Symbol, String] key of the column which will be expanded **to values**.
65
67
  # @return [DataFrame] wide DataFrame.
66
- def to_wide(name: :N, value: :V)
68
+ def to_wide(name: :NAME, value: :VALUE)
67
69
  name = name.to_sym
68
70
  raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
69
71
 
@@ -15,16 +15,22 @@ module RedAmber
15
15
  return DataFrame.new if picker.empty? || picker == [nil]
16
16
 
17
17
  key_vector = Vector.new(keys)
18
- picker_vector = parse_to_vector(picker)
19
-
20
- picker = key_vector.filter(*picker_vector).to_a if picker_vector.boolean?
21
- picker = key_vector.take(*picker_vector).to_a if picker_vector.numeric?
18
+ vec = parse_to_vector(picker, vsize: n_keys)
19
+
20
+ ary =
21
+ if vec.boolean?
22
+ key_vector.filter(*vec).to_a
23
+ elsif vec.numeric?
24
+ key_vector.take(*vec).to_a
25
+ elsif vec.string? || vec.dictionary?
26
+ picker
27
+ else
28
+ raise DataFrameArgumentError, "Invalid argument #{args}"
29
+ end
22
30
 
23
31
  # DataFrame#[] creates a Vector when a single key is specified.
24
32
  # DataFrame#pick creates a DataFrame with single key.
25
- return DataFrame.new(@table[picker]) if sym_or_str?(picker)
26
-
27
- raise DataFrameArgumentError, "Invalid argument #{args}"
33
+ DataFrame.new(@table[ary])
28
34
  end
29
35
 
30
36
  # drop some variables to create remainder sub DataFrame
@@ -38,24 +44,24 @@ module RedAmber
38
44
  dropper.flatten!
39
45
 
40
46
  key_vector = Vector.new(keys)
41
- dropper_vector = parse_to_vector(dropper)
42
-
43
- picker =
44
- if dropper_vector.boolean?
45
- key_vector.filter(*dropper_vector.primitive_invert).each.map(&:to_sym)
46
- elsif dropper_vector.numeric?
47
- keys - key_vector.take(*dropper_vector).each.map(&:to_sym)
48
- else
47
+ vec = parse_to_vector(dropper, vsize: n_keys)
48
+
49
+ ary =
50
+ if vec.boolean?
51
+ key_vector.filter(*vec.primitive_invert).each.map(&:to_sym) # Array
52
+ elsif vec.numeric?
53
+ keys - key_vector.take(*vec).each.map(&:to_sym) # Array
54
+ elsif vec.string? || vec.dictionary?
49
55
  keys - dropper
56
+ else
57
+ raise DataFrameArgumentError, "Invalid argument #{args}"
50
58
  end
51
59
 
52
- return DataFrame.new if picker.empty?
60
+ return DataFrame.new if ary.empty?
53
61
 
54
62
  # DataFrame#[] creates a Vector when a single key is specified.
55
63
  # DataFrame#drop creates a DataFrame with single key.
56
- return DataFrame.new(@table[picker]) if sym_or_str?(picker)
57
-
58
- raise DataFrameArgumentError, "Invalid argument #{args}"
64
+ DataFrame.new(@table[ary])
59
65
  end
60
66
 
61
67
  # rename variables to create a new DataFrame
@@ -54,9 +54,11 @@ module RedAmber
54
54
  raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
55
55
 
56
56
  df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
57
- df = df[@group_keys, df.keys - @group_keys]
58
- # if counts are the same (no nil included), aggregate count columns.
59
- df = df[df.keys[0..1]].rename(df.keys[1], :count) if func == :count && df.to_h.values[1..].uniq.size == 1
57
+ df = df.pick(@group_keys, df.keys - @group_keys)
58
+ # if counts are the same (and do not include NaN or nil), aggregate count columns.
59
+ if func == :count && df.pick(@group_keys.size..).to_h.values.uniq.size == 1
60
+ df = df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
61
+ end
60
62
  df
61
63
  end
62
64
  end