red_amber 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +69 -2
- data/README.md +83 -280
- data/doc/DataFrame.md +279 -265
- data/doc/Vector.md +28 -36
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +10 -37
- data/lib/red_amber/data_frame_displayable.rb +56 -3
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +8 -6
- data/lib/red_amber/data_frame_variable_operation.rb +25 -19
- data/lib/red_amber/group.rb +5 -3
- data/lib/red_amber/helper.rb +20 -18
- data/lib/red_amber/vector.rb +49 -30
- data/lib/red_amber/vector_selectable.rb +9 -1
- data/lib/red_amber/vector_updatable.rb +6 -3
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -0
- metadata +13 -3
- data/doc/examples_of_red_amber.ipynb +0 -8979
data/doc/Vector.md
CHANGED
@@ -7,7 +7,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
7
7
|
### Create from a column in a DataFrame
|
8
8
|
|
9
9
|
```ruby
|
10
|
-
df =
|
10
|
+
df = DataFrame.new(x: [1, 2, 3])
|
11
11
|
df[:x]
|
12
12
|
# =>
|
13
13
|
#<RedAmber::Vector(:uint8, size=3):0x000000000000f4ec>
|
@@ -17,13 +17,13 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
17
17
|
### New from an Array
|
18
18
|
|
19
19
|
```ruby
|
20
|
-
vector =
|
20
|
+
vector = Vector.new([1, 2, 3])
|
21
21
|
# or
|
22
|
-
vector =
|
22
|
+
vector = Vector.new(1, 2, 3)
|
23
23
|
# or
|
24
|
-
vector =
|
24
|
+
vector = Vector.new(1..3)
|
25
25
|
# or
|
26
|
-
vector =
|
26
|
+
vector = Vector.new(Arrow::Array.new([1, 2, 3]))
|
27
27
|
|
28
28
|
# =>
|
29
29
|
#<RedAmber::Vector(:uint8, size=3):0x000000000000f514>
|
@@ -61,7 +61,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
61
61
|
|
62
62
|
### `type_class`
|
63
63
|
|
64
|
-
### `each`
|
64
|
+
### `each`, `map`, `collect`
|
65
65
|
|
66
66
|
If block is not given, returns Enumerator.
|
67
67
|
|
@@ -78,7 +78,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
78
78
|
- `limit` sets size limit to display a long array.
|
79
79
|
|
80
80
|
```ruby
|
81
|
-
vector =
|
81
|
+
vector = Vector.new((1..50).to_a)
|
82
82
|
# =>
|
83
83
|
#<RedAmber::Vector(:uint8, size=50):0x000000000000f528>
|
84
84
|
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ... ]
|
@@ -95,8 +95,8 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
95
95
|
- Negative index is also OK like the Ruby's primitive Array.
|
96
96
|
|
97
97
|
```ruby
|
98
|
-
array =
|
99
|
-
indices =
|
98
|
+
array = Vector.new(%w[A B C D E])
|
99
|
+
indices = Vector.new([0.1, -0.5, -5.1])
|
100
100
|
array.take(indices)
|
101
101
|
# or
|
102
102
|
array[indices]
|
@@ -106,7 +106,7 @@ array[indices]
|
|
106
106
|
["A", "E", "A"]
|
107
107
|
```
|
108
108
|
|
109
|
-
### `filter(booleans)`, `[](booleans)`
|
109
|
+
### `filter(booleans)`, `select(booleans)`, `[](booleans)`
|
110
110
|
|
111
111
|
- Acceptable class for booleans:
|
112
112
|
- An array of true, false, or nil
|
@@ -114,7 +114,7 @@ array[indices]
|
|
114
114
|
- Arrow::BooleanArray
|
115
115
|
|
116
116
|
```ruby
|
117
|
-
array =
|
117
|
+
array = Vector.new(%w[A B C D E])
|
118
118
|
booleans = [true, false, nil, false, true]
|
119
119
|
array.filter(booleans)
|
120
120
|
# or
|
@@ -124,6 +124,7 @@ array[booleans]
|
|
124
124
|
#<RedAmber::Vector(:string, size=2):0x000000000000f21c>
|
125
125
|
["A", "E"]
|
126
126
|
```
|
127
|
+
`filter` and `select` also accept a block.
|
127
128
|
|
128
129
|
## Functions
|
129
130
|
|
@@ -158,7 +159,7 @@ Options can be used as follows.
|
|
158
159
|
See the [document of C++ function](https://arrow.apache.org/docs/cpp/compute.html) for detail.
|
159
160
|
|
160
161
|
```ruby
|
161
|
-
double =
|
162
|
+
double = Vector.new([1, 0/0.0, -1/0.0, 1/0.0, nil, ""])
|
162
163
|
#=>
|
163
164
|
#<RedAmber::Vector(:double, size=6):0x000000000000f910>
|
164
165
|
[1.0, NaN, -Infinity, Infinity, nil, 0.0]
|
@@ -168,7 +169,7 @@ double.count(mode: :only_valid) #=> 5, default
|
|
168
169
|
double.count(mode: :only_null) #=> 1
|
169
170
|
double.count(mode: :all) #=> 6
|
170
171
|
|
171
|
-
boolean =
|
172
|
+
boolean = Vector.new([true, true, nil])
|
172
173
|
#=>
|
173
174
|
#<RedAmber::Vector(:boolean, size=3):0x000000000000f924>
|
174
175
|
[true, true, nil]
|
@@ -215,7 +216,7 @@ Examples of options for `#round`;
|
|
215
216
|
- `round_mode` Specify rounding mode.
|
216
217
|
|
217
218
|
```ruby
|
218
|
-
double =
|
219
|
+
double = Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
|
219
220
|
# => [15.15, 2.5, 3.5, -4.5, -5.5]
|
220
221
|
double.round
|
221
222
|
# => [15.0, 2.0, 4.0, -4.0, -6.0]
|
@@ -293,7 +294,7 @@ double.round(n_digits: -1)
|
|
293
294
|
array = [0.0/0, Float::NAN]
|
294
295
|
array.tally #=> {NaN=>1, NaN=>1}
|
295
296
|
|
296
|
-
vector =
|
297
|
+
vector = Vector.new(array)
|
297
298
|
vector.tally #=> {NaN=>2}
|
298
299
|
vector.value_counts #=> {NaN=>2}
|
299
300
|
```
|
@@ -310,7 +311,7 @@ double.round(n_digits: -1)
|
|
310
311
|
## Coerce
|
311
312
|
|
312
313
|
```ruby
|
313
|
-
vector =
|
314
|
+
vector = Vector.new(1,2,3)
|
314
315
|
# =>
|
315
316
|
#<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
|
316
317
|
[1, 2, 3]
|
@@ -340,12 +341,13 @@ vector * -1
|
|
340
341
|
- Accepts Scalar, Range of Integer, Vector, Array, Arrow::Array as a specifier
|
341
342
|
- Accepts Scalar, Vector, Array and Arrow::Array as a replacer.
|
342
343
|
- Boolean specifiers specify the position of replacer in true.
|
344
|
+
- If booleans.any is false, no replacement happens and self is returned.
|
343
345
|
- Index specifiers specify the position of replacer in indices.
|
344
346
|
- replacer specifies the values to be replaced.
|
345
347
|
- The number of true in booleans must be equal to the length of replacer
|
346
348
|
|
347
349
|
```ruby
|
348
|
-
vector =
|
350
|
+
vector = Vector.new([1, 2, 3])
|
349
351
|
booleans = [true, false, true]
|
350
352
|
replacer = [4, 5]
|
351
353
|
vector.replace(booleans, replacer)
|
@@ -379,7 +381,7 @@ vector.replace(booleans, replacer)
|
|
379
381
|
```ruby
|
380
382
|
booleans = [true, false, nil]
|
381
383
|
replacer = -1
|
382
|
-
|
384
|
+
vector.replace(booleans, replacer)
|
383
385
|
=>
|
384
386
|
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
385
387
|
[-1, 2, nil]
|
@@ -390,17 +392,7 @@ vec.replace(booleans, replacer)
|
|
390
392
|
```ruby
|
391
393
|
booleans = [true, false, true]
|
392
394
|
replacer = [nil]
|
393
|
-
|
394
|
-
=>
|
395
|
-
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
396
|
-
[nil, 2, nil]
|
397
|
-
```
|
398
|
-
|
399
|
-
- If no replacer specified, it is same as to specify nil.
|
400
|
-
|
401
|
-
```ruby
|
402
|
-
booleans = [true, false, true]
|
403
|
-
vec.replace(booleans)
|
395
|
+
vector.replace(booleans, replacer)
|
404
396
|
=>
|
405
397
|
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
406
398
|
[nil, 2, nil]
|
@@ -409,7 +401,7 @@ vec.replace(booleans)
|
|
409
401
|
- An example to replace 'NA' to nil.
|
410
402
|
|
411
403
|
```ruby
|
412
|
-
vector =
|
404
|
+
vector = Vector.new(['A', 'B', 'NA'])
|
413
405
|
vector.replace(vector == 'NA', nil)
|
414
406
|
# =>
|
415
407
|
#<RedAmber::Vector(:string, size=3):0x000000000000f8ac>
|
@@ -421,7 +413,7 @@ vector.replace(vector == 'NA', nil)
|
|
421
413
|
Specified indices are used 'as sorted'. Position in indices and replacer may not have correspondence.
|
422
414
|
|
423
415
|
```ruby
|
424
|
-
vector =
|
416
|
+
vector = Vector.new([1, 2, 3])
|
425
417
|
indices = [2, 1]
|
426
418
|
replacer = [4, 5]
|
427
419
|
vector.replace(indices, replacer)
|
@@ -437,7 +429,7 @@ Propagate the last valid observation forward (or backward).
|
|
437
429
|
Or preserve nil if all previous values are nil or at the end.
|
438
430
|
|
439
431
|
```ruby
|
440
|
-
integer =
|
432
|
+
integer = Vector.new([0, 1, nil, 3, nil])
|
441
433
|
integer.fill_nil_forward
|
442
434
|
# =>
|
443
435
|
#<RedAmber::Vector(:uint8, size=5):0x000000000000f960>
|
@@ -459,7 +451,7 @@ Choose values based on self. Self must be a boolean Vector.
|
|
459
451
|
This example will normalize negative indices to positive ones.
|
460
452
|
|
461
453
|
```ruby
|
462
|
-
indices =
|
454
|
+
indices = Vector.new([1, -1, 3, -4])
|
463
455
|
array_size = 10
|
464
456
|
normalized_indices = (indices < 0).if_else(indices + array_size, indices)
|
465
457
|
|
@@ -474,7 +466,7 @@ For each element in self, return true if it is found in given `values`, false ot
|
|
474
466
|
By default, nulls are matched against the value set. (This will be changed in SetLookupOptions: not implemented.)
|
475
467
|
|
476
468
|
```ruby
|
477
|
-
vector =
|
469
|
+
vector = Vector.new %W[A B C D]
|
478
470
|
values = ['A', 'C', 'X']
|
479
471
|
vector.is_in(values)
|
480
472
|
|
@@ -486,7 +478,7 @@ vector.is_in(values)
|
|
486
478
|
`values` are cast to the same class as the Vector.
|
487
479
|
|
488
480
|
```ruby
|
489
|
-
vector =
|
481
|
+
vector = Vector.new([1, 2, 255])
|
490
482
|
vector.is_in(1, -1)
|
491
483
|
|
492
484
|
# =>
|
@@ -499,7 +491,7 @@ vector.is_in(1, -1)
|
|
499
491
|
Shift vector's values by specified `amount`. Shifted space is filled by value `fill`.
|
500
492
|
|
501
493
|
```ruby
|
502
|
-
vector =
|
494
|
+
vector = Vector.new([1, 2, 3, 4, 5])
|
503
495
|
vector.shift
|
504
496
|
|
505
497
|
# =>
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/red_amber/data_frame.rb
CHANGED
@@ -7,6 +7,7 @@ module RedAmber
|
|
7
7
|
# mix-in
|
8
8
|
include DataFrameDisplayable
|
9
9
|
include DataFrameIndexable
|
10
|
+
include DataFrameLoadSave
|
10
11
|
include DataFrameReshaping
|
11
12
|
include DataFrameSelectable
|
12
13
|
include DataFrameVariableOperation
|
@@ -37,6 +38,13 @@ module RedAmber
|
|
37
38
|
# DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
|
38
39
|
# returns empty DataFrame
|
39
40
|
@table = Arrow::Table.new({}, [])
|
41
|
+
in [->(x) { x.respond_to?(:to_arrow) } => arrowable]
|
42
|
+
table = arrowable.to_arrow
|
43
|
+
unless table.is_a?(Arrow::Table)
|
44
|
+
raise DataFrameTypeError,
|
45
|
+
"to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
|
46
|
+
end
|
47
|
+
@table = table
|
40
48
|
in [Arrow::Table => table]
|
41
49
|
@table = table
|
42
50
|
in [DataFrame => dataframe]
|
@@ -52,10 +60,9 @@ module RedAmber
|
|
52
60
|
@table = Arrow::Table.new(*args)
|
53
61
|
end
|
54
62
|
name_unnamed_keys
|
55
|
-
end
|
56
63
|
|
57
|
-
|
58
|
-
|
64
|
+
duplicated_keys = keys.tally.select { |_k, v| v > 1 }.keys
|
65
|
+
raise DataFrameArgumentError, "duplicate keys: #{duplicated_keys}" unless duplicated_keys.empty?
|
59
66
|
end
|
60
67
|
|
61
68
|
attr_reader :table
|
@@ -64,10 +71,6 @@ module RedAmber
|
|
64
71
|
@table
|
65
72
|
end
|
66
73
|
|
67
|
-
def save(output, options = {})
|
68
|
-
@table.save(output, options)
|
69
|
-
end
|
70
|
-
|
71
74
|
# Returns the number of rows.
|
72
75
|
#
|
73
76
|
# @return [Integer] Number of rows.
|
@@ -215,17 +218,6 @@ module RedAmber
|
|
215
218
|
Rover::DataFrame.new(to_h)
|
216
219
|
end
|
217
220
|
|
218
|
-
def to_iruby
|
219
|
-
require 'iruby'
|
220
|
-
return ['text/plain', '(empty DataFrame)'] if empty?
|
221
|
-
|
222
|
-
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
|
223
|
-
size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
|
224
|
-
else
|
225
|
-
['text/html', html_table]
|
226
|
-
end
|
227
|
-
end
|
228
|
-
|
229
221
|
def group(*group_keys, &block)
|
230
222
|
g = Group.new(self, group_keys)
|
231
223
|
g = g.summarize(&block) if block
|
@@ -260,25 +252,6 @@ module RedAmber
|
|
260
252
|
ary[%i[variables keys vectors].index(var)]
|
261
253
|
end
|
262
254
|
|
263
|
-
def html_table
|
264
|
-
reduced = size > 8 ? self[0..4, -4..-1] : self
|
265
|
-
|
266
|
-
converted = reduced.assign do
|
267
|
-
vectors.select.with_object({}) do |vector, assigner|
|
268
|
-
if vector.has_nil?
|
269
|
-
assigner[vector.key] = vector.to_a.map do |e|
|
270
|
-
e = e.nil? ? '<i>(nil)</i>' : e.to_s # nil
|
271
|
-
e = '""' if e.empty? # empty string
|
272
|
-
e.sub(/(\s+)/, '"\1"') # blank spaces
|
273
|
-
end
|
274
|
-
end
|
275
|
-
end
|
276
|
-
end
|
277
|
-
|
278
|
-
html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
|
279
|
-
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
280
|
-
end
|
281
|
-
|
282
255
|
def name_unnamed_keys
|
283
256
|
return unless @table[:'']
|
284
257
|
|
@@ -37,8 +37,12 @@ module RedAmber
|
|
37
37
|
alias_method :describe, :summary
|
38
38
|
|
39
39
|
def inspect
|
40
|
-
|
40
|
+
mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
|
41
|
+
case mode.upcase
|
42
|
+
when 'TDR'
|
41
43
|
"#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
|
44
|
+
when 'MINIMUM'
|
45
|
+
shape_str
|
42
46
|
else
|
43
47
|
"#<#{shape_str(with_id: true)}>\n#{self}"
|
44
48
|
end
|
@@ -55,6 +59,23 @@ module RedAmber
|
|
55
59
|
"#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
|
56
60
|
end
|
57
61
|
|
62
|
+
def to_iruby
|
63
|
+
require 'iruby'
|
64
|
+
return ['text/plain', '(empty DataFrame)'] if empty?
|
65
|
+
|
66
|
+
mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
|
67
|
+
case mode.upcase
|
68
|
+
when 'PLAIN'
|
69
|
+
['text/plain', inspect]
|
70
|
+
when 'MINIMUM'
|
71
|
+
['text/plain', shape_str]
|
72
|
+
when 'TDR'
|
73
|
+
size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
|
74
|
+
else # 'TABLE'
|
75
|
+
['text/html', html_table]
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
58
79
|
private # =====
|
59
80
|
|
60
81
|
def shape_str(with_id: false)
|
@@ -98,7 +119,7 @@ module RedAmber
|
|
98
119
|
else
|
99
120
|
[shorthand(vector, size, max_element)]
|
100
121
|
end
|
101
|
-
sio.printf header_format, i
|
122
|
+
sio.printf header_format, i, key, type, data_tally.size, a.join(', ')
|
102
123
|
end
|
103
124
|
sio.string
|
104
125
|
end
|
@@ -156,7 +177,7 @@ module RedAmber
|
|
156
177
|
original = self
|
157
178
|
indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
|
158
179
|
df = slice(indices).assign do
|
159
|
-
assigner = { INDEX_KEY => indices.map
|
180
|
+
assigner = { INDEX_KEY => indices.map(&:to_s) }
|
160
181
|
vectors.each_with_object(assigner) do |v, a|
|
161
182
|
a[v.key] = v.to_a.map do |e|
|
162
183
|
if e.nil?
|
@@ -220,5 +241,37 @@ module RedAmber
|
|
220
241
|
"%#{width}s"
|
221
242
|
end
|
222
243
|
end
|
244
|
+
|
245
|
+
def html_table
|
246
|
+
reduced = size > 8 ? self[0..4, -4..-1] : self
|
247
|
+
|
248
|
+
converted = reduced.assign do
|
249
|
+
vectors.select.with_object({}) do |vector, assigner|
|
250
|
+
assigner[vector.key] = vector.map do |element|
|
251
|
+
case element
|
252
|
+
in TrueClass
|
253
|
+
'<i>(true)</i>'
|
254
|
+
in FalseClass
|
255
|
+
'<i>(false)</i>'
|
256
|
+
in NilClass
|
257
|
+
'<i>(nil)</i>'
|
258
|
+
in ''
|
259
|
+
'""'
|
260
|
+
in String
|
261
|
+
element.sub(/^(\s+)$/, '"\1"') # blank spaces
|
262
|
+
in Float
|
263
|
+
format('%g', element)
|
264
|
+
in Integer
|
265
|
+
format('%d', element)
|
266
|
+
else
|
267
|
+
element
|
268
|
+
end
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
|
274
|
+
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
275
|
+
end
|
223
276
|
end
|
224
277
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
|
4
|
+
# mix-ins for the class DataFrame
|
5
|
+
module DataFrameLoadSave
|
6
|
+
# Enable `self.load` as class method of DataFrame
|
7
|
+
def self.included(klass)
|
8
|
+
klass.extend ClassMethods
|
9
|
+
end
|
10
|
+
|
11
|
+
# Enable `self.load` as class method of DataFrame
|
12
|
+
module ClassMethods
|
13
|
+
# Load DataFrame via Arrow::Table.load
|
14
|
+
def load(path, options = {})
|
15
|
+
DataFrame.new(Arrow::Table.load(path, options))
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# Save DataFrame
|
20
|
+
def save(output, options = {})
|
21
|
+
@table.save(output, options)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Save and reload to cast automatically
|
25
|
+
# Via tsv format file temporarily as default
|
26
|
+
#
|
27
|
+
# experimental feature
|
28
|
+
def auto_cast(format: :tsv)
|
29
|
+
return self if empty?
|
30
|
+
|
31
|
+
tempfile = Arrow::ResizableBuffer.new(1024)
|
32
|
+
save(tempfile, format: format)
|
33
|
+
DataFrame.load(tempfile, format: format)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -9,16 +9,17 @@ module RedAmber
|
|
9
9
|
# to transpose into keys.
|
10
10
|
# If it is not specified, keys[0] is used.
|
11
11
|
# @param new_key [Symbol] key name of transposed index column.
|
12
|
-
# If it is not specified, :
|
12
|
+
# If it is not specified, :NAME is used. If it already exists, :NAME1 or :NAME1.succ is used.
|
13
13
|
# @return [DataFrame] transposed DataFrame
|
14
|
-
def transpose(key: keys.first, name: :
|
14
|
+
def transpose(key: keys.first, name: :NAME)
|
15
15
|
raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
|
16
16
|
|
17
17
|
# Find unused name
|
18
18
|
new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
|
19
|
-
name = (:
|
19
|
+
name = (:NAME1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
|
20
20
|
|
21
|
-
|
21
|
+
names = (keys - [key]).map { |x| x&.to_s }
|
22
|
+
hash = { name => names }
|
22
23
|
i = keys.index(key)
|
23
24
|
each_row do |h|
|
24
25
|
k = h.values[i]
|
@@ -33,7 +34,7 @@ module RedAmber
|
|
33
34
|
# @param name [Symbol, String] key of the column which comes **from key names**.
|
34
35
|
# @param value [Symbol, String] key of the column which comes **from values**.
|
35
36
|
# @return [DataFrame] long DataFrame.
|
36
|
-
def to_long(*keep_keys, name: :
|
37
|
+
def to_long(*keep_keys, name: :NAME, value: :VALUE)
|
37
38
|
not_included = keep_keys - keys
|
38
39
|
raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
|
39
40
|
|
@@ -55,6 +56,7 @@ module RedAmber
|
|
55
56
|
end
|
56
57
|
end
|
57
58
|
end
|
59
|
+
hash[name] = hash[name].map { |x| x&.to_s }
|
58
60
|
DataFrame.new(hash)
|
59
61
|
end
|
60
62
|
|
@@ -63,7 +65,7 @@ module RedAmber
|
|
63
65
|
# @param name [Symbol, String] key of the column which will be expanded **to key names**.
|
64
66
|
# @param value [Symbol, String] key of the column which will be expanded **to values**.
|
65
67
|
# @return [DataFrame] wide DataFrame.
|
66
|
-
def to_wide(name: :
|
68
|
+
def to_wide(name: :NAME, value: :VALUE)
|
67
69
|
name = name.to_sym
|
68
70
|
raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
|
69
71
|
|
@@ -15,16 +15,22 @@ module RedAmber
|
|
15
15
|
return DataFrame.new if picker.empty? || picker == [nil]
|
16
16
|
|
17
17
|
key_vector = Vector.new(keys)
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
18
|
+
vec = parse_to_vector(picker, vsize: n_keys)
|
19
|
+
|
20
|
+
ary =
|
21
|
+
if vec.boolean?
|
22
|
+
key_vector.filter(*vec).to_a
|
23
|
+
elsif vec.numeric?
|
24
|
+
key_vector.take(*vec).to_a
|
25
|
+
elsif vec.string? || vec.dictionary?
|
26
|
+
picker
|
27
|
+
else
|
28
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
29
|
+
end
|
22
30
|
|
23
31
|
# DataFrame#[] creates a Vector with single key is specified.
|
24
32
|
# DataFrame#pick creates a DataFrame with single key.
|
25
|
-
|
26
|
-
|
27
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
33
|
+
DataFrame.new(@table[ary])
|
28
34
|
end
|
29
35
|
|
30
36
|
# drop some variables to create remainder sub DataFrame
|
@@ -38,24 +44,24 @@ module RedAmber
|
|
38
44
|
dropper.flatten!
|
39
45
|
|
40
46
|
key_vector = Vector.new(keys)
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
if
|
45
|
-
key_vector.filter(*
|
46
|
-
elsif
|
47
|
-
keys - key_vector.take(*
|
48
|
-
|
47
|
+
vec = parse_to_vector(dropper, vsize: n_keys)
|
48
|
+
|
49
|
+
ary =
|
50
|
+
if vec.boolean?
|
51
|
+
key_vector.filter(*vec.primitive_invert).each.map(&:to_sym) # Array
|
52
|
+
elsif vec.numeric?
|
53
|
+
keys - key_vector.take(*vec).each.map(&:to_sym) # Array
|
54
|
+
elsif vec.string? || vec.dictionary?
|
49
55
|
keys - dropper
|
56
|
+
else
|
57
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
50
58
|
end
|
51
59
|
|
52
|
-
return DataFrame.new if
|
60
|
+
return DataFrame.new if ary.empty?
|
53
61
|
|
54
62
|
# DataFrame#[] creates a Vector with single key is specified.
|
55
63
|
# DataFrame#drop creates a DataFrame with single key.
|
56
|
-
|
57
|
-
|
58
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
64
|
+
DataFrame.new(@table[ary])
|
59
65
|
end
|
60
66
|
|
61
67
|
# rename variables to create a new DataFrame
|
data/lib/red_amber/group.rb
CHANGED
@@ -54,9 +54,11 @@ module RedAmber
|
|
54
54
|
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
|
55
55
|
|
56
56
|
df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
|
57
|
-
df = df
|
58
|
-
# if counts are the same (
|
59
|
-
|
57
|
+
df = df.pick(@group_keys, df.keys - @group_keys)
|
58
|
+
# if counts are the same (and do not include NaN or nil), aggregate count columns.
|
59
|
+
if func == :count && df.pick(@group_keys.size..).to_h.values.uniq.size == 1
|
60
|
+
df = df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
|
61
|
+
end
|
60
62
|
df
|
61
63
|
end
|
62
64
|
end
|