red_amber 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +69 -2
- data/README.md +83 -280
- data/doc/DataFrame.md +279 -265
- data/doc/Vector.md +28 -36
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +10 -37
- data/lib/red_amber/data_frame_displayable.rb +56 -3
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +8 -6
- data/lib/red_amber/data_frame_variable_operation.rb +25 -19
- data/lib/red_amber/group.rb +5 -3
- data/lib/red_amber/helper.rb +20 -18
- data/lib/red_amber/vector.rb +49 -30
- data/lib/red_amber/vector_selectable.rb +9 -1
- data/lib/red_amber/vector_updatable.rb +6 -3
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -0
- metadata +13 -3
- data/doc/examples_of_red_amber.ipynb +0 -8979
data/doc/Vector.md
CHANGED
@@ -7,7 +7,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
7
7
|
### Create from a column in a DataFrame
|
8
8
|
|
9
9
|
```ruby
|
10
|
-
df =
|
10
|
+
df = DataFrame.new(x: [1, 2, 3])
|
11
11
|
df[:x]
|
12
12
|
# =>
|
13
13
|
#<RedAmber::Vector(:uint8, size=3):0x000000000000f4ec>
|
@@ -17,13 +17,13 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
17
17
|
### New from an Array
|
18
18
|
|
19
19
|
```ruby
|
20
|
-
vector =
|
20
|
+
vector = Vector.new([1, 2, 3])
|
21
21
|
# or
|
22
|
-
vector =
|
22
|
+
vector = Vector.new(1, 2, 3)
|
23
23
|
# or
|
24
|
-
vector =
|
24
|
+
vector = Vector.new(1..3)
|
25
25
|
# or
|
26
|
-
vector =
|
26
|
+
vector = Vector.new(Arrow::Array.new([1, 2, 3]))
|
27
27
|
|
28
28
|
# =>
|
29
29
|
#<RedAmber::Vector(:uint8, size=3):0x000000000000f514>
|
@@ -61,7 +61,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
61
61
|
|
62
62
|
### `type_class`
|
63
63
|
|
64
|
-
### `each`
|
64
|
+
### `each`, `map`, `collect`
|
65
65
|
|
66
66
|
If block is not given, returns Enumerator.
|
67
67
|
|
@@ -78,7 +78,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
78
78
|
- `limit` sets size limit to display a long array.
|
79
79
|
|
80
80
|
```ruby
|
81
|
-
vector =
|
81
|
+
vector = Vector.new((1..50).to_a)
|
82
82
|
# =>
|
83
83
|
#<RedAmber::Vector(:uint8, size=50):0x000000000000f528>
|
84
84
|
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ... ]
|
@@ -95,8 +95,8 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
95
95
|
- Negative indices are also accepted, as with Ruby's built-in Array.
|
96
96
|
|
97
97
|
```ruby
|
98
|
-
array =
|
99
|
-
indices =
|
98
|
+
array = Vector.new(%w[A B C D E])
|
99
|
+
indices = Vector.new([0.1, -0.5, -5.1])
|
100
100
|
array.take(indices)
|
101
101
|
# or
|
102
102
|
array[indices]
|
@@ -106,7 +106,7 @@ array[indices]
|
|
106
106
|
["A", "E", "A"]
|
107
107
|
```
|
108
108
|
|
109
|
-
### `filter(booleans)`, `[](booleans)`
|
109
|
+
### `filter(booleans)`, `select(booleans)`, `[](booleans)`
|
110
110
|
|
111
111
|
- Acceptable class for booleans:
|
112
112
|
- An array of true, false, or nil
|
@@ -114,7 +114,7 @@ array[indices]
|
|
114
114
|
- Arrow::BooleanArray
|
115
115
|
|
116
116
|
```ruby
|
117
|
-
array =
|
117
|
+
array = Vector.new(%w[A B C D E])
|
118
118
|
booleans = [true, false, nil, false, true]
|
119
119
|
array.filter(booleans)
|
120
120
|
# or
|
@@ -124,6 +124,7 @@ array[booleans]
|
|
124
124
|
#<RedAmber::Vector(:string, size=2):0x000000000000f21c>
|
125
125
|
["A", "E"]
|
126
126
|
```
|
127
|
+
`filter` and `select` also accept a block.
|
127
128
|
|
128
129
|
## Functions
|
129
130
|
|
@@ -158,7 +159,7 @@ Options can be used as follows.
|
|
158
159
|
See the [document of C++ function](https://arrow.apache.org/docs/cpp/compute.html) for detail.
|
159
160
|
|
160
161
|
```ruby
|
161
|
-
double =
|
162
|
+
double = Vector.new([1, 0/0.0, -1/0.0, 1/0.0, nil, ""])
|
162
163
|
#=>
|
163
164
|
#<RedAmber::Vector(:double, size=6):0x000000000000f910>
|
164
165
|
[1.0, NaN, -Infinity, Infinity, nil, 0.0]
|
@@ -168,7 +169,7 @@ double.count(mode: :only_valid) #=> 5, default
|
|
168
169
|
double.count(mode: :only_null) #=> 1
|
169
170
|
double.count(mode: :all) #=> 6
|
170
171
|
|
171
|
-
boolean =
|
172
|
+
boolean = Vector.new([true, true, nil])
|
172
173
|
#=>
|
173
174
|
#<RedAmber::Vector(:boolean, size=3):0x000000000000f924>
|
174
175
|
[true, true, nil]
|
@@ -215,7 +216,7 @@ Examples of options for `#round`;
|
|
215
216
|
- `round_mode` Specify rounding mode.
|
216
217
|
|
217
218
|
```ruby
|
218
|
-
double =
|
219
|
+
double = Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
|
219
220
|
# => [15.15, 2.5, 3.5, -4.5, -5.5]
|
220
221
|
double.round
|
221
222
|
# => [15.0, 2.0, 4.0, -4.0, -6.0]
|
@@ -293,7 +294,7 @@ double.round(n_digits: -1)
|
|
293
294
|
array = [0.0/0, Float::NAN]
|
294
295
|
array.tally #=> {NaN=>1, NaN=>1}
|
295
296
|
|
296
|
-
vector =
|
297
|
+
vector = Vector.new(array)
|
297
298
|
vector.tally #=> {NaN=>2}
|
298
299
|
vector.value_counts #=> {NaN=>2}
|
299
300
|
```
|
@@ -310,7 +311,7 @@ double.round(n_digits: -1)
|
|
310
311
|
## Coerce
|
311
312
|
|
312
313
|
```ruby
|
313
|
-
vector =
|
314
|
+
vector = Vector.new(1,2,3)
|
314
315
|
# =>
|
315
316
|
#<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
|
316
317
|
[1, 2, 3]
|
@@ -340,12 +341,13 @@ vector * -1
|
|
340
341
|
- Accepts Scalar, Range of Integer, Vector, Array, Arrow::Array as a specifier
|
341
342
|
- Accepts Scalar, Vector, Array and Arrow::Array as a replacer.
|
342
343
|
- Boolean specifiers specify the position of replacer in true.
|
344
|
+
- If `booleans.any` is false, no replacement happens and self is returned.
|
343
345
|
- Index specifiers specify the position of replacer in indices.
|
344
346
|
- replacer specifies the values to be replaced.
|
345
347
|
- The number of true in booleans must be equal to the length of replacer
|
346
348
|
|
347
349
|
```ruby
|
348
|
-
vector =
|
350
|
+
vector = Vector.new([1, 2, 3])
|
349
351
|
booleans = [true, false, true]
|
350
352
|
replacer = [4, 5]
|
351
353
|
vector.replace(booleans, replacer)
|
@@ -379,7 +381,7 @@ vector.replace(booleans, replacer)
|
|
379
381
|
```ruby
|
380
382
|
booleans = [true, false, nil]
|
381
383
|
replacer = -1
|
382
|
-
|
384
|
+
vector.replace(booleans, replacer)
|
383
385
|
=>
|
384
386
|
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
385
387
|
[-1, 2, nil]
|
@@ -390,17 +392,7 @@ vec.replace(booleans, replacer)
|
|
390
392
|
```ruby
|
391
393
|
booleans = [true, false, true]
|
392
394
|
replacer = [nil]
|
393
|
-
|
394
|
-
=>
|
395
|
-
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
396
|
-
[nil, 2, nil]
|
397
|
-
```
|
398
|
-
|
399
|
-
- If no replacer specified, it is same as to specify nil.
|
400
|
-
|
401
|
-
```ruby
|
402
|
-
booleans = [true, false, true]
|
403
|
-
vec.replace(booleans)
|
395
|
+
vector.replace(booleans, replacer)
|
404
396
|
=>
|
405
397
|
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
406
398
|
[nil, 2, nil]
|
@@ -409,7 +401,7 @@ vec.replace(booleans)
|
|
409
401
|
- An example to replace 'NA' to nil.
|
410
402
|
|
411
403
|
```ruby
|
412
|
-
vector =
|
404
|
+
vector = Vector.new(['A', 'B', 'NA'])
|
413
405
|
vector.replace(vector == 'NA', nil)
|
414
406
|
# =>
|
415
407
|
#<RedAmber::Vector(:string, size=3):0x000000000000f8ac>
|
@@ -421,7 +413,7 @@ vector.replace(vector == 'NA', nil)
|
|
421
413
|
Specified indices are used 'as sorted'. Position in indices and replacer may not have correspondence.
|
422
414
|
|
423
415
|
```ruby
|
424
|
-
vector =
|
416
|
+
vector = Vector.new([1, 2, 3])
|
425
417
|
indices = [2, 1]
|
426
418
|
replacer = [4, 5]
|
427
419
|
vector.replace(indices, replacer)
|
@@ -437,7 +429,7 @@ Propagate the last valid observation forward (or backward).
|
|
437
429
|
Or preserve nil if all previous values are nil or at the end.
|
438
430
|
|
439
431
|
```ruby
|
440
|
-
integer =
|
432
|
+
integer = Vector.new([0, 1, nil, 3, nil])
|
441
433
|
integer.fill_nil_forward
|
442
434
|
# =>
|
443
435
|
#<RedAmber::Vector(:uint8, size=5):0x000000000000f960>
|
@@ -459,7 +451,7 @@ Choose values based on self. Self must be a boolean Vector.
|
|
459
451
|
This example will normalize negative indices to positive ones.
|
460
452
|
|
461
453
|
```ruby
|
462
|
-
indices =
|
454
|
+
indices = Vector.new([1, -1, 3, -4])
|
463
455
|
array_size = 10
|
464
456
|
normalized_indices = (indices < 0).if_else(indices + array_size, indices)
|
465
457
|
|
@@ -474,7 +466,7 @@ For each element in self, return true if it is found in given `values`, false ot
|
|
474
466
|
By default, nulls are matched against the value set. (This will be changed in SetLookupOptions: not implemented.)
|
475
467
|
|
476
468
|
```ruby
|
477
|
-
vector =
|
469
|
+
vector = Vector.new %W[A B C D]
|
478
470
|
values = ['A', 'C', 'X']
|
479
471
|
vector.is_in(values)
|
480
472
|
|
@@ -486,7 +478,7 @@ vector.is_in(values)
|
|
486
478
|
`values` are cast to the same class as the Vector.
|
487
479
|
|
488
480
|
```ruby
|
489
|
-
vector =
|
481
|
+
vector = Vector.new([1, 2, 255])
|
490
482
|
vector.is_in(1, -1)
|
491
483
|
|
492
484
|
# =>
|
@@ -499,7 +491,7 @@ vector.is_in(1, -1)
|
|
499
491
|
Shift vector's values by specified `amount`. Shifted space is filled by value `fill`.
|
500
492
|
|
501
493
|
```ruby
|
502
|
-
vector =
|
494
|
+
vector = Vector.new([1, 2, 3, 4, 5])
|
503
495
|
vector.shift
|
504
496
|
|
505
497
|
# =>
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/red_amber/data_frame.rb
CHANGED
@@ -7,6 +7,7 @@ module RedAmber
|
|
7
7
|
# mix-in
|
8
8
|
include DataFrameDisplayable
|
9
9
|
include DataFrameIndexable
|
10
|
+
include DataFrameLoadSave
|
10
11
|
include DataFrameReshaping
|
11
12
|
include DataFrameSelectable
|
12
13
|
include DataFrameVariableOperation
|
@@ -37,6 +38,13 @@ module RedAmber
|
|
37
38
|
# DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
|
38
39
|
# returns empty DataFrame
|
39
40
|
@table = Arrow::Table.new({}, [])
|
41
|
+
in [->(x) { x.respond_to?(:to_arrow) } => arrowable]
|
42
|
+
table = arrowable.to_arrow
|
43
|
+
unless table.is_a?(Arrow::Table)
|
44
|
+
raise DataFrameTypeError,
|
45
|
+
"to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
|
46
|
+
end
|
47
|
+
@table = table
|
40
48
|
in [Arrow::Table => table]
|
41
49
|
@table = table
|
42
50
|
in [DataFrame => dataframe]
|
@@ -52,10 +60,9 @@ module RedAmber
|
|
52
60
|
@table = Arrow::Table.new(*args)
|
53
61
|
end
|
54
62
|
name_unnamed_keys
|
55
|
-
end
|
56
63
|
|
57
|
-
|
58
|
-
|
64
|
+
duplicated_keys = keys.tally.select { |_k, v| v > 1 }.keys
|
65
|
+
raise DataFrameArgumentError, "duplicate keys: #{duplicated_keys}" unless duplicated_keys.empty?
|
59
66
|
end
|
60
67
|
|
61
68
|
attr_reader :table
|
@@ -64,10 +71,6 @@ module RedAmber
|
|
64
71
|
@table
|
65
72
|
end
|
66
73
|
|
67
|
-
def save(output, options = {})
|
68
|
-
@table.save(output, options)
|
69
|
-
end
|
70
|
-
|
71
74
|
# Returns the number of rows.
|
72
75
|
#
|
73
76
|
# @return [Integer] Number of rows.
|
@@ -215,17 +218,6 @@ module RedAmber
|
|
215
218
|
Rover::DataFrame.new(to_h)
|
216
219
|
end
|
217
220
|
|
218
|
-
def to_iruby
|
219
|
-
require 'iruby'
|
220
|
-
return ['text/plain', '(empty DataFrame)'] if empty?
|
221
|
-
|
222
|
-
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
|
223
|
-
size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
|
224
|
-
else
|
225
|
-
['text/html', html_table]
|
226
|
-
end
|
227
|
-
end
|
228
|
-
|
229
221
|
def group(*group_keys, &block)
|
230
222
|
g = Group.new(self, group_keys)
|
231
223
|
g = g.summarize(&block) if block
|
@@ -260,25 +252,6 @@ module RedAmber
|
|
260
252
|
ary[%i[variables keys vectors].index(var)]
|
261
253
|
end
|
262
254
|
|
263
|
-
def html_table
|
264
|
-
reduced = size > 8 ? self[0..4, -4..-1] : self
|
265
|
-
|
266
|
-
converted = reduced.assign do
|
267
|
-
vectors.select.with_object({}) do |vector, assigner|
|
268
|
-
if vector.has_nil?
|
269
|
-
assigner[vector.key] = vector.to_a.map do |e|
|
270
|
-
e = e.nil? ? '<i>(nil)</i>' : e.to_s # nil
|
271
|
-
e = '""' if e.empty? # empty string
|
272
|
-
e.sub(/(\s+)/, '"\1"') # blank spaces
|
273
|
-
end
|
274
|
-
end
|
275
|
-
end
|
276
|
-
end
|
277
|
-
|
278
|
-
html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
|
279
|
-
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
280
|
-
end
|
281
|
-
|
282
255
|
def name_unnamed_keys
|
283
256
|
return unless @table[:'']
|
284
257
|
|
@@ -37,8 +37,12 @@ module RedAmber
|
|
37
37
|
alias_method :describe, :summary
|
38
38
|
|
39
39
|
def inspect
|
40
|
-
|
40
|
+
mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
|
41
|
+
case mode.upcase
|
42
|
+
when 'TDR'
|
41
43
|
"#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
|
44
|
+
when 'MINIMUM'
|
45
|
+
shape_str
|
42
46
|
else
|
43
47
|
"#<#{shape_str(with_id: true)}>\n#{self}"
|
44
48
|
end
|
@@ -55,6 +59,23 @@ module RedAmber
|
|
55
59
|
"#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
|
56
60
|
end
|
57
61
|
|
62
|
+
def to_iruby
|
63
|
+
require 'iruby'
|
64
|
+
return ['text/plain', '(empty DataFrame)'] if empty?
|
65
|
+
|
66
|
+
mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
|
67
|
+
case mode.upcase
|
68
|
+
when 'PLAIN'
|
69
|
+
['text/plain', inspect]
|
70
|
+
when 'MINIMUM'
|
71
|
+
['text/plain', shape_str]
|
72
|
+
when 'TDR'
|
73
|
+
size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
|
74
|
+
else # 'TABLE'
|
75
|
+
['text/html', html_table]
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
58
79
|
private # =====
|
59
80
|
|
60
81
|
def shape_str(with_id: false)
|
@@ -98,7 +119,7 @@ module RedAmber
|
|
98
119
|
else
|
99
120
|
[shorthand(vector, size, max_element)]
|
100
121
|
end
|
101
|
-
sio.printf header_format, i
|
122
|
+
sio.printf header_format, i, key, type, data_tally.size, a.join(', ')
|
102
123
|
end
|
103
124
|
sio.string
|
104
125
|
end
|
@@ -156,7 +177,7 @@ module RedAmber
|
|
156
177
|
original = self
|
157
178
|
indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
|
158
179
|
df = slice(indices).assign do
|
159
|
-
assigner = { INDEX_KEY => indices.map
|
180
|
+
assigner = { INDEX_KEY => indices.map(&:to_s) }
|
160
181
|
vectors.each_with_object(assigner) do |v, a|
|
161
182
|
a[v.key] = v.to_a.map do |e|
|
162
183
|
if e.nil?
|
@@ -220,5 +241,37 @@ module RedAmber
|
|
220
241
|
"%#{width}s"
|
221
242
|
end
|
222
243
|
end
|
244
|
+
|
245
|
+
def html_table
|
246
|
+
reduced = size > 8 ? self[0..4, -4..-1] : self
|
247
|
+
|
248
|
+
converted = reduced.assign do
|
249
|
+
vectors.select.with_object({}) do |vector, assigner|
|
250
|
+
assigner[vector.key] = vector.map do |element|
|
251
|
+
case element
|
252
|
+
in TrueClass
|
253
|
+
'<i>(true)</i>'
|
254
|
+
in FalseClass
|
255
|
+
'<i>(false)</i>'
|
256
|
+
in NilClass
|
257
|
+
'<i>(nil)</i>'
|
258
|
+
in ''
|
259
|
+
'""'
|
260
|
+
in String
|
261
|
+
element.sub(/^(\s+)$/, '"\1"') # blank spaces
|
262
|
+
in Float
|
263
|
+
format('%g', element)
|
264
|
+
in Integer
|
265
|
+
format('%d', element)
|
266
|
+
else
|
267
|
+
element
|
268
|
+
end
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
|
274
|
+
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
275
|
+
end
|
223
276
|
end
|
224
277
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
|
4
|
+
# mix-ins for the class DataFrame
|
5
|
+
module DataFrameLoadSave
|
6
|
+
# Enable `self.load` as class method of DataFrame
|
7
|
+
def self.included(klass)
|
8
|
+
klass.extend ClassMethods
|
9
|
+
end
|
10
|
+
|
11
|
+
# Enable `self.load` as class method of DataFrame
|
12
|
+
module ClassMethods
|
13
|
+
# Load DataFrame via Arrow::Table.load
|
14
|
+
def load(path, options = {})
|
15
|
+
DataFrame.new(Arrow::Table.load(path, options))
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# Save DataFrame
|
20
|
+
def save(output, options = {})
|
21
|
+
@table.save(output, options)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Save and reload to cast automatically
|
25
|
+
# Goes through a temporary tsv format buffer by default
|
26
|
+
#
|
27
|
+
# experimental feature
|
28
|
+
def auto_cast(format: :tsv)
|
29
|
+
return self if empty?
|
30
|
+
|
31
|
+
tempfile = Arrow::ResizableBuffer.new(1024)
|
32
|
+
save(tempfile, format: format)
|
33
|
+
DataFrame.load(tempfile, format: format)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -9,16 +9,17 @@ module RedAmber
|
|
9
9
|
# to transpose into keys.
|
10
10
|
# If it is not specified, keys[0] is used.
|
11
11
|
# @param new_key [Symbol] key name of transposed index column.
|
12
|
-
# If it is not specified, :
|
12
|
+
# If it is not specified, :NAME is used. If it already exists, :NAME1 or :NAME1.succ is used.
|
13
13
|
# @return [DataFrame] transposed DataFrame
|
14
|
-
def transpose(key: keys.first, name: :
|
14
|
+
def transpose(key: keys.first, name: :NAME)
|
15
15
|
raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
|
16
16
|
|
17
17
|
# Find unused name
|
18
18
|
new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
|
19
|
-
name = (:
|
19
|
+
name = (:NAME1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
|
20
20
|
|
21
|
-
|
21
|
+
names = (keys - [key]).map { |x| x&.to_s }
|
22
|
+
hash = { name => names }
|
22
23
|
i = keys.index(key)
|
23
24
|
each_row do |h|
|
24
25
|
k = h.values[i]
|
@@ -33,7 +34,7 @@ module RedAmber
|
|
33
34
|
# @param name [Symbol, String] key of the column which comes **from key names**.
|
34
35
|
# @param value [Symbol, String] key of the column which is come **from values**.
|
35
36
|
# @return [DataFrame] long DataFrame.
|
36
|
-
def to_long(*keep_keys, name: :
|
37
|
+
def to_long(*keep_keys, name: :NAME, value: :VALUE)
|
37
38
|
not_included = keep_keys - keys
|
38
39
|
raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
|
39
40
|
|
@@ -55,6 +56,7 @@ module RedAmber
|
|
55
56
|
end
|
56
57
|
end
|
57
58
|
end
|
59
|
+
hash[name] = hash[name].map { |x| x&.to_s }
|
58
60
|
DataFrame.new(hash)
|
59
61
|
end
|
60
62
|
|
@@ -63,7 +65,7 @@ module RedAmber
|
|
63
65
|
# @param name [Symbol, String] key of the column which will be expanded **to key names**.
|
64
66
|
# @param value [Symbol, String] key of the column which will be expanded **to values**.
|
65
67
|
# @return [DataFrame] wide DataFrame.
|
66
|
-
def to_wide(name: :
|
68
|
+
def to_wide(name: :NAME, value: :VALUE)
|
67
69
|
name = name.to_sym
|
68
70
|
raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
|
69
71
|
|
@@ -15,16 +15,22 @@ module RedAmber
|
|
15
15
|
return DataFrame.new if picker.empty? || picker == [nil]
|
16
16
|
|
17
17
|
key_vector = Vector.new(keys)
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
18
|
+
vec = parse_to_vector(picker, vsize: n_keys)
|
19
|
+
|
20
|
+
ary =
|
21
|
+
if vec.boolean?
|
22
|
+
key_vector.filter(*vec).to_a
|
23
|
+
elsif vec.numeric?
|
24
|
+
key_vector.take(*vec).to_a
|
25
|
+
elsif vec.string? || vec.dictionary?
|
26
|
+
picker
|
27
|
+
else
|
28
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
29
|
+
end
|
22
30
|
|
23
31
|
# DataFrame#[] creates a Vector when a single key is specified.
|
24
32
|
# DataFrame#pick creates a DataFrame with single key.
|
25
|
-
|
26
|
-
|
27
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
33
|
+
DataFrame.new(@table[ary])
|
28
34
|
end
|
29
35
|
|
30
36
|
# drop some variables to create remainder sub DataFrame
|
@@ -38,24 +44,24 @@ module RedAmber
|
|
38
44
|
dropper.flatten!
|
39
45
|
|
40
46
|
key_vector = Vector.new(keys)
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
if
|
45
|
-
key_vector.filter(*
|
46
|
-
elsif
|
47
|
-
keys - key_vector.take(*
|
48
|
-
|
47
|
+
vec = parse_to_vector(dropper, vsize: n_keys)
|
48
|
+
|
49
|
+
ary =
|
50
|
+
if vec.boolean?
|
51
|
+
key_vector.filter(*vec.primitive_invert).each.map(&:to_sym) # Array
|
52
|
+
elsif vec.numeric?
|
53
|
+
keys - key_vector.take(*vec).each.map(&:to_sym) # Array
|
54
|
+
elsif vec.string? || vec.dictionary?
|
49
55
|
keys - dropper
|
56
|
+
else
|
57
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
50
58
|
end
|
51
59
|
|
52
|
-
return DataFrame.new if
|
60
|
+
return DataFrame.new if ary.empty?
|
53
61
|
|
54
62
|
# DataFrame#[] creates a Vector when a single key is specified.
|
55
63
|
# DataFrame#drop creates a DataFrame with single key.
|
56
|
-
|
57
|
-
|
58
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
64
|
+
DataFrame.new(@table[ary])
|
59
65
|
end
|
60
66
|
|
61
67
|
# rename variables to create a new DataFrame
|
data/lib/red_amber/group.rb
CHANGED
@@ -54,9 +54,11 @@ module RedAmber
|
|
54
54
|
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
|
55
55
|
|
56
56
|
df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
|
57
|
-
df = df
|
58
|
-
# if counts are the same (
|
59
|
-
|
57
|
+
df = df.pick(@group_keys, df.keys - @group_keys)
|
58
|
+
# if counts are the same (and do not include NaN or nil), aggregate count columns.
|
59
|
+
if func == :count && df.pick(@group_keys.size..).to_h.values.uniq.size == 1
|
60
|
+
df = df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
|
61
|
+
end
|
60
62
|
df
|
61
63
|
end
|
62
64
|
end
|