red_amber 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -0
- data/CHANGELOG.md +125 -0
- data/README.md +86 -269
- data/doc/DataFrame.md +427 -281
- data/doc/Vector.md +35 -54
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +33 -41
- data/lib/red_amber/data_frame_displayable.rb +59 -6
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +12 -10
- data/lib/red_amber/data_frame_selectable.rb +53 -9
- data/lib/red_amber/data_frame_variable_operation.rb +57 -20
- data/lib/red_amber/group.rb +5 -3
- data/lib/red_amber/helper.rb +20 -18
- data/lib/red_amber/vector.rb +50 -31
- data/lib/red_amber/vector_functions.rb +21 -24
- data/lib/red_amber/vector_selectable.rb +18 -9
- data/lib/red_amber/vector_updatable.rb +6 -3
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -0
- metadata +13 -3
- data/doc/examples_of_red_amber.ipynb +0 -6783
data/doc/Vector.md
CHANGED
@@ -7,7 +7,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
7
7
|
### Create from a column in a DataFrame
|
8
8
|
|
9
9
|
```ruby
|
10
|
-
df =
|
10
|
+
df = DataFrame.new(x: [1, 2, 3])
|
11
11
|
df[:x]
|
12
12
|
# =>
|
13
13
|
#<RedAmber::Vector(:uint8, size=3):0x000000000000f4ec>
|
@@ -17,13 +17,13 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
17
17
|
### New from an Array
|
18
18
|
|
19
19
|
```ruby
|
20
|
-
vector =
|
20
|
+
vector = Vector.new([1, 2, 3])
|
21
21
|
# or
|
22
|
-
vector =
|
22
|
+
vector = Vector.new(1, 2, 3)
|
23
23
|
# or
|
24
|
-
vector =
|
24
|
+
vector = Vector.new(1..3)
|
25
25
|
# or
|
26
|
-
vector =
|
26
|
+
vector = Vector.new(Arrow::Array.new([1, 2, 3]))
|
27
27
|
|
28
28
|
# =>
|
29
29
|
#<RedAmber::Vector(:uint8, size=3):0x000000000000f514>
|
@@ -61,7 +61,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
61
61
|
|
62
62
|
### `type_class`
|
63
63
|
|
64
|
-
### `each`
|
64
|
+
### `each`, `map`, `collect`
|
65
65
|
|
66
66
|
If block is not given, returns Enumerator.
|
67
67
|
|
@@ -78,7 +78,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
78
78
|
- `limit` sets size limit to display a long array.
|
79
79
|
|
80
80
|
```ruby
|
81
|
-
vector =
|
81
|
+
vector = Vector.new((1..50).to_a)
|
82
82
|
# =>
|
83
83
|
#<RedAmber::Vector(:uint8, size=50):0x000000000000f528>
|
84
84
|
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ... ]
|
@@ -95,8 +95,8 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
95
95
|
- Negative index is also OK like the Ruby's primitive Array.
|
96
96
|
|
97
97
|
```ruby
|
98
|
-
array =
|
99
|
-
indices =
|
98
|
+
array = Vector.new(%w[A B C D E])
|
99
|
+
indices = Vector.new([0.1, -0.5, -5.1])
|
100
100
|
array.take(indices)
|
101
101
|
# or
|
102
102
|
array[indices]
|
@@ -106,7 +106,7 @@ array[indices]
|
|
106
106
|
["A", "E", "A"]
|
107
107
|
```
|
108
108
|
|
109
|
-
### `filter(booleans)`, `[](booleans)`
|
109
|
+
### `filter(booleans)`, `select(booleans)`, `[](booleans)`
|
110
110
|
|
111
111
|
- Acceptable class for booleans:
|
112
112
|
- An array of true, false, or nil
|
@@ -114,7 +114,7 @@ array[indices]
|
|
114
114
|
- Arrow::BooleanArray
|
115
115
|
|
116
116
|
```ruby
|
117
|
-
array =
|
117
|
+
array = Vector.new(%w[A B C D E])
|
118
118
|
booleans = [true, false, nil, false, true]
|
119
119
|
array.filter(booleans)
|
120
120
|
# or
|
@@ -124,6 +124,7 @@ array[booleans]
|
|
124
124
|
#<RedAmber::Vector(:string, size=2):0x000000000000f21c>
|
125
125
|
["A", "E"]
|
126
126
|
```
|
127
|
+
`filter` and `select` also accept a block.
|
127
128
|
|
128
129
|
## Functions
|
129
130
|
|
@@ -158,7 +159,7 @@ Options can be used as follows.
|
|
158
159
|
See the [document of C++ function](https://arrow.apache.org/docs/cpp/compute.html) for detail.
|
159
160
|
|
160
161
|
```ruby
|
161
|
-
double =
|
162
|
+
double = Vector.new([1, 0/0.0, -1/0.0, 1/0.0, nil, ""])
|
162
163
|
#=>
|
163
164
|
#<RedAmber::Vector(:double, size=6):0x000000000000f910>
|
164
165
|
[1.0, NaN, -Infinity, Infinity, nil, 0.0]
|
@@ -168,7 +169,7 @@ double.count(mode: :only_valid) #=> 5, default
|
|
168
169
|
double.count(mode: :only_null) #=> 1
|
169
170
|
double.count(mode: :all) #=> 6
|
170
171
|
|
171
|
-
boolean =
|
172
|
+
boolean = Vector.new([true, true, nil])
|
172
173
|
#=>
|
173
174
|
#<RedAmber::Vector(:boolean, size=3):0x000000000000f924>
|
174
175
|
[true, true, nil]
|
@@ -187,8 +188,8 @@ boolean.all(skip_nulls: false) #=> false
|
|
187
188
|
| ✓ `-@` | | ✓ | | |as `-vector`|
|
188
189
|
| ✓ `negate` | | ✓ | | |`-@` |
|
189
190
|
| ✓ `abs` | | ✓ | | | |
|
190
|
-
|
|
191
|
-
|
|
191
|
+
| ✓ `acos` | | ✓ | | | |
|
192
|
+
| ✓ `asin` | | ✓ | | | |
|
192
193
|
| ✓ `atan` | | ✓ | | | |
|
193
194
|
| ✓ `bit_wise_not`| | (✓) | | |integer only|
|
194
195
|
| ✓ `ceil` | | ✓ | | | |
|
@@ -197,10 +198,10 @@ boolean.all(skip_nulls: false) #=> false
|
|
197
198
|
| ✓`fill_nil_forward` | ✓ | ✓ | ✓ | | |
|
198
199
|
| ✓ `floor` | | ✓ | | | |
|
199
200
|
| ✓ `invert` | ✓ | | | |`!`, alias `not`|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
201
|
+
| ✓ `ln` | | ✓ | | | |
|
202
|
+
| ✓ `log10` | | ✓ | | | |
|
203
|
+
| ✓ `log1p` | | ✓ | | |Compute natural log of (1+x)|
|
204
|
+
| ✓ `log2` | | ✓ | | | |
|
204
205
|
| ✓ `round` | | ✓ | | ✓ Round (:mode, :n_digits)| |
|
205
206
|
| ✓ `round_to_multiple`| | ✓ | | ✓ RoundToMultiple :mode, :multiple| multiple must be an Arrow::Scalar|
|
206
207
|
| ✓ `sign` | | ✓ | | | |
|
@@ -215,7 +216,7 @@ Examples of options for `#round`;
|
|
215
216
|
- `round_mode` Specify rounding mode.
|
216
217
|
|
217
218
|
```ruby
|
218
|
-
double =
|
219
|
+
double = Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
|
219
220
|
# => [15.15, 2.5, 3.5, -4.5, -5.5]
|
220
221
|
double.round
|
221
222
|
# => [15.0, 2.0, 4.0, -4.0, -6.0]
|
@@ -267,7 +268,7 @@ double.round(n_digits: -1)
|
|
267
268
|
| ✓ `is_valid` | ✓ | ✓ | ✓ | | |
|
268
269
|
| ✓ `less` | ✓ | ✓ | ✓ | |`<`, alias `lt`|
|
269
270
|
| ✓ `less_equal` | ✓ | ✓ | ✓ | |`<=`, alias `le`|
|
270
|
-
|
|
271
|
+
| ✓ `logb` | | ✓ | | |logb(b) Compute base `b` logarithm|
|
271
272
|
|[ ]`mod` | | [ ] | | | `%` |
|
272
273
|
| ✓ `multiply` | | ✓ | | | `*` |
|
273
274
|
| ✓ `not_equal` | ✓ | ✓ | ✓ | |`!=`, alias `ne`|
|
@@ -283,8 +284,6 @@ double.round(n_digits: -1)
|
|
283
284
|
|
284
285
|
Returns a new array with distinct elements.
|
285
286
|
|
286
|
-
(Not impremented functions)
|
287
|
-
|
288
287
|
### `tally` and `value_counts`
|
289
288
|
|
290
289
|
Compute counts of unique elements and return a Hash.
|
@@ -295,7 +294,7 @@ double.round(n_digits: -1)
|
|
295
294
|
array = [0.0/0, Float::NAN]
|
296
295
|
array.tally #=> {NaN=>1, NaN=>1}
|
297
296
|
|
298
|
-
vector =
|
297
|
+
vector = Vector.new(array)
|
299
298
|
vector.tally #=> {NaN=>2}
|
300
299
|
vector.value_counts #=> {NaN=>2}
|
301
300
|
```
|
@@ -309,19 +308,10 @@ double.round(n_digits: -1)
|
|
309
308
|
|
310
309
|
### `sort_indexes`, `sort_indices`, `array_sort_indices`
|
311
310
|
|
312
|
-
### [ ] `sort`, `sort_by`
|
313
|
-
### [ ] argmin, argmax
|
314
|
-
### [ ] (array functions)
|
315
|
-
### [ ] (strings functions)
|
316
|
-
### [ ] (temporal functions)
|
317
|
-
### [ ] (conditional functions)
|
318
|
-
### [ ] (index functions)
|
319
|
-
### [ ] (other functions)
|
320
|
-
|
321
311
|
## Coerce
|
322
312
|
|
323
313
|
```ruby
|
324
|
-
vector =
|
314
|
+
vector = Vector.new(1,2,3)
|
325
315
|
# =>
|
326
316
|
#<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
|
327
317
|
[1, 2, 3]
|
@@ -351,12 +341,13 @@ vector * -1
|
|
351
341
|
- Accepts Scalar, Range of Integer, Vector, Array, Arrow::Array as a specifier
|
352
342
|
- Accepts Scalar, Vector, Array and Arrow::Array as a replacer.
|
353
343
|
- Boolean specifiers specify the position of replacer in true.
|
344
|
+
- If booleans.any is false, no replacement happens and self is returned.
|
354
345
|
- Index specifiers specify the position of replacer in indices.
|
355
346
|
- replacer specifies the values to be replaced.
|
356
347
|
- The number of true in booleans must be equal to the length of replacer
|
357
348
|
|
358
349
|
```ruby
|
359
|
-
vector =
|
350
|
+
vector = Vector.new([1, 2, 3])
|
360
351
|
booleans = [true, false, true]
|
361
352
|
replacer = [4, 5]
|
362
353
|
vector.replace(booleans, replacer)
|
@@ -390,7 +381,7 @@ vector.replace(booleans, replacer)
|
|
390
381
|
```ruby
|
391
382
|
booleans = [true, false, nil]
|
392
383
|
replacer = -1
|
393
|
-
|
384
|
+
vector.replace(booleans, replacer)
|
394
385
|
=>
|
395
386
|
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
396
387
|
[-1, 2, nil]
|
@@ -401,17 +392,7 @@ vec.replace(booleans, replacer)
|
|
401
392
|
```ruby
|
402
393
|
booleans = [true, false, true]
|
403
394
|
replacer = [nil]
|
404
|
-
|
405
|
-
=>
|
406
|
-
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
407
|
-
[nil, 2, nil]
|
408
|
-
```
|
409
|
-
|
410
|
-
- If no replacer specified, it is same as to specify nil.
|
411
|
-
|
412
|
-
```ruby
|
413
|
-
booleans = [true, false, true]
|
414
|
-
vec.replace(booleans)
|
395
|
+
vector.replace(booleans, replacer)
|
415
396
|
=>
|
416
397
|
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
417
398
|
[nil, 2, nil]
|
@@ -420,7 +401,7 @@ vec.replace(booleans)
|
|
420
401
|
- An example to replace 'NA' to nil.
|
421
402
|
|
422
403
|
```ruby
|
423
|
-
vector =
|
404
|
+
vector = Vector.new(['A', 'B', 'NA'])
|
424
405
|
vector.replace(vector == 'NA', nil)
|
425
406
|
# =>
|
426
407
|
#<RedAmber::Vector(:string, size=3):0x000000000000f8ac>
|
@@ -432,7 +413,7 @@ vector.replace(vector == 'NA', nil)
|
|
432
413
|
Specified indices are used 'as sorted'. Position in indices and replacer may not have correspondence.
|
433
414
|
|
434
415
|
```ruby
|
435
|
-
vector =
|
416
|
+
vector = Vector.new([1, 2, 3])
|
436
417
|
indices = [2, 1]
|
437
418
|
replacer = [4, 5]
|
438
419
|
vector.replace(indices, replacer)
|
@@ -448,7 +429,7 @@ Propagate the last valid observation forward (or backward).
|
|
448
429
|
Or preserve nil if all previous values are nil or at the end.
|
449
430
|
|
450
431
|
```ruby
|
451
|
-
integer =
|
432
|
+
integer = Vector.new([0, 1, nil, 3, nil])
|
452
433
|
integer.fill_nil_forward
|
453
434
|
# =>
|
454
435
|
#<RedAmber::Vector(:uint8, size=5):0x000000000000f960>
|
@@ -470,7 +451,7 @@ Choose values based on self. Self must be a boolean Vector.
|
|
470
451
|
This example will normalize negative indices to positive ones.
|
471
452
|
|
472
453
|
```ruby
|
473
|
-
indices =
|
454
|
+
indices = Vector.new([1, -1, 3, -4])
|
474
455
|
array_size = 10
|
475
456
|
normalized_indices = (indices < 0).if_else(indices + array_size, indices)
|
476
457
|
|
@@ -485,7 +466,7 @@ For each element in self, return true if it is found in given `values`, false ot
|
|
485
466
|
By default, nulls are matched against the value set. (This will be changed in SetLookupOptions: not implemented.)
|
486
467
|
|
487
468
|
```ruby
|
488
|
-
vector =
|
469
|
+
vector = Vector.new %W[A B C D]
|
489
470
|
values = ['A', 'C', 'X']
|
490
471
|
vector.is_in(values)
|
491
472
|
|
@@ -497,7 +478,7 @@ vector.is_in(values)
|
|
497
478
|
`values` are cast to the same Class of Vector.
|
498
479
|
|
499
480
|
```ruby
|
500
|
-
vector =
|
481
|
+
vector = Vector.new([1, 2, 255])
|
501
482
|
vector.is_in(1, -1)
|
502
483
|
|
503
484
|
# =>
|
@@ -510,7 +491,7 @@ vector.is_in(1, -1)
|
|
510
491
|
Shift vector's values by specified `amount`. Shifted space is filled by value `fill`.
|
511
492
|
|
512
493
|
```ruby
|
513
|
-
vector =
|
494
|
+
vector = Vector.new([1, 2, 3, 4, 5])
|
514
495
|
vector.shift
|
515
496
|
|
516
497
|
# =>
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/red_amber/data_frame.rb
CHANGED
@@ -7,6 +7,7 @@ module RedAmber
|
|
7
7
|
# mix-in
|
8
8
|
include DataFrameDisplayable
|
9
9
|
include DataFrameIndexable
|
10
|
+
include DataFrameLoadSave
|
10
11
|
include DataFrameReshaping
|
11
12
|
include DataFrameSelectable
|
12
13
|
include DataFrameVariableOperation
|
@@ -37,6 +38,13 @@ module RedAmber
|
|
37
38
|
# DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
|
38
39
|
# returns empty DataFrame
|
39
40
|
@table = Arrow::Table.new({}, [])
|
41
|
+
in [->(x) { x.respond_to?(:to_arrow) } => arrowable]
|
42
|
+
table = arrowable.to_arrow
|
43
|
+
unless table.is_a?(Arrow::Table)
|
44
|
+
raise DataFrameTypeError,
|
45
|
+
"to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
|
46
|
+
end
|
47
|
+
@table = table
|
40
48
|
in [Arrow::Table => table]
|
41
49
|
@table = table
|
42
50
|
in [DataFrame => dataframe]
|
@@ -52,10 +60,9 @@ module RedAmber
|
|
52
60
|
@table = Arrow::Table.new(*args)
|
53
61
|
end
|
54
62
|
name_unnamed_keys
|
55
|
-
end
|
56
63
|
|
57
|
-
|
58
|
-
|
64
|
+
duplicated_keys = keys.tally.select { |_k, v| v > 1 }.keys
|
65
|
+
raise DataFrameArgumentError, "duplicate keys: #{duplicated_keys}" unless duplicated_keys.empty?
|
59
66
|
end
|
60
67
|
|
61
68
|
attr_reader :table
|
@@ -64,10 +71,6 @@ module RedAmber
|
|
64
71
|
@table
|
65
72
|
end
|
66
73
|
|
67
|
-
def save(output, options = {})
|
68
|
-
@table.save(output, options)
|
69
|
-
end
|
70
|
-
|
71
74
|
# Returns the number of rows.
|
72
75
|
#
|
73
76
|
# @return [Integer] Number of rows.
|
@@ -159,12 +162,19 @@ module RedAmber
|
|
159
162
|
@vectors || @vectors = init_instance_vars(:vectors)
|
160
163
|
end
|
161
164
|
|
162
|
-
# Returns row indices (
|
165
|
+
# Returns row indices (start...(size+start)) in an Array.
|
163
166
|
#
|
167
|
+
# @param start [Object]
|
168
|
+
# Object which has a #succ method.
|
164
169
|
# @return [Array]
|
165
|
-
# An Array of
|
166
|
-
|
167
|
-
|
170
|
+
# An Array of indices of the row.
|
171
|
+
# @example
|
172
|
+
# (when self.size == 5)
|
173
|
+
# - indices #=> [0, 1, 2, 3, 4]
|
174
|
+
# - indices(1) #=> [1, 2, 3, 4, 5]
|
175
|
+
# - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
|
176
|
+
def indices(start = 0)
|
177
|
+
(start..).take(size)
|
168
178
|
end
|
169
179
|
alias_method :indexes, :indices
|
170
180
|
|
@@ -208,23 +218,24 @@ module RedAmber
|
|
208
218
|
Rover::DataFrame.new(to_h)
|
209
219
|
end
|
210
220
|
|
211
|
-
def to_iruby
|
212
|
-
require 'iruby'
|
213
|
-
return ['text/plain', '(empty DataFrame)'] if empty?
|
214
|
-
|
215
|
-
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
|
216
|
-
size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
|
217
|
-
else
|
218
|
-
['text/html', html_table]
|
219
|
-
end
|
220
|
-
end
|
221
|
-
|
222
221
|
def group(*group_keys, &block)
|
223
222
|
g = Group.new(self, group_keys)
|
224
223
|
g = g.summarize(&block) if block
|
225
224
|
g
|
226
225
|
end
|
227
226
|
|
227
|
+
def method_missing(name, *args, &block)
|
228
|
+
return v(name) if args.empty?
|
229
|
+
|
230
|
+
super
|
231
|
+
end
|
232
|
+
|
233
|
+
def respond_to_missing?(name, include_private)
|
234
|
+
return true if key?(name)
|
235
|
+
|
236
|
+
super
|
237
|
+
end
|
238
|
+
|
228
239
|
private
|
229
240
|
|
230
241
|
# initialize @variable, @keys, @vectors and return one of them
|
@@ -241,25 +252,6 @@ module RedAmber
|
|
241
252
|
ary[%i[variables keys vectors].index(var)]
|
242
253
|
end
|
243
254
|
|
244
|
-
def html_table
|
245
|
-
reduced = size > 8 ? self[0..4, -4..-1] : self
|
246
|
-
|
247
|
-
converted = reduced.assign do
|
248
|
-
vectors.select.with_object({}) do |vector, assigner|
|
249
|
-
if vector.has_nil?
|
250
|
-
assigner[vector.key] = vector.to_a.map do |e|
|
251
|
-
e = e.nil? ? '<i>(nil)</i>' : e.to_s # nil
|
252
|
-
e = '""' if e.empty? # empty string
|
253
|
-
e.sub(/(\s+)/, '"\1"') # blank spaces
|
254
|
-
end
|
255
|
-
end
|
256
|
-
end
|
257
|
-
end
|
258
|
-
|
259
|
-
html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
|
260
|
-
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
261
|
-
end
|
262
|
-
|
263
255
|
def name_unnamed_keys
|
264
256
|
return unless @table[:'']
|
265
257
|
|
@@ -37,8 +37,12 @@ module RedAmber
|
|
37
37
|
alias_method :describe, :summary
|
38
38
|
|
39
39
|
def inspect
|
40
|
-
|
40
|
+
mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
|
41
|
+
case mode.upcase
|
42
|
+
when 'TDR'
|
41
43
|
"#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
|
44
|
+
when 'MINIMUM'
|
45
|
+
shape_str
|
42
46
|
else
|
43
47
|
"#<#{shape_str(with_id: true)}>\n#{self}"
|
44
48
|
end
|
@@ -55,6 +59,23 @@ module RedAmber
|
|
55
59
|
"#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
|
56
60
|
end
|
57
61
|
|
62
|
+
def to_iruby
|
63
|
+
require 'iruby'
|
64
|
+
return ['text/plain', '(empty DataFrame)'] if empty?
|
65
|
+
|
66
|
+
mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
|
67
|
+
case mode.upcase
|
68
|
+
when 'PLAIN'
|
69
|
+
['text/plain', inspect]
|
70
|
+
when 'MINIMUM'
|
71
|
+
['text/plain', shape_str]
|
72
|
+
when 'TDR'
|
73
|
+
size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
|
74
|
+
else # 'TABLE'
|
75
|
+
['text/html', html_table]
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
58
79
|
private # =====
|
59
80
|
|
60
81
|
def shape_str(with_id: false)
|
@@ -98,7 +119,7 @@ module RedAmber
|
|
98
119
|
else
|
99
120
|
[shorthand(vector, size, max_element)]
|
100
121
|
end
|
101
|
-
sio.printf header_format, i
|
122
|
+
sio.printf header_format, i, key, type, data_tally.size, a.join(', ')
|
102
123
|
end
|
103
124
|
sio.string
|
104
125
|
end
|
@@ -154,9 +175,9 @@ module RedAmber
|
|
154
175
|
|
155
176
|
def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
|
156
177
|
original = self
|
157
|
-
indices = size > head + tail ? [*0
|
178
|
+
indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
|
158
179
|
df = slice(indices).assign do
|
159
|
-
assigner = { INDEX_KEY => indices.map
|
180
|
+
assigner = { INDEX_KEY => indices.map(&:to_s) }
|
160
181
|
vectors.each_with_object(assigner) do |v, a|
|
161
182
|
a[v.key] = v.to_a.map do |e|
|
162
183
|
if e.nil?
|
@@ -173,12 +194,12 @@ module RedAmber
|
|
173
194
|
end
|
174
195
|
|
175
196
|
df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
|
176
|
-
df = size > head + tail ? df[0, 0, 0
|
197
|
+
df = size > head + tail ? df[0, 0, 0..head, -tail..-1] : df[0, 0, 0..-1]
|
177
198
|
df = df.assign do
|
178
199
|
vectors.each_with_object({}) do |v, assigner|
|
179
200
|
vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
|
180
201
|
.replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
|
181
|
-
assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
|
202
|
+
assigner[v.key] = original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
|
182
203
|
end
|
183
204
|
end
|
184
205
|
|
@@ -220,5 +241,37 @@ module RedAmber
|
|
220
241
|
"%#{width}s"
|
221
242
|
end
|
222
243
|
end
|
244
|
+
|
245
|
+
def html_table
|
246
|
+
reduced = size > 8 ? self[0..4, -4..-1] : self
|
247
|
+
|
248
|
+
converted = reduced.assign do
|
249
|
+
vectors.select.with_object({}) do |vector, assigner|
|
250
|
+
assigner[vector.key] = vector.map do |element|
|
251
|
+
case element
|
252
|
+
in TrueClass
|
253
|
+
'<i>(true)</i>'
|
254
|
+
in FalseClass
|
255
|
+
'<i>(false)</i>'
|
256
|
+
in NilClass
|
257
|
+
'<i>(nil)</i>'
|
258
|
+
in ''
|
259
|
+
'""'
|
260
|
+
in String
|
261
|
+
element.sub(/^(\s+)$/, '"\1"') # blank spaces
|
262
|
+
in Float
|
263
|
+
format('%g', element)
|
264
|
+
in Integer
|
265
|
+
format('%d', element)
|
266
|
+
else
|
267
|
+
element
|
268
|
+
end
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
|
274
|
+
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
275
|
+
end
|
223
276
|
end
|
224
277
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
|
4
|
+
# mix-ins for the class DataFrame
|
5
|
+
module DataFrameLoadSave
|
6
|
+
# Enable `self.load` as class method of DataFrame
|
7
|
+
def self.included(klass)
|
8
|
+
klass.extend ClassMethods
|
9
|
+
end
|
10
|
+
|
11
|
+
# Enable `self.load` as class method of DataFrame
|
12
|
+
module ClassMethods
|
13
|
+
# Load DataFrame via Arrow::Table.load
|
14
|
+
def load(path, options = {})
|
15
|
+
DataFrame.new(Arrow::Table.load(path, options))
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# Save DataFrame
|
20
|
+
def save(output, options = {})
|
21
|
+
@table.save(output, options)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Save and reload to cast automatically
|
25
|
+
# Via tsv format file temporarily as default
|
26
|
+
#
|
27
|
+
# experimental feature
|
28
|
+
def auto_cast(format: :tsv)
|
29
|
+
return self if empty?
|
30
|
+
|
31
|
+
tempfile = Arrow::ResizableBuffer.new(1024)
|
32
|
+
save(tempfile, format: format)
|
33
|
+
DataFrame.load(tempfile, format: format)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -5,20 +5,21 @@ module RedAmber
|
|
5
5
|
module DataFrameReshaping
|
6
6
|
# Transpose a wide DataFrame.
|
7
7
|
#
|
8
|
-
# @param key [Symbol
|
8
|
+
# @param key [Symbol] key of the index column
|
9
9
|
# to transpose into keys.
|
10
|
-
# If it is
|
11
|
-
# @param new_key [Symbol
|
12
|
-
# If it is
|
10
|
+
# If it is not specified, keys[0] is used.
|
11
|
+
# @param new_key [Symbol] key name of transposed index column.
|
12
|
+
# If it is not specified, :NAME is used. If it already exists, :NAME1 or :NAME1.succ is used.
|
13
13
|
# @return [DataFrame] transposed DataFrame
|
14
|
-
def transpose(key: keys.first,
|
15
|
-
raise DataFrameArgumentError, "
|
14
|
+
def transpose(key: keys.first, name: :NAME)
|
15
|
+
raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
|
16
16
|
|
17
17
|
# Find unused name
|
18
18
|
new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
|
19
|
-
|
19
|
+
name = (:NAME1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
|
20
20
|
|
21
|
-
|
21
|
+
names = (keys - [key]).map { |x| x&.to_s }
|
22
|
+
hash = { name => names }
|
22
23
|
i = keys.index(key)
|
23
24
|
each_row do |h|
|
24
25
|
k = h.values[i]
|
@@ -33,7 +34,7 @@ module RedAmber
|
|
33
34
|
# @param name [Symbol, String] key of the column which comes **from key names**.
|
34
35
|
# @param value [Symbol, String] key of the column which comes **from values**.
|
35
36
|
# @return [DataFrame] long DataFrame.
|
36
|
-
def to_long(*keep_keys, name: :
|
37
|
+
def to_long(*keep_keys, name: :NAME, value: :VALUE)
|
37
38
|
not_included = keep_keys - keys
|
38
39
|
raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
|
39
40
|
|
@@ -55,6 +56,7 @@ module RedAmber
|
|
55
56
|
end
|
56
57
|
end
|
57
58
|
end
|
59
|
+
hash[name] = hash[name].map { |x| x&.to_s }
|
58
60
|
DataFrame.new(hash)
|
59
61
|
end
|
60
62
|
|
@@ -63,7 +65,7 @@ module RedAmber
|
|
63
65
|
# @param name [Symbol, String] key of the column which will be expanded **to key names**.
|
64
66
|
# @param value [Symbol, String] key of the column which will be expanded **to values**.
|
65
67
|
# @return [DataFrame] wide DataFrame.
|
66
|
-
def to_wide(name: :
|
68
|
+
def to_wide(name: :NAME, value: :VALUE)
|
67
69
|
name = name.to_sym
|
68
70
|
raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
|
69
71
|
|