red_amber 0.2.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -0
- data/CHANGELOG.md +125 -0
- data/README.md +86 -269
- data/doc/DataFrame.md +427 -281
- data/doc/Vector.md +35 -54
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +33 -41
- data/lib/red_amber/data_frame_displayable.rb +59 -6
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +12 -10
- data/lib/red_amber/data_frame_selectable.rb +53 -9
- data/lib/red_amber/data_frame_variable_operation.rb +57 -20
- data/lib/red_amber/group.rb +5 -3
- data/lib/red_amber/helper.rb +20 -18
- data/lib/red_amber/vector.rb +50 -31
- data/lib/red_amber/vector_functions.rb +21 -24
- data/lib/red_amber/vector_selectable.rb +18 -9
- data/lib/red_amber/vector_updatable.rb +6 -3
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -0
- metadata +13 -3
- data/doc/examples_of_red_amber.ipynb +0 -6783
data/doc/Vector.md
CHANGED
@@ -7,7 +7,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
7
7
|
### Create from a column in a DataFrame
|
8
8
|
|
9
9
|
```ruby
|
10
|
-
df =
|
10
|
+
df = DataFrame.new(x: [1, 2, 3])
|
11
11
|
df[:x]
|
12
12
|
# =>
|
13
13
|
#<RedAmber::Vector(:uint8, size=3):0x000000000000f4ec>
|
@@ -17,13 +17,13 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
17
17
|
### New from an Array
|
18
18
|
|
19
19
|
```ruby
|
20
|
-
vector =
|
20
|
+
vector = Vector.new([1, 2, 3])
|
21
21
|
# or
|
22
|
-
vector =
|
22
|
+
vector = Vector.new(1, 2, 3)
|
23
23
|
# or
|
24
|
-
vector =
|
24
|
+
vector = Vector.new(1..3)
|
25
25
|
# or
|
26
|
-
vector =
|
26
|
+
vector = Vector.new(Arrow::Array.new([1, 2, 3]))
|
27
27
|
|
28
28
|
# =>
|
29
29
|
#<RedAmber::Vector(:uint8, size=3):0x000000000000f514>
|
@@ -61,7 +61,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
61
61
|
|
62
62
|
### `type_class`
|
63
63
|
|
64
|
-
### `each`
|
64
|
+
### `each`, `map`, `collect`
|
65
65
|
|
66
66
|
If block is not given, returns Enumerator.
|
67
67
|
|
@@ -78,7 +78,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
78
78
|
- `limit` sets size limit to display a long array.
|
79
79
|
|
80
80
|
```ruby
|
81
|
-
vector =
|
81
|
+
vector = Vector.new((1..50).to_a)
|
82
82
|
# =>
|
83
83
|
#<RedAmber::Vector(:uint8, size=50):0x000000000000f528>
|
84
84
|
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ... ]
|
@@ -95,8 +95,8 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
95
95
|
- A negative index is also OK, as in Ruby's primitive Array.
|
96
96
|
|
97
97
|
```ruby
|
98
|
-
array =
|
99
|
-
indices =
|
98
|
+
array = Vector.new(%w[A B C D E])
|
99
|
+
indices = Vector.new([0.1, -0.5, -5.1])
|
100
100
|
array.take(indices)
|
101
101
|
# or
|
102
102
|
array[indices]
|
@@ -106,7 +106,7 @@ array[indices]
|
|
106
106
|
["A", "E", "A"]
|
107
107
|
```
|
108
108
|
|
109
|
-
### `filter(booleans)`, `[](booleans)`
|
109
|
+
### `filter(booleans)`, `select(booleans)`, `[](booleans)`
|
110
110
|
|
111
111
|
- Acceptable class for booleans:
|
112
112
|
- An array of true, false, or nil
|
@@ -114,7 +114,7 @@ array[indices]
|
|
114
114
|
- Arrow::BooleanArray
|
115
115
|
|
116
116
|
```ruby
|
117
|
-
array =
|
117
|
+
array = Vector.new(%w[A B C D E])
|
118
118
|
booleans = [true, false, nil, false, true]
|
119
119
|
array.filter(booleans)
|
120
120
|
# or
|
@@ -124,6 +124,7 @@ array[booleans]
|
|
124
124
|
#<RedAmber::Vector(:string, size=2):0x000000000000f21c>
|
125
125
|
["A", "E"]
|
126
126
|
```
|
127
|
+
`filter` and `select` also accept a block.
|
127
128
|
|
128
129
|
## Functions
|
129
130
|
|
@@ -158,7 +159,7 @@ Options can be used as follows.
|
|
158
159
|
See the [document of C++ function](https://arrow.apache.org/docs/cpp/compute.html) for detail.
|
159
160
|
|
160
161
|
```ruby
|
161
|
-
double =
|
162
|
+
double = Vector.new([1, 0/0.0, -1/0.0, 1/0.0, nil, ""])
|
162
163
|
#=>
|
163
164
|
#<RedAmber::Vector(:double, size=6):0x000000000000f910>
|
164
165
|
[1.0, NaN, -Infinity, Infinity, nil, 0.0]
|
@@ -168,7 +169,7 @@ double.count(mode: :only_valid) #=> 5, default
|
|
168
169
|
double.count(mode: :only_null) #=> 1
|
169
170
|
double.count(mode: :all) #=> 6
|
170
171
|
|
171
|
-
boolean =
|
172
|
+
boolean = Vector.new([true, true, nil])
|
172
173
|
#=>
|
173
174
|
#<RedAmber::Vector(:boolean, size=3):0x000000000000f924>
|
174
175
|
[true, true, nil]
|
@@ -187,8 +188,8 @@ boolean.all(skip_nulls: false) #=> false
|
|
187
188
|
| ✓ `-@` | | ✓ | | |as `-vector`|
|
188
189
|
| ✓ `negate` | | ✓ | | |`-@` |
|
189
190
|
| ✓ `abs` | | ✓ | | | |
|
190
|
-
|
|
191
|
-
|
|
191
|
+
| ✓ `acos` | | ✓ | | | |
|
192
|
+
| ✓ `asin` | | ✓ | | | |
|
192
193
|
| ✓ `atan` | | ✓ | | | |
|
193
194
|
| ✓ `bit_wise_not`| | (✓) | | |integer only|
|
194
195
|
| ✓ `ceil` | | ✓ | | | |
|
@@ -197,10 +198,10 @@ boolean.all(skip_nulls: false) #=> false
|
|
197
198
|
| ✓`fill_nil_forward` | ✓ | ✓ | ✓ | | |
|
198
199
|
| ✓ `floor` | | ✓ | | | |
|
199
200
|
| ✓ `invert` | ✓ | | | |`!`, alias `not`|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
201
|
+
| ✓ `ln` | | ✓ | | | |
|
202
|
+
| ✓ `log10` | | ✓ | | | |
|
203
|
+
| ✓ `log1p` | | ✓ | | |Compute natural log of (1+x)|
|
204
|
+
| ✓ `log2` | | ✓ | | | |
|
204
205
|
| ✓ `round` | | ✓ | | ✓ Round (:mode, :n_digits)| |
|
205
206
|
| ✓ `round_to_multiple`| | ✓ | | ✓ RoundToMultiple :mode, :multiple| multiple must be an Arrow::Scalar|
|
206
207
|
| ✓ `sign` | | ✓ | | | |
|
@@ -215,7 +216,7 @@ Examples of options for `#round`;
|
|
215
216
|
- `round_mode` Specify rounding mode.
|
216
217
|
|
217
218
|
```ruby
|
218
|
-
double =
|
219
|
+
double = Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
|
219
220
|
# => [15.15, 2.5, 3.5, -4.5, -5.5]
|
220
221
|
double.round
|
221
222
|
# => [15.0, 2.0, 4.0, -4.0, -6.0]
|
@@ -267,7 +268,7 @@ double.round(n_digits: -1)
|
|
267
268
|
| ✓ `is_valid` | ✓ | ✓ | ✓ | | |
|
268
269
|
| ✓ `less` | ✓ | ✓ | ✓ | |`<`, alias `lt`|
|
269
270
|
| ✓ `less_equal` | ✓ | ✓ | ✓ | |`<=`, alias `le`|
|
270
|
-
|
|
271
|
+
| ✓ `logb` | | ✓ | | |logb(b) Compute base `b` logarithm|
|
271
272
|
|[ ]`mod` | | [ ] | | | `%` |
|
272
273
|
| ✓ `multiply` | | ✓ | | | `*` |
|
273
274
|
| ✓ `not_equal` | ✓ | ✓ | ✓ | |`!=`, alias `ne`|
|
@@ -283,8 +284,6 @@ double.round(n_digits: -1)
|
|
283
284
|
|
284
285
|
Returns a new array with distinct elements.
|
285
286
|
|
286
|
-
(Not impremented functions)
|
287
|
-
|
288
287
|
### `tally` and `value_counts`
|
289
288
|
|
290
289
|
Compute counts of unique elements and return a Hash.
|
@@ -295,7 +294,7 @@ double.round(n_digits: -1)
|
|
295
294
|
array = [0.0/0, Float::NAN]
|
296
295
|
array.tally #=> {NaN=>1, NaN=>1}
|
297
296
|
|
298
|
-
vector =
|
297
|
+
vector = Vector.new(array)
|
299
298
|
vector.tally #=> {NaN=>2}
|
300
299
|
vector.value_counts #=> {NaN=>2}
|
301
300
|
```
|
@@ -309,19 +308,10 @@ double.round(n_digits: -1)
|
|
309
308
|
|
310
309
|
### `sort_indexes`, `sort_indices`, `array_sort_indices`
|
311
310
|
|
312
|
-
### [ ] `sort`, `sort_by`
|
313
|
-
### [ ] argmin, argmax
|
314
|
-
### [ ] (array functions)
|
315
|
-
### [ ] (strings functions)
|
316
|
-
### [ ] (temporal functions)
|
317
|
-
### [ ] (conditional functions)
|
318
|
-
### [ ] (index functions)
|
319
|
-
### [ ] (other functions)
|
320
|
-
|
321
311
|
## Coerce
|
322
312
|
|
323
313
|
```ruby
|
324
|
-
vector =
|
314
|
+
vector = Vector.new(1,2,3)
|
325
315
|
# =>
|
326
316
|
#<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
|
327
317
|
[1, 2, 3]
|
@@ -351,12 +341,13 @@ vector * -1
|
|
351
341
|
- Accepts Scalar, Range of Integer, Vector, Array, Arrow::Array as a specifier
|
352
342
|
- Accepts Scalar, Vector, Array and Arrow::Array as a replacer.
|
353
343
|
- Boolean specifiers specify the position of replacer in true.
|
344
|
+
- If booleans.any is false, no replacement happens and self is returned.
|
354
345
|
- Index specifiers specify the position of replacer in indices.
|
355
346
|
- replacer specifies the values to be replaced.
|
356
347
|
- The number of true in booleans must be equal to the length of replacer
|
357
348
|
|
358
349
|
```ruby
|
359
|
-
vector =
|
350
|
+
vector = Vector.new([1, 2, 3])
|
360
351
|
booleans = [true, false, true]
|
361
352
|
replacer = [4, 5]
|
362
353
|
vector.replace(booleans, replacer)
|
@@ -390,7 +381,7 @@ vector.replace(booleans, replacer)
|
|
390
381
|
```ruby
|
391
382
|
booleans = [true, false, nil]
|
392
383
|
replacer = -1
|
393
|
-
|
384
|
+
vector.replace(booleans, replacer)
|
394
385
|
=>
|
395
386
|
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
396
387
|
[-1, 2, nil]
|
@@ -401,17 +392,7 @@ vec.replace(booleans, replacer)
|
|
401
392
|
```ruby
|
402
393
|
booleans = [true, false, true]
|
403
394
|
replacer = [nil]
|
404
|
-
|
405
|
-
=>
|
406
|
-
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
407
|
-
[nil, 2, nil]
|
408
|
-
```
|
409
|
-
|
410
|
-
- If no replacer specified, it is same as to specify nil.
|
411
|
-
|
412
|
-
```ruby
|
413
|
-
booleans = [true, false, true]
|
414
|
-
vec.replace(booleans)
|
395
|
+
vector.replace(booleans, replacer)
|
415
396
|
=>
|
416
397
|
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
417
398
|
[nil, 2, nil]
|
@@ -420,7 +401,7 @@ vec.replace(booleans)
|
|
420
401
|
- An example to replace 'NA' to nil.
|
421
402
|
|
422
403
|
```ruby
|
423
|
-
vector =
|
404
|
+
vector = Vector.new(['A', 'B', 'NA'])
|
424
405
|
vector.replace(vector == 'NA', nil)
|
425
406
|
# =>
|
426
407
|
#<RedAmber::Vector(:string, size=3):0x000000000000f8ac>
|
@@ -432,7 +413,7 @@ vector.replace(vector == 'NA', nil)
|
|
432
413
|
Specified indices are used 'as sorted'. Position in indices and replacer may not have correspondence.
|
433
414
|
|
434
415
|
```ruby
|
435
|
-
vector =
|
416
|
+
vector = Vector.new([1, 2, 3])
|
436
417
|
indices = [2, 1]
|
437
418
|
replacer = [4, 5]
|
438
419
|
vector.replace(indices, replacer)
|
@@ -448,7 +429,7 @@ Propagate the last valid observation forward (or backward).
|
|
448
429
|
Or preserve nil if all previous values are nil or at the end.
|
449
430
|
|
450
431
|
```ruby
|
451
|
-
integer =
|
432
|
+
integer = Vector.new([0, 1, nil, 3, nil])
|
452
433
|
integer.fill_nil_forward
|
453
434
|
# =>
|
454
435
|
#<RedAmber::Vector(:uint8, size=5):0x000000000000f960>
|
@@ -470,7 +451,7 @@ Choose values based on self. Self must be a boolean Vector.
|
|
470
451
|
This example will normalize negative indices to positive ones.
|
471
452
|
|
472
453
|
```ruby
|
473
|
-
indices =
|
454
|
+
indices = Vector.new([1, -1, 3, -4])
|
474
455
|
array_size = 10
|
475
456
|
normalized_indices = (indices < 0).if_else(indices + array_size, indices)
|
476
457
|
|
@@ -485,7 +466,7 @@ For each element in self, return true if it is found in given `values`, false ot
|
|
485
466
|
By default, nulls are matched against the value set. (This will be changed in SetLookupOptions: not implemented.)
|
486
467
|
|
487
468
|
```ruby
|
488
|
-
vector =
|
469
|
+
vector = Vector.new %W[A B C D]
|
489
470
|
values = ['A', 'C', 'X']
|
490
471
|
vector.is_in(values)
|
491
472
|
|
@@ -497,7 +478,7 @@ vector.is_in(values)
|
|
497
478
|
`values` are cast to the same class as the Vector.
|
498
479
|
|
499
480
|
```ruby
|
500
|
-
vector =
|
481
|
+
vector = Vector.new([1, 2, 255])
|
501
482
|
vector.is_in(1, -1)
|
502
483
|
|
503
484
|
# =>
|
@@ -510,7 +491,7 @@ vector.is_in(1, -1)
|
|
510
491
|
Shift vector's values by specified `amount`. Shifted space is filled by value `fill`.
|
511
492
|
|
512
493
|
```ruby
|
513
|
-
vector =
|
494
|
+
vector = Vector.new([1, 2, 3, 4, 5])
|
514
495
|
vector.shift
|
515
496
|
|
516
497
|
# =>
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/red_amber/data_frame.rb
CHANGED
@@ -7,6 +7,7 @@ module RedAmber
|
|
7
7
|
# mix-in
|
8
8
|
include DataFrameDisplayable
|
9
9
|
include DataFrameIndexable
|
10
|
+
include DataFrameLoadSave
|
10
11
|
include DataFrameReshaping
|
11
12
|
include DataFrameSelectable
|
12
13
|
include DataFrameVariableOperation
|
@@ -37,6 +38,13 @@ module RedAmber
|
|
37
38
|
# DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
|
38
39
|
# returns empty DataFrame
|
39
40
|
@table = Arrow::Table.new({}, [])
|
41
|
+
in [->(x) { x.respond_to?(:to_arrow) } => arrowable]
|
42
|
+
table = arrowable.to_arrow
|
43
|
+
unless table.is_a?(Arrow::Table)
|
44
|
+
raise DataFrameTypeError,
|
45
|
+
"to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
|
46
|
+
end
|
47
|
+
@table = table
|
40
48
|
in [Arrow::Table => table]
|
41
49
|
@table = table
|
42
50
|
in [DataFrame => dataframe]
|
@@ -52,10 +60,9 @@ module RedAmber
|
|
52
60
|
@table = Arrow::Table.new(*args)
|
53
61
|
end
|
54
62
|
name_unnamed_keys
|
55
|
-
end
|
56
63
|
|
57
|
-
|
58
|
-
|
64
|
+
duplicated_keys = keys.tally.select { |_k, v| v > 1 }.keys
|
65
|
+
raise DataFrameArgumentError, "duplicate keys: #{duplicated_keys}" unless duplicated_keys.empty?
|
59
66
|
end
|
60
67
|
|
61
68
|
attr_reader :table
|
@@ -64,10 +71,6 @@ module RedAmber
|
|
64
71
|
@table
|
65
72
|
end
|
66
73
|
|
67
|
-
def save(output, options = {})
|
68
|
-
@table.save(output, options)
|
69
|
-
end
|
70
|
-
|
71
74
|
# Returns the number of rows.
|
72
75
|
#
|
73
76
|
# @return [Integer] Number of rows.
|
@@ -159,12 +162,19 @@ module RedAmber
|
|
159
162
|
@vectors || @vectors = init_instance_vars(:vectors)
|
160
163
|
end
|
161
164
|
|
162
|
-
# Returns row indices (
|
165
|
+
# Returns row indices (start...(size+start)) in an Array.
|
163
166
|
#
|
167
|
+
# @param start [Object]
|
168
|
+
# Object which has a #succ method.
|
164
169
|
# @return [Array]
|
165
|
-
# An Array of
|
166
|
-
|
167
|
-
|
170
|
+
# An Array of indices of the row.
|
171
|
+
# @example
|
172
|
+
# (when self.size == 5)
|
173
|
+
# - indices #=> [0, 1, 2, 3, 4]
|
174
|
+
# - indices(1) #=> [1, 2, 3, 4, 5]
|
175
|
+
# - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
|
176
|
+
def indices(start = 0)
|
177
|
+
(start..).take(size)
|
168
178
|
end
|
169
179
|
alias_method :indexes, :indices
|
170
180
|
|
@@ -208,23 +218,24 @@ module RedAmber
|
|
208
218
|
Rover::DataFrame.new(to_h)
|
209
219
|
end
|
210
220
|
|
211
|
-
def to_iruby
|
212
|
-
require 'iruby'
|
213
|
-
return ['text/plain', '(empty DataFrame)'] if empty?
|
214
|
-
|
215
|
-
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
|
216
|
-
size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
|
217
|
-
else
|
218
|
-
['text/html', html_table]
|
219
|
-
end
|
220
|
-
end
|
221
|
-
|
222
221
|
def group(*group_keys, &block)
|
223
222
|
g = Group.new(self, group_keys)
|
224
223
|
g = g.summarize(&block) if block
|
225
224
|
g
|
226
225
|
end
|
227
226
|
|
227
|
+
def method_missing(name, *args, &block)
|
228
|
+
return v(name) if args.empty?
|
229
|
+
|
230
|
+
super
|
231
|
+
end
|
232
|
+
|
233
|
+
def respond_to_missing?(name, include_private)
|
234
|
+
return true if key?(name)
|
235
|
+
|
236
|
+
super
|
237
|
+
end
|
238
|
+
|
228
239
|
private
|
229
240
|
|
230
241
|
# initialize @variables, @keys, @vectors and return one of them
|
@@ -241,25 +252,6 @@ module RedAmber
|
|
241
252
|
ary[%i[variables keys vectors].index(var)]
|
242
253
|
end
|
243
254
|
|
244
|
-
def html_table
|
245
|
-
reduced = size > 8 ? self[0..4, -4..-1] : self
|
246
|
-
|
247
|
-
converted = reduced.assign do
|
248
|
-
vectors.select.with_object({}) do |vector, assigner|
|
249
|
-
if vector.has_nil?
|
250
|
-
assigner[vector.key] = vector.to_a.map do |e|
|
251
|
-
e = e.nil? ? '<i>(nil)</i>' : e.to_s # nil
|
252
|
-
e = '""' if e.empty? # empty string
|
253
|
-
e.sub(/(\s+)/, '"\1"') # blank spaces
|
254
|
-
end
|
255
|
-
end
|
256
|
-
end
|
257
|
-
end
|
258
|
-
|
259
|
-
html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
|
260
|
-
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
261
|
-
end
|
262
|
-
|
263
255
|
def name_unnamed_keys
|
264
256
|
return unless @table[:'']
|
265
257
|
|
@@ -37,8 +37,12 @@ module RedAmber
|
|
37
37
|
alias_method :describe, :summary
|
38
38
|
|
39
39
|
def inspect
|
40
|
-
|
40
|
+
mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
|
41
|
+
case mode.upcase
|
42
|
+
when 'TDR'
|
41
43
|
"#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
|
44
|
+
when 'MINIMUM'
|
45
|
+
shape_str
|
42
46
|
else
|
43
47
|
"#<#{shape_str(with_id: true)}>\n#{self}"
|
44
48
|
end
|
@@ -55,6 +59,23 @@ module RedAmber
|
|
55
59
|
"#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
|
56
60
|
end
|
57
61
|
|
62
|
+
def to_iruby
|
63
|
+
require 'iruby'
|
64
|
+
return ['text/plain', '(empty DataFrame)'] if empty?
|
65
|
+
|
66
|
+
mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
|
67
|
+
case mode.upcase
|
68
|
+
when 'PLAIN'
|
69
|
+
['text/plain', inspect]
|
70
|
+
when 'MINIMUM'
|
71
|
+
['text/plain', shape_str]
|
72
|
+
when 'TDR'
|
73
|
+
size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
|
74
|
+
else # 'TABLE'
|
75
|
+
['text/html', html_table]
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
58
79
|
private # =====
|
59
80
|
|
60
81
|
def shape_str(with_id: false)
|
@@ -98,7 +119,7 @@ module RedAmber
|
|
98
119
|
else
|
99
120
|
[shorthand(vector, size, max_element)]
|
100
121
|
end
|
101
|
-
sio.printf header_format, i
|
122
|
+
sio.printf header_format, i, key, type, data_tally.size, a.join(', ')
|
102
123
|
end
|
103
124
|
sio.string
|
104
125
|
end
|
@@ -154,9 +175,9 @@ module RedAmber
|
|
154
175
|
|
155
176
|
def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
|
156
177
|
original = self
|
157
|
-
indices = size > head + tail ? [*0
|
178
|
+
indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
|
158
179
|
df = slice(indices).assign do
|
159
|
-
assigner = { INDEX_KEY => indices.map
|
180
|
+
assigner = { INDEX_KEY => indices.map(&:to_s) }
|
160
181
|
vectors.each_with_object(assigner) do |v, a|
|
161
182
|
a[v.key] = v.to_a.map do |e|
|
162
183
|
if e.nil?
|
@@ -173,12 +194,12 @@ module RedAmber
|
|
173
194
|
end
|
174
195
|
|
175
196
|
df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
|
176
|
-
df = size > head + tail ? df[0, 0, 0
|
197
|
+
df = size > head + tail ? df[0, 0, 0..head, -tail..-1] : df[0, 0, 0..-1]
|
177
198
|
df = df.assign do
|
178
199
|
vectors.each_with_object({}) do |v, assigner|
|
179
200
|
vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
|
180
201
|
.replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
|
181
|
-
assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
|
202
|
+
assigner[v.key] = original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
|
182
203
|
end
|
183
204
|
end
|
184
205
|
|
@@ -220,5 +241,37 @@ module RedAmber
|
|
220
241
|
"%#{width}s"
|
221
242
|
end
|
222
243
|
end
|
244
|
+
|
245
|
+
def html_table
|
246
|
+
reduced = size > 8 ? self[0..4, -4..-1] : self
|
247
|
+
|
248
|
+
converted = reduced.assign do
|
249
|
+
vectors.select.with_object({}) do |vector, assigner|
|
250
|
+
assigner[vector.key] = vector.map do |element|
|
251
|
+
case element
|
252
|
+
in TrueClass
|
253
|
+
'<i>(true)</i>'
|
254
|
+
in FalseClass
|
255
|
+
'<i>(false)</i>'
|
256
|
+
in NilClass
|
257
|
+
'<i>(nil)</i>'
|
258
|
+
in ''
|
259
|
+
'""'
|
260
|
+
in String
|
261
|
+
element.sub(/^(\s+)$/, '"\1"') # blank spaces
|
262
|
+
in Float
|
263
|
+
format('%g', element)
|
264
|
+
in Integer
|
265
|
+
format('%d', element)
|
266
|
+
else
|
267
|
+
element
|
268
|
+
end
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
|
274
|
+
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
275
|
+
end
|
223
276
|
end
|
224
277
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
|
4
|
+
# mix-ins for the class DataFrame
|
5
|
+
module DataFrameLoadSave
|
6
|
+
# Enable `self.load` as class method of DataFrame
|
7
|
+
def self.included(klass)
|
8
|
+
klass.extend ClassMethods
|
9
|
+
end
|
10
|
+
|
11
|
+
# Enable `self.load` as class method of DataFrame
|
12
|
+
module ClassMethods
|
13
|
+
# Load DataFrame via Arrow::Table.load
|
14
|
+
def load(path, options = {})
|
15
|
+
DataFrame.new(Arrow::Table.load(path, options))
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# Save DataFrame
|
20
|
+
def save(output, options = {})
|
21
|
+
@table.save(output, options)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Save and reload to cast automatically
|
25
|
+
# Via a temporary buffer in tsv format by default
|
26
|
+
#
|
27
|
+
# experimental feature
|
28
|
+
def auto_cast(format: :tsv)
|
29
|
+
return self if empty?
|
30
|
+
|
31
|
+
tempfile = Arrow::ResizableBuffer.new(1024)
|
32
|
+
save(tempfile, format: format)
|
33
|
+
DataFrame.load(tempfile, format: format)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -5,20 +5,21 @@ module RedAmber
|
|
5
5
|
module DataFrameReshaping
|
6
6
|
# Transpose a wide DataFrame.
|
7
7
|
#
|
8
|
-
# @param key [Symbol
|
8
|
+
# @param key [Symbol] key of the index column
|
9
9
|
# to transpose into keys.
|
10
|
-
# If it is
|
11
|
-
# @param new_key [Symbol
|
12
|
-
# If it is
|
10
|
+
# If it is not specified, keys[0] is used.
|
11
|
+
# @param name [Symbol] key name of transposed index column.
|
12
|
+
# If it is not specified, :NAME is used. If it already exists, :NAME1 or :NAME1.succ is used.
|
13
13
|
# @return [DataFrame] transposed DataFrame
|
14
|
-
def transpose(key: keys.first,
|
15
|
-
raise DataFrameArgumentError, "
|
14
|
+
def transpose(key: keys.first, name: :NAME)
|
15
|
+
raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
|
16
16
|
|
17
17
|
# Find unused name
|
18
18
|
new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
|
19
|
-
|
19
|
+
name = (:NAME1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
|
20
20
|
|
21
|
-
|
21
|
+
names = (keys - [key]).map { |x| x&.to_s }
|
22
|
+
hash = { name => names }
|
22
23
|
i = keys.index(key)
|
23
24
|
each_row do |h|
|
24
25
|
k = h.values[i]
|
@@ -33,7 +34,7 @@ module RedAmber
|
|
33
34
|
# @param name [Symbol, String] key of the column which comes **from key names**.
|
34
35
|
# @param value [Symbol, String] key of the column which comes **from values**.
|
35
36
|
# @return [DataFrame] long DataFrame.
|
36
|
-
def to_long(*keep_keys, name: :
|
37
|
+
def to_long(*keep_keys, name: :NAME, value: :VALUE)
|
37
38
|
not_included = keep_keys - keys
|
38
39
|
raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
|
39
40
|
|
@@ -55,6 +56,7 @@ module RedAmber
|
|
55
56
|
end
|
56
57
|
end
|
57
58
|
end
|
59
|
+
hash[name] = hash[name].map { |x| x&.to_s }
|
58
60
|
DataFrame.new(hash)
|
59
61
|
end
|
60
62
|
|
@@ -63,7 +65,7 @@ module RedAmber
|
|
63
65
|
# @param name [Symbol, String] key of the column which will be expanded **to key names**.
|
64
66
|
# @param value [Symbol, String] key of the column which will be expanded **to values**.
|
65
67
|
# @return [DataFrame] wide DataFrame.
|
66
|
-
def to_wide(name: :
|
68
|
+
def to_wide(name: :NAME, value: :VALUE)
|
67
69
|
name = name.to_sym
|
68
70
|
raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
|
69
71
|
|