red_amber 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +20 -5
- data/CHANGELOG.md +104 -4
- data/README.md +18 -16
- data/benchmark/basic.yml +8 -8
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -6
- data/doc/CODE_OF_CONDUCT.md +1 -1
- data/docker/.env +4 -0
- data/docker/Dockerfile +66 -0
- data/docker/Gemfile +21 -0
- data/docker/Gemfile.lock +80 -0
- data/docker/docker-compose.yml +21 -0
- data/docker/example +74 -0
- data/docker/notebook/examples_of_red_amber.ipynb +8562 -0
- data/docker/notebook/red-amber.ipynb +188 -0
- data/docker/readme.md +118 -0
- data/lib/red_amber/data_frame.rb +25 -10
- data/lib/red_amber/data_frame_combinable.rb +117 -73
- data/lib/red_amber/data_frame_displayable.rb +100 -51
- data/lib/red_amber/data_frame_indexable.rb +4 -4
- data/lib/red_amber/data_frame_reshaping.rb +1 -1
- data/lib/red_amber/data_frame_selectable.rb +1 -4
- data/lib/red_amber/data_frame_variable_operation.rb +7 -2
- data/lib/red_amber/group.rb +17 -18
- data/lib/red_amber/helper.rb +4 -4
- data/lib/red_amber/refinements.rb +15 -2
- data/lib/red_amber/subframes.rb +319 -191
- data/lib/red_amber/vector.rb +7 -30
- data/lib/red_amber/vector_binary_element_wise.rb +149 -1
- data/lib/red_amber/vector_selectable.rb +49 -12
- data/lib/red_amber/vector_unary_element_wise.rb +93 -0
- data/lib/red_amber/version.rb +1 -1
- data/red_amber.gemspec +3 -3
- metadata +16 -7
data/lib/red_amber/subframes.rb
CHANGED
@@ -10,6 +10,38 @@ module RedAmber
|
|
10
10
|
using RefineArray
|
11
11
|
using RefineArrayLike
|
12
12
|
|
13
|
+
# Entity to select sub-dataframes
|
14
|
+
class Selectors
|
15
|
+
attr_reader :selectors, :size, :sizes
|
16
|
+
|
17
|
+
def initialize(selectors)
|
18
|
+
@selectors = selectors
|
19
|
+
@size = selectors.size
|
20
|
+
@sizes = []
|
21
|
+
end
|
22
|
+
|
23
|
+
def each
|
24
|
+
@selectors.each
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Boolean selectors of sub-dataframes
|
29
|
+
class Filters < Selectors
|
30
|
+
def sizes
|
31
|
+
# count true
|
32
|
+
@sizes = @selectors.map { |s| s.to_a.count { _1 } } # rubocop:disable Performance/Size
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# Index selectors of sub-dataframes
|
37
|
+
class Indices < Selectors
|
38
|
+
def sizes
|
39
|
+
@sizes = @selectors.map(&:size)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
private_constant :Selectors, :Filters, :Indices
|
44
|
+
|
13
45
|
class << self
|
14
46
|
# Create SubFrames from a Group.
|
15
47
|
#
|
@@ -79,13 +111,8 @@ module RedAmber
|
|
79
111
|
def by_indices(dataframe, subset_indices)
|
80
112
|
instance = allocate
|
81
113
|
instance.instance_variable_set(:@baseframe, dataframe)
|
82
|
-
|
83
|
-
|
84
|
-
subset_indices.each do |i|
|
85
|
-
y.yield dataframe.take(i)
|
86
|
-
end
|
87
|
-
end
|
88
|
-
instance.instance_variable_set(:@enum, enum)
|
114
|
+
instance.instance_variable_set(:@selectors, Indices.new(subset_indices))
|
115
|
+
instance.instance_variable_set(:@frames, [])
|
89
116
|
instance
|
90
117
|
end
|
91
118
|
|
@@ -105,13 +132,8 @@ module RedAmber
|
|
105
132
|
def by_filters(dataframe, subset_filters)
|
106
133
|
instance = allocate
|
107
134
|
instance.instance_variable_set(:@baseframe, dataframe)
|
108
|
-
|
109
|
-
|
110
|
-
subset_filters.each do |i|
|
111
|
-
y.yield dataframe.filter(i)
|
112
|
-
end
|
113
|
-
end
|
114
|
-
instance.instance_variable_set(:@enum, enum)
|
135
|
+
instance.instance_variable_set(:@selectors, Filters.new(subset_filters))
|
136
|
+
instance.instance_variable_set(:@frames, [])
|
115
137
|
instance
|
116
138
|
end
|
117
139
|
|
@@ -130,18 +152,13 @@ module RedAmber
|
|
130
152
|
case Array(dataframes)
|
131
153
|
when [] || [nil]
|
132
154
|
instance.instance_variable_set(:@baseframe, DataFrame.new)
|
155
|
+
instance.instance_variable_set(:@selectors, [])
|
133
156
|
instance.instance_variable_set(:@frames, [])
|
134
|
-
enum = [].each
|
135
157
|
else
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
y.yield i
|
140
|
-
end
|
141
|
-
end
|
142
|
-
instance.instance_variable_set(:@baseframe, enum.reduce(&:concatenate))
|
158
|
+
instance.instance_variable_set(:@baseframe, nil)
|
159
|
+
instance.instance_variable_set(:@selectors, nil)
|
160
|
+
instance.instance_variable_set(:@frames, dataframes)
|
143
161
|
end
|
144
|
-
instance.instance_variable_set(:@enum, enum)
|
145
162
|
instance
|
146
163
|
end
|
147
164
|
|
@@ -160,11 +177,13 @@ module RedAmber
|
|
160
177
|
# @return [SubFrames]
|
161
178
|
# a new SubFrames.
|
162
179
|
#
|
180
|
+
# @since 0.4.0
|
181
|
+
#
|
163
182
|
def define_subframable_method(method)
|
164
183
|
define_method(method) do |&block|
|
165
184
|
return enum_for(:each) { size } unless block # rubocop:disable Lint/ToEnumArguments
|
166
185
|
|
167
|
-
|
186
|
+
SubFrames.by_dataframes(super(&block))
|
168
187
|
end
|
169
188
|
end
|
170
189
|
end
|
@@ -195,25 +214,31 @@ module RedAmber
|
|
195
214
|
# 4 5 B true
|
196
215
|
# 5 6 C false
|
197
216
|
#
|
198
|
-
#
|
217
|
+
# # --- This object is used as common source in this class ---
|
218
|
+
# subframes = SubFrames.new(dataframe, [[0, 1], [2, 3, 4], [5]])
|
199
219
|
#
|
200
220
|
# # =>
|
201
|
-
# #<RedAmber::SubFrames :
|
202
|
-
# @baseframe=#<RedAmber::DataFrame :
|
203
|
-
#
|
221
|
+
# #<RedAmber::SubFrames : 0x000000000000cf6c>
|
222
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000cf80>
|
223
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
204
224
|
# ---
|
205
|
-
# #<RedAmber::DataFrame :
|
225
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000cf94>
|
206
226
|
# x y z
|
207
227
|
# <uint8> <string> <boolean>
|
208
228
|
# 0 1 A false
|
209
|
-
# 1
|
210
|
-
# 2 4 B (nil)
|
229
|
+
# 1 2 A true
|
211
230
|
# ---
|
212
|
-
# #<RedAmber::DataFrame :
|
231
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000cfa8>
|
213
232
|
# x y z
|
214
233
|
# <uint8> <string> <boolean>
|
215
|
-
# 0
|
216
|
-
# 1
|
234
|
+
# 0 3 B false
|
235
|
+
# 1 4 B (nil)
|
236
|
+
# 2 5 B true
|
237
|
+
# ---
|
238
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000cfbc>
|
239
|
+
# x y z
|
240
|
+
# <uint8> <string> <boolean>
|
241
|
+
# 0 6 C false
|
217
242
|
#
|
218
243
|
# @overload initialize(dataframe)
|
219
244
|
# Create a new SubFrames object by block.
|
@@ -253,43 +278,37 @@ module RedAmber
|
|
253
278
|
#
|
254
279
|
# @since 0.4.0
|
255
280
|
#
|
256
|
-
def initialize(dataframe,
|
281
|
+
def initialize(dataframe, selectors = nil, &block)
|
257
282
|
unless dataframe.is_a?(DataFrame)
|
258
283
|
raise SubFramesArgumentError, "not a DataFrame: #{dataframe}"
|
259
284
|
end
|
260
285
|
|
261
286
|
if block
|
262
|
-
unless
|
287
|
+
unless selectors.nil?
|
263
288
|
raise SubFramesArgumentError, 'Must not specify both arguments and block.'
|
264
289
|
end
|
265
290
|
|
266
|
-
|
291
|
+
selectors = yield(dataframe)
|
267
292
|
end
|
268
293
|
|
269
|
-
if dataframe.empty? ||
|
294
|
+
if dataframe.empty? || selectors.nil? || selectors.empty?
|
270
295
|
@baseframe = DataFrame.new
|
271
|
-
@
|
272
|
-
@enum = @frames.each
|
296
|
+
@selectors = Selectors.new([])
|
273
297
|
else
|
274
|
-
@baseframe =
|
275
|
-
@
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
dataframe.filter(i)
|
283
|
-
else
|
284
|
-
raise SubFramesArgumentError, "illegal type: #{i}"
|
285
|
-
end
|
286
|
-
yielder.yield df
|
287
|
-
end
|
298
|
+
@baseframe = dataframe
|
299
|
+
@selectors =
|
300
|
+
if selectors[0].boolean?
|
301
|
+
Filters.new(selectors)
|
302
|
+
elsif selectors[0].numeric?
|
303
|
+
Indices.new(selectors)
|
304
|
+
else
|
305
|
+
raise SubFramesArgumentError, "illegal type: #{selectors}"
|
288
306
|
end
|
289
307
|
end
|
308
|
+
@frames = []
|
290
309
|
end
|
291
310
|
|
292
|
-
# Return concatenated SubFrames as a
|
311
|
+
# Return concatenated SubFrames as a DataFrame.
|
293
312
|
#
|
294
313
|
# Once evaluated, memorize it as @baseframe.
|
295
314
|
# @return [DataFrame]
|
@@ -325,13 +344,13 @@ module RedAmber
|
|
325
344
|
# returns self.
|
326
345
|
#
|
327
346
|
# @example Returns Enumerator
|
328
|
-
#
|
347
|
+
# subframes.each
|
329
348
|
#
|
330
349
|
# # =>
|
331
350
|
# #<Enumerator: ...>
|
332
351
|
#
|
333
352
|
# @example `to_a` from Enumerable.
|
334
|
-
#
|
353
|
+
# subframes.to_a
|
335
354
|
#
|
336
355
|
# # =>
|
337
356
|
# [#<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000002a120>
|
@@ -354,7 +373,7 @@ module RedAmber
|
|
354
373
|
# ]
|
355
374
|
#
|
356
375
|
# @example Concatenate SubFrames. This example is used in #concatenate.
|
357
|
-
#
|
376
|
+
# subframes.reduce(&:concatenate)
|
358
377
|
#
|
359
378
|
# # =>
|
360
379
|
# #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000004883c>
|
@@ -372,19 +391,120 @@ module RedAmber
|
|
372
391
|
def each(&block)
|
373
392
|
return enum_for(__method__) { size } unless block
|
374
393
|
|
375
|
-
|
394
|
+
if @selectors
|
395
|
+
@selectors.each.with_index do |selector, i|
|
396
|
+
if i < @frames.size
|
397
|
+
yield @frames[i]
|
398
|
+
else
|
399
|
+
frame = get_subframe(selector)
|
400
|
+
@frames << frame
|
401
|
+
yield frame
|
402
|
+
end
|
403
|
+
end
|
404
|
+
else
|
405
|
+
@frames.each(&block)
|
406
|
+
end
|
376
407
|
nil
|
377
408
|
end
|
378
409
|
|
379
410
|
# Aggregate SubFrames to create a DataFrame.
|
380
411
|
#
|
381
|
-
# This method
|
382
|
-
# @
|
412
|
+
# This method creates a DataFrame with one row corresponding to one sub dataframe.
|
413
|
+
# @note This method does not check whether an aggregation function is used.
|
414
|
+
#
|
415
|
+
# @overload aggregate(keys)
|
416
|
+
#
|
417
|
+
# Aggregate SubFrames creating DataFrame with label `keys` and
|
418
|
+
# its column values by block.
|
419
|
+
#
|
420
|
+
# @param keys [Symbol, Array<Symbol>]
|
421
|
+
# a key or keys of result. Key names may be renamed to new label.
|
422
|
+
# @yieldparam dataframe [DataFrame]
|
423
|
+
# passes each dataframe in self to the block. Block is called by instance_eval,
|
424
|
+
# so inside of the block is the context of passed dataframe.
|
425
|
+
# @yieldreturn [Array]
|
426
|
+
# aggregated values from the columns of passed dataframe.
|
427
|
+
# @return [DataFrame]
|
428
|
+
# created DataFrame.
|
429
|
+
# @example Aggregate by key labels in arguments and values from block.
|
430
|
+
# subframes.aggregate(:y, :sum_x) { [y.first, x.sum] }
|
431
|
+
#
|
432
|
+
# # =>
|
433
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
434
|
+
# y sum_x
|
435
|
+
# <string> <uint8>
|
436
|
+
# 0 A 3
|
437
|
+
# 1 B 12
|
438
|
+
# 2 C 6
|
439
|
+
#
|
440
|
+
# @example Aggregate by key labels in an Array and values from block.
|
441
|
+
# subframes.aggregate([:y, :sum_x]) { [y.first, x.sum] }
|
442
|
+
#
|
443
|
+
# # =>
|
444
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
445
|
+
# y sum_x
|
446
|
+
# <string> <uint8>
|
447
|
+
# 0 A 3
|
448
|
+
# 1 B 12
|
449
|
+
# 2 C 6
|
450
|
+
#
|
451
|
+
# @overload aggregate
|
452
|
+
#
|
453
|
+
# Aggregate SubFrames creating DataFrame with pairs of key and aggregated value
|
454
|
+
# in Hash from the block.
|
455
|
+
#
|
456
|
+
# @yieldparam dataframe [DataFrame]
|
457
|
+
# passes each dataframe in self to the block. Block is called by instance_eval,
|
458
|
+
# so inside of the block is the context of passed dataframe.
|
459
|
+
# @yieldreturn [Hash<key => aggregated_value>]
|
460
|
+
# pairs of key name and aggregated values from the columns of passed dataframe.
|
461
|
+
# Key names may be renamed to new label in the result.
|
462
|
+
# @return [DataFrame]
|
463
|
+
# created DataFrame.
|
464
|
+
# @example Aggregate by key and value pairs from block.
|
465
|
+
# subframes.aggregate do
|
466
|
+
# { y: y.first, sum_x: x.sum }
|
467
|
+
# end
|
468
|
+
#
|
469
|
+
# # =>
|
470
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
471
|
+
# y sum_x
|
472
|
+
# <string> <uint8>
|
473
|
+
# 0 A 3
|
474
|
+
# 1 B 12
|
475
|
+
# 2 C 6
|
476
|
+
#
|
477
|
+
# @overload aggregate
|
478
|
+
#
|
479
|
+
# Aggregate SubFrames creating DataFrame with an Array of key and aggregated value
|
480
|
+
# from the block.
|
481
|
+
#
|
482
|
+
# @yieldparam dataframe [DataFrame]
|
483
|
+
# passes each dataframe in self to the block. Block is called by instance_eval,
|
484
|
+
# so inside of the block is the context of passed dataframe.
|
485
|
+
# @yieldreturn [Array<key, aggregated_value>]
|
486
|
+
# pairs of key name and aggregated values from the columns of passed dataframe.
|
487
|
+
# Key names may be renamed to new label in the result.
|
488
|
+
# @return [DataFrame]
|
489
|
+
# created DataFrame.
|
490
|
+
# @example Aggregate by key and value arrays from block.
|
491
|
+
# subframes.aggregate do
|
492
|
+
# [[:y, y.first], [:sum_x, x.sum]]
|
493
|
+
# end
|
494
|
+
#
|
495
|
+
# # =>
|
496
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
497
|
+
# y sum_x
|
498
|
+
# <string> <uint8>
|
499
|
+
# 0 A 3
|
500
|
+
# 1 B 12
|
501
|
+
# 2 C 6
|
383
502
|
#
|
384
503
|
# @overload aggregate(group_keys, aggregations)
|
385
504
|
#
|
386
505
|
# Aggregate SubFrames for first values of the columns of
|
387
506
|
# `group_keys` and the aggregated results of key-function pairs.
|
507
|
+
# [Experimental] This API may be changed in the future.
|
388
508
|
#
|
389
509
|
# @param group_keys [Symbol, String, Array<Symbol, String>]
|
390
510
|
# group key name(s) to output values.
|
@@ -393,47 +513,23 @@ module RedAmber
|
|
393
513
|
# Vector aggregate function name to apply.
|
394
514
|
# @return [DataFrame]
|
395
515
|
# an aggregated DataFrame.
|
396
|
-
# @example
|
397
|
-
# subframes
|
398
|
-
#
|
399
|
-
# # =>
|
400
|
-
# #<RedAmber::SubFrames : 0x0000000000003980>
|
401
|
-
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x0000000000003994>
|
402
|
-
# 3 SubFrames: [2, 3, 1] in sizes.
|
403
|
-
# ---
|
404
|
-
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x00000000000039a8>
|
405
|
-
# x y z
|
406
|
-
# <uint8> <string> <boolean>
|
407
|
-
# 0 1 A false
|
408
|
-
# 1 2 A true
|
409
|
-
# ---
|
410
|
-
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000039bc>
|
411
|
-
# x y z
|
412
|
-
# <uint8> <string> <boolean>
|
413
|
-
# 0 3 B false
|
414
|
-
# 1 4 B (nil)
|
415
|
-
# 2 5 B true
|
416
|
-
# ---
|
417
|
-
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x00000000000039d0>
|
418
|
-
# x y z
|
419
|
-
# <uint8> <string> <boolean>
|
420
|
-
# 0 6 C false
|
421
|
-
#
|
422
|
-
# subframes.aggregate(:y, { x: :sum })
|
516
|
+
# @example Aggregate with a group key and key function pairs by a Hash.
|
517
|
+
# subframes.aggregate(:y, { x: :sum, z: :count })
|
423
518
|
#
|
424
519
|
# # =>
|
425
520
|
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
426
|
-
# y sum_x
|
427
|
-
# <string> <uint8>
|
428
|
-
# 0 A 3
|
429
|
-
# 1 B 12
|
430
|
-
# 2 C 6
|
521
|
+
# y sum_x count_z
|
522
|
+
# <string> <uint8> <uint8>
|
523
|
+
# 0 A 3 2
|
524
|
+
# 1 B 12 2
|
525
|
+
# 2 C 6 1
|
431
526
|
#
|
432
527
|
# @overload aggregate(group_keys, aggregations)
|
433
528
|
#
|
434
529
|
# Aggregate SubFrames for first values of the columns of
|
435
530
|
# `group_keys` and the aggregated results of all combinations
|
436
531
|
# of supplied keys and functions.
|
532
|
+
# [Experimental] This API may be changed in the future.
|
437
533
|
#
|
438
534
|
# @param group_keys [Symbol, String, Array<Symbol, String>]
|
439
535
|
# group key name(s) to output values.
|
@@ -442,83 +538,60 @@ module RedAmber
|
|
442
538
|
# Array of Vector aggregate function names to apply.
|
443
539
|
# @return [DataFrame]
|
444
540
|
# an aggregated DataFrame.
|
445
|
-
# @example
|
541
|
+
# @example Aggregate with group keys and keys and functions by an Array.
|
446
542
|
# sf.aggregate(:y, [[:x, :z], [:count, :sum]])
|
447
543
|
#
|
448
544
|
# # =>
|
449
545
|
# #<RedAmber::DataFrame : 3 x 5 Vectors, 0x000000000000fcbc>
|
450
|
-
# y count_x count_z
|
546
|
+
# y count_x sum_x count_z sum_z
|
451
547
|
# <string> <uint8> <uint8> <uint8> <uint8>
|
452
|
-
# 0 A 2 2
|
453
|
-
# 1 B 3
|
454
|
-
# 2 C 1 1
|
548
|
+
# 0 A 2 3 2 1
|
549
|
+
# 1 B 3 12 2 1
|
550
|
+
# 2 C 1 6 1 0
|
455
551
|
#
|
456
552
|
# @since 0.4.0
|
457
553
|
#
|
458
|
-
def aggregate(
|
554
|
+
def aggregate(*args, &block)
|
459
555
|
aggregator =
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
556
|
+
if block
|
557
|
+
if args.empty?
|
558
|
+
# aggregate { {key => value} or [[key, value], ...] }
|
559
|
+
each_with_object(Hash.new { |h, k| h[k] = [] }) do |df, hash|
|
560
|
+
df.instance_eval(&block).to_h.each do |k, v|
|
561
|
+
hash[k] << v
|
562
|
+
end
|
466
563
|
end
|
467
|
-
|
468
|
-
|
564
|
+
else
|
565
|
+
# aggregate(keys) { values }
|
566
|
+
values = each.map { |df| Array(df.instance_eval(&block)) }.transpose
|
567
|
+
args.flatten.zip(values)
|
469
568
|
end
|
470
|
-
|
471
|
-
functions
|
472
|
-
|
473
|
-
|
474
|
-
|
569
|
+
else
|
570
|
+
# These functions may be removed in the future.
|
571
|
+
case args
|
572
|
+
in [group_keys1, Hash => h]
|
573
|
+
# aggregate(group_keys, { key => func })
|
574
|
+
ary = Array(group_keys1).map { |key| [:first, key] }
|
575
|
+
ary.concat(h.to_a.map { [_2, _1] }) # rubocop:disable Style/NumberedParametersLimit
|
576
|
+
in [group_keys2, [Array => keys, Array => funcs]]
|
577
|
+
# aggregate(group_keys, [keys, funcs])
|
578
|
+
ary = Array(group_keys2).map { |key| [:first, key] }
|
579
|
+
ary.concat(funcs.product(keys))
|
580
|
+
else
|
581
|
+
raise SubFramesArgumentError, "invalid argument: #{args}"
|
475
582
|
end
|
476
583
|
sf = self
|
477
|
-
|
478
|
-
|
584
|
+
ary.map do |func, key|
|
585
|
+
label = func == :first ? key : "#{func}_#{key}"
|
586
|
+
[label, sf.each.map { |df| df[key].send(func) }]
|
479
587
|
end
|
480
|
-
else
|
481
|
-
raise SubFramesArgumentError, "invalid argument: #{aggregations}"
|
482
588
|
end
|
483
|
-
|
484
|
-
if group_keys.empty?
|
485
|
-
DataFrame.new(aggregator)
|
486
|
-
else
|
487
|
-
baseframe
|
488
|
-
.pick(group_keys)
|
489
|
-
.slice(offset_indices)
|
490
|
-
.assign(aggregator)
|
491
|
-
end
|
589
|
+
DataFrame.new(aggregator)
|
492
590
|
end
|
493
591
|
|
494
592
|
# Returns a SubFrames containing DataFrames returned by the block.
|
495
593
|
#
|
496
594
|
# @example Map as it is.
|
497
|
-
# subframes
|
498
|
-
#
|
499
|
-
# # =>
|
500
|
-
# #<RedAmber::SubFrames : 0x000000000001359c>
|
501
|
-
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x00000000000135b0>
|
502
|
-
# 3 SubFrames: [2, 3, 1] in sizes.
|
503
|
-
# ---
|
504
|
-
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x00000000000135c4>
|
505
|
-
# x y z
|
506
|
-
# <uint8> <string> <boolean>
|
507
|
-
# 0 1 A false
|
508
|
-
# 1 2 A true
|
509
|
-
# ---
|
510
|
-
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000135d8>
|
511
|
-
# x y z
|
512
|
-
# <uint8> <string> <boolean>
|
513
|
-
# 0 3 B false
|
514
|
-
# 1 4 B (nil)
|
515
|
-
# 2 5 B true
|
516
|
-
# ---
|
517
|
-
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x00000000000135ec>
|
518
|
-
# x y z
|
519
|
-
# <uint8> <string> <boolean>
|
520
|
-
# 0 6 C false
|
521
|
-
#
|
522
595
|
# subframes.map { _1 }
|
523
596
|
#
|
524
597
|
# # This will create a new SubFrame and a new baseframe,
|
@@ -593,31 +666,6 @@ module RedAmber
|
|
593
666
|
# @return [SubFrames]
|
594
667
|
# a new SubFrames object with updated DataFrames.
|
595
668
|
# @example
|
596
|
-
# subframes
|
597
|
-
#
|
598
|
-
# # =>
|
599
|
-
# #<RedAmber::SubFrames : 0x000000000000c33c>
|
600
|
-
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000c350>
|
601
|
-
# 3 SubFrames: [2, 3, 1] in sizes.
|
602
|
-
# ---
|
603
|
-
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000c364>
|
604
|
-
# x y z
|
605
|
-
# <uint8> <string> <boolean>
|
606
|
-
# 0 1 A false
|
607
|
-
# 1 2 A true
|
608
|
-
# ---
|
609
|
-
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000c378>
|
610
|
-
# x y z
|
611
|
-
# <uint8> <string> <boolean>
|
612
|
-
# 0 3 B false
|
613
|
-
# 1 4 B (nil)
|
614
|
-
# 2 5 B true
|
615
|
-
# ---
|
616
|
-
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000c38c>
|
617
|
-
# x y z
|
618
|
-
# <uint8> <string> <boolean>
|
619
|
-
# 0 6 C false
|
620
|
-
#
|
621
669
|
# subframes.assign(:x_plus1) { x + 1 }
|
622
670
|
#
|
623
671
|
# # =>
|
@@ -887,6 +935,26 @@ module RedAmber
|
|
887
935
|
#
|
888
936
|
define_subframable_method :filter_map
|
889
937
|
|
938
|
+
# Return 0...num sub-dataframes in self.
|
939
|
+
#
|
940
|
+
# @param num [Integer, Float]
|
941
|
+
# num of sub-dataframes to pick up. `num` must be positive or zero.
|
942
|
+
# @return [SubFrames]
|
943
|
+
# A new SubFrames.
|
944
|
+
# If num == 0, it returns an empty SubFrames.
|
945
|
+
# If num >= size, it returns self.
|
946
|
+
# @since 0.4.2
|
947
|
+
#
|
948
|
+
def take(num)
|
949
|
+
if num.zero?
|
950
|
+
SubFrames.new(DataFrame.new, [])
|
951
|
+
elsif num >= size
|
952
|
+
self
|
953
|
+
else
|
954
|
+
SubFrames.by_dataframes(frames(num))
|
955
|
+
end
|
956
|
+
end
|
957
|
+
|
890
958
|
# Number of subsets.
|
891
959
|
#
|
892
960
|
# @return [Integer]
|
@@ -894,7 +962,12 @@ module RedAmber
|
|
894
962
|
# @since 0.4.0
|
895
963
|
#
|
896
964
|
def size
|
897
|
-
@size ||=
|
965
|
+
@size ||=
|
966
|
+
if @selectors
|
967
|
+
@selectors.size
|
968
|
+
else
|
969
|
+
@frames.size
|
970
|
+
end
|
898
971
|
end
|
899
972
|
|
900
973
|
# Size list of subsets.
|
@@ -904,7 +977,12 @@ module RedAmber
|
|
904
977
|
# @since 0.4.0
|
905
978
|
#
|
906
979
|
def sizes
|
907
|
-
@sizes ||=
|
980
|
+
@sizes ||=
|
981
|
+
if @selectors
|
982
|
+
@selectors.sizes
|
983
|
+
else
|
984
|
+
@frames.map(&:size)
|
985
|
+
end
|
908
986
|
end
|
909
987
|
|
910
988
|
# Indices at the top of each sub DataFrames.
|
@@ -912,14 +990,21 @@ module RedAmber
|
|
912
990
|
# @return [Array<Integer>]
|
913
991
|
# indices of offset of each sub DataFrames.
|
914
992
|
# @example When `sizes` is [2, 3, 1].
|
915
|
-
#
|
993
|
+
# subframes.offset_indices # => [0, 2, 5]
|
916
994
|
# @since 0.4.0
|
917
995
|
#
|
918
996
|
def offset_indices
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
997
|
+
case @selectors
|
998
|
+
when Filters
|
999
|
+
@selectors.selectors.map do |selector|
|
1000
|
+
selector.each.with_index.find { |x, _| x }[1]
|
1001
|
+
end
|
1002
|
+
else # Indices, nil
|
1003
|
+
sum = 0
|
1004
|
+
sizes.map do |size|
|
1005
|
+
sum += size
|
1006
|
+
sum - size
|
1007
|
+
end
|
923
1008
|
end
|
924
1009
|
end
|
925
1010
|
|
@@ -936,11 +1021,11 @@ module RedAmber
|
|
936
1021
|
# Test if self has only one subset and it is comprehensive.
|
937
1022
|
#
|
938
1023
|
# @return [true, false]
|
939
|
-
# true if only member of self is equal to universal DataFrame.
|
1024
|
+
# true if the only member of self is equal to universal DataFrame.
|
940
1025
|
# @since 0.4.0
|
941
1026
|
#
|
942
1027
|
def universal?
|
943
|
-
size == 1 &&
|
1028
|
+
size == 1 && first == @baseframe
|
944
1029
|
end
|
945
1030
|
|
946
1031
|
# Return string representation of self.
|
@@ -983,7 +1068,7 @@ module RedAmber
|
|
983
1068
|
#
|
984
1069
|
# @since 0.4.0
|
985
1070
|
#
|
986
|
-
def to_s(limit:
|
1071
|
+
def to_s(limit: 5)
|
987
1072
|
_to_s(limit: limit)
|
988
1073
|
end
|
989
1074
|
|
@@ -1035,23 +1120,66 @@ module RedAmber
|
|
1035
1120
|
#
|
1036
1121
|
# @since 0.4.0
|
1037
1122
|
#
|
1038
|
-
def inspect(limit:
|
1123
|
+
def inspect(limit: 5)
|
1124
|
+
shape =
|
1125
|
+
if @baseframe.nil?
|
1126
|
+
'(Not prepared)'
|
1127
|
+
else
|
1128
|
+
baseframe.shape_str(with_id: true)
|
1129
|
+
end
|
1039
1130
|
sizes_truncated = (size > limit ? sizes.take(limit) << '...' : sizes).join(', ')
|
1040
1131
|
"#<#{self.class} : #{format('0x%016x', object_id)}>\n" \
|
1041
|
-
"@baseframe=#<#{
|
1132
|
+
"@baseframe=#<#{shape}>\n" \
|
1042
1133
|
"#{size} SubFrame#{pl(size)}: " \
|
1043
1134
|
"[#{sizes_truncated}] in size#{pl(size)}.\n" \
|
1044
1135
|
"---\n#{_to_s(limit: limit, with_id: true)}"
|
1045
1136
|
end
|
1046
1137
|
|
1138
|
+
# Return an Array of sub DataFrames
|
1139
|
+
#
|
1140
|
+
# @overload frames
|
1141
|
+
# Returns all sub dataframes.
|
1142
|
+
#
|
1143
|
+
# @return [Array<DataFrame>]
|
1144
|
+
# sub DataFrames.
|
1145
|
+
#
|
1146
|
+
# @overload frames(n_frames)
|
1147
|
+
# Returns partial sub dataframes.
|
1148
|
+
#
|
1149
|
+
# @param n_frames [Integer]
|
1150
|
+
# num of dataframes to retrieve.
|
1151
|
+
# @return [Array<DataFrame>]
|
1152
|
+
# sub DataFrames.
|
1153
|
+
#
|
1154
|
+
# @since 0.4.2
|
1155
|
+
#
|
1156
|
+
def frames(n_frames = nil)
|
1157
|
+
n_frames = size if n_frames.nil?
|
1158
|
+
|
1159
|
+
if @frames.size < n_frames
|
1160
|
+
@frames = each.take(n_frames)
|
1161
|
+
else
|
1162
|
+
@frames.take(n_frames)
|
1163
|
+
end
|
1164
|
+
end
|
1165
|
+
|
1047
1166
|
private
|
1048
1167
|
|
1049
|
-
|
1050
|
-
|
1168
|
+
# Get sub dataframe specified by 'selector'
|
1169
|
+
def get_subframe(selector)
|
1170
|
+
df =
|
1171
|
+
case @selectors
|
1172
|
+
when Filters
|
1173
|
+
@baseframe.filter(selector)
|
1174
|
+
when Indices
|
1175
|
+
@baseframe.take(selector)
|
1176
|
+
end
|
1177
|
+
DataFrame.new_dataframe_with_schema(@baseframe, df)
|
1051
1178
|
end
|
1052
1179
|
|
1053
|
-
|
1054
|
-
|
1180
|
+
# Subcontractor of to_s
|
1181
|
+
def _to_s(limit: 5, with_id: false)
|
1182
|
+
a = each.take(limit).map do |df|
|
1055
1183
|
if with_id
|
1056
1184
|
"#<#{df.shape_str(with_id: with_id)}>\n" \
|
1057
1185
|
"#{df.to_s(head: 2, tail: 2)}"
|