red_amber 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +39 -20
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +113 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +25 -26
  8. data/benchmark/basic.yml +2 -2
  9. data/benchmark/combine.yml +2 -2
  10. data/benchmark/dataframe.yml +2 -2
  11. data/benchmark/group.yml +2 -2
  12. data/benchmark/reshape.yml +2 -2
  13. data/benchmark/vector.yml +3 -0
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +429 -75
  20. data/lib/red_amber/data_frame_combinable.rb +516 -66
  21. data/lib/red_amber/data_frame_displayable.rb +244 -14
  22. data/lib/red_amber/data_frame_indexable.rb +121 -18
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +622 -66
  26. data/lib/red_amber/data_frame_variable_operation.rb +446 -34
  27. data/lib/red_amber/group.rb +187 -22
  28. data/lib/red_amber/helper.rb +70 -10
  29. data/lib/red_amber/refinements.rb +12 -5
  30. data/lib/red_amber/subframes.rb +1066 -0
  31. data/lib/red_amber/vector.rb +385 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +387 -0
  34. data/lib/red_amber/vector_selectable.rb +217 -12
  35. data/lib/red_amber/vector_unary_element_wise.rb +436 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -0,0 +1,1066 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedAmber
4
+ # class SubFrames treats a set of subsets of a DataFrame
5
+ # [Experimental feature] Class SubFrames may be removed or be changed in the future.
6
+ class SubFrames
7
+ include Enumerable # may change to use Forwardable.
8
+ include Helper
9
+
10
+ using RefineArray
11
+ using RefineArrayLike
12
+
13
+ class << self
14
+ # Create SubFrames from a Group.
15
+ #
16
+ # [Experimental feature] this method may be removed or be changed in the future.
17
+ # @param group [Group]
18
+ # a Group to be used to create SubFrames.
19
+ # @return [SubFrames]
20
+ # a created SubFrames.
21
+ # @example
22
+ # dataframe
23
+ #
24
+ # # =>
25
+ # #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
26
+ # x y z
27
+ # <uint8> <string> <boolean>
28
+ # 0 1 A false
29
+ # 1 2 A true
30
+ # 2 3 B false
31
+ # 3 4 B (nil)
32
+ # 4 5 B true
33
+ # 5 6 C false
34
+ #
35
+ # group = Group.new(dataframe, [:y])
36
+ # sf = SubFrames.by_group(group)
37
+ #
38
+ # # =>
39
+ # #<RedAmber::SubFrames : 0x000000000000fbb8>
40
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fb7c>
41
+ # 3 SubFrames: [2, 3, 1] in sizes.
42
+ # ---
43
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fbcc>
44
+ # x y z
45
+ # <uint8> <string> <boolean>
46
+ # 0 1 A false
47
+ # 1 2 A true
48
+ # ---
49
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fbe0>
50
+ # x y z
51
+ # <uint8> <string> <boolean>
52
+ # 0 3 B false
53
+ # 1 4 B (nil)
54
+ # 2 5 B true
55
+ # ---
56
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fbf4>
57
+ # x y z
58
+ # <uint8> <string> <boolean>
59
+ # 0 6 C false
60
+ #
61
+ # @since 0.4.0
62
+ #
63
def by_group(group)
  # A Group exposes its source DataFrame and one filter per group;
  # both are handed to the regular constructor.
  source = group.dataframe
  group_filters = group.filters
  SubFrames.new(source, group_filters)
end
66
+
67
+ # Create a new SubFrames object from a DataFrame and an array of indices.
68
+ #
69
+ # @api private
70
+ # @note this method doesn't check arguments.
71
+ # @param dataframe [DataFrame]
72
+ # a source dataframe.
73
+ # @param subset_indices [Array, Array<Vector>]
74
+ # an Array of numeric indices to create subsets of DataFrame.
75
+ # @return [SubFrames]
76
+ # a new SubFrames object.
77
+ # @since 0.4.0
78
+ #
79
def by_indices(dataframe, subset_indices)
  # Sized enumerator that builds each subset with DataFrame#take
  # only when it is iterated.
  subsets =
    Enumerator.new(subset_indices.size) do |yielder|
      subset_indices.each { |indices| yielder.yield dataframe.take(indices) }
    end

  # Bypass #initialize: arguments are trusted here (see @note).
  allocate.tap do |subframes|
    subframes.instance_variable_set(:@baseframe, dataframe)
    subframes.instance_variable_set(:@enum, subsets)
  end
end
91
+
92
+ # Create a new SubFrames object from a DataFrame and an array of filters.
93
+ #
94
+ # @api private
95
+ # @note this method doesn't check arguments.
96
+ # @param dataframe [DataFrame]
97
+ # a source dataframe.
98
+ # @param subset_filters [Array, Array<Vector>]
99
+ # an Array of booleans to specify subsets of DataFrame.
100
+ # Each filter must have the same length as dataframe.
101
+ # @return [SubFrames]
102
+ # a new SubFrames object.
103
+ # @since 0.4.0
104
+ #
105
def by_filters(dataframe, subset_filters)
  # Sized enumerator that builds each subset with DataFrame#filter
  # only when it is iterated.
  subsets =
    Enumerator.new(subset_filters.size) do |yielder|
      subset_filters.each { |booleans| yielder.yield dataframe.filter(booleans) }
    end

  # Bypass #initialize: arguments are trusted here (see @note).
  allocate.tap do |subframes|
    subframes.instance_variable_set(:@baseframe, dataframe)
    subframes.instance_variable_set(:@enum, subsets)
  end
end
117
+
118
+ # Create a new SubFrames from an Array of DataFrames.
119
+ #
120
+ # @api private
121
+ # @note dataframes must have same schema.
122
+ # @param dataframes [Array<DataFrame>]
123
+ # an array of DataFrames which have same schema.
124
+ # @return [SubFrames]
125
+ # a new SubFrames object.
126
+ # @since 0.4.0
127
+ #
128
def by_dataframes(dataframes)
  instance = allocate
  case Array(dataframes)
  when [], [nil]
    # Nothing to hold (nil, [] or [nil]): build an empty SubFrames with an
    # empty DataFrame as baseframe and the frames list already memoized.
    # Both patterns are listed separately because `[] || [nil]` is just `[]`.
    instance.instance_variable_set(:@baseframe, DataFrame.new)
    instance.instance_variable_set(:@frames, [])
    enum = [].each
  else
    # Sized enumerator yielding the given DataFrames in their original order.
    enum =
      Enumerator.new(dataframes.size) do |y|
        dataframes.each do |i|
          y.yield i
        end
      end
    # The baseframe is the concatenation of every given DataFrame.
    instance.instance_variable_set(:@baseframe, enum.reduce(&:concatenate))
  end
  instance.instance_variable_set(:@enum, enum)
  instance
end
147
+
148
+ private
149
+
150
+ # This method upgrades an iterating method from Enumerable to return SubFrames.
151
+
152
+ # @!macro [attach] define_subframable_method
153
+ #
154
+ # [Returns SubFrames] Use `#each.$1` if you want to get DataFrames by Array.
155
+ # Returns an Enumerator with no block given.
156
+ # @yieldparam dataframe [DataFrame]
157
+ # gives each element.
158
+ # @yieldreturn [Array<DataFrame>]
159
+ # the block should return DataFrames with same schema.
160
+ # @return [SubFrames]
161
+ # a new SubFrames.
162
+ #
163
def define_subframable_method(method)
  define_method(method) do |&block|
    if block
      # Run the Enumerable implementation, then wrap the resulting
      # Array of DataFrames back into a SubFrames.
      self.class.by_dataframes(super(&block))
    else
      # No block: hand back a sized Enumerator over #each.
      enum_for(:each) { size } # rubocop:disable Lint/ToEnumArguments
    end
  end
end
170
+ end
171
+
172
+ # Create a new SubFrames object from a DataFrame and an array of indices or filters.
173
+ #
174
+ # @overload initialize(dataframe, subset_specifier)
175
+ # Create a new SubFrames object.
176
+ #
177
+ # @param dataframe [DataFrame]
178
+ # a source dataframe.
179
+ # @param subset_specifier [Array<Vector>, Array<array-like>]
180
+ # an Array of numeric indices or boolean filters
181
+ # to create subsets of DataFrame.
182
+ # @return [SubFrames]
183
+ # new SubFrames.
184
+ # @example
185
+ # dataframe
186
+ #
187
+ # # =>
188
+ # #<RedAmber::DataFrame : 6 x 3 Vectors, 0x00000000000039e4>
189
+ # x y z
190
+ # <uint8> <string> <boolean>
191
+ # 0 1 A false
192
+ # 1 2 A true
193
+ # 2 3 B false
194
+ # 3 4 B (nil)
195
+ # 4 5 B true
196
+ # 5 6 C false
197
+ #
198
+ # SubFrames.new(dataframe, [[0, 2, 3], [4, 1]])
199
+ #
200
+ # # =>
201
+ # #<RedAmber::SubFrames : 0x0000000000003a34>
202
+ # @baseframe=#<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000003a48>
203
+ # 2 SubFrames: [3, 2] in sizes.
204
+ # ---
205
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003a5c>
206
+ # x y z
207
+ # <uint8> <string> <boolean>
208
+ # 0 1 A false
209
+ # 1 3 B false
210
+ # 2 4 B (nil)
211
+ # ---
212
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003a70>
213
+ # x y z
214
+ # <uint8> <string> <boolean>
215
+ # 0 5 B true
216
+ # 1 2 A true
217
+ #
218
+ # @overload initialize(dataframe)
219
+ # Create a new SubFrames object by block.
220
+ #
221
+ # @param dataframe [DataFrame]
222
+ # a source dataframe.
223
+ # @yieldparam dataframe [DataFrame]
224
+ # the block is called with `dataframe`.
225
+ # @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
226
+ # an Array of index or boolean array-likes to create subsets of DataFrame.
227
+ # All array-likes must respond to #numeric? or #boolean?.
228
+ # @return [SubFrames]
229
+ # a new SubFrames object.
230
+ # @example
231
+ # SubFrames.new(dataframe) do |df|
232
+ # booleans = df[:z]
233
+ # [booleans, !booleans]
234
+ # end
235
+ #
236
+ # # =>
237
+ # #<RedAmber::SubFrames : 0x0000000000003aac>
238
+ # @baseframe=#<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000003ac0>
239
+ # 2 SubFrames: [2, 3] in sizes.
240
+ # ---
241
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003ad4>
242
+ # x y z
243
+ # <uint8> <string> <boolean>
244
+ # 0 2 A true
245
+ # 1 5 B true
246
+ # ---
247
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003ae8>
248
+ # x y z
249
+ # <uint8> <string> <boolean>
250
+ # 0 1 A false
251
+ # 1 3 B false
252
+ # 2 6 C false
253
+ #
254
+ # @since 0.4.0
255
+ #
256
def initialize(dataframe, subset_specifier = nil, &block)
  unless dataframe.is_a?(DataFrame)
    raise SubFramesArgumentError, "not a DataFrame: #{dataframe}"
  end

  if block
    # A specifier may come from the argument or from the block, never both.
    unless subset_specifier.nil?
      raise SubFramesArgumentError, 'Must not specify both arguments and block.'
    end

    subset_specifier = yield(dataframe)
  end

  if dataframe.empty? || subset_specifier.nil? || subset_specifier.empty?
    # Nothing to split: an empty SubFrames with an empty baseframe.
    @baseframe = DataFrame.new
    @frames = []
    @enum = @frames.each
  else
    # The baseframe is left unset here and built on demand by #baseframe.
    @baseframe = nil
    # Subsets are created lazily; an illegal specifier therefore raises
    # only when the enumerator reaches it.
    @enum =
      Enumerator.new(subset_specifier.size) do |yielder|
        # `each`, not `map`: the loop is run only for its yields, so no
        # result Array needs to be collected.
        subset_specifier.each do |i|
          df =
            if i.numeric?
              dataframe.take(i)
            elsif i.boolean?
              dataframe.filter(i)
            else
              raise SubFramesArgumentError, "illegal type: #{i}"
            end
          yielder.yield df
        end
      end
  end
end
291
+
292
+ # Return concatenated SubFrames as a DataFrame.
293
+ #
294
+ # Once evaluated, memorize it as @baseframe.
295
+ # @return [DataFrame]
296
+ # a concatenated DataFrame.
297
+ # @since 0.4.0
298
+ #
299
def baseframe
  # Reuse the memoized DataFrame when one is already available.
  return @baseframe if @baseframe

  # Otherwise concatenate every sub DataFrame in order and remember the result.
  @baseframe = reduce(&:concatenate)
end
302
+ alias_method :concatenate, :baseframe
303
+ alias_method :concat, :baseframe
304
+
305
+ # Iterates over sub DataFrames or returns an Enumerator.
306
+ #
307
+ # This method will memorize sub DataFrames and always returns the same object.
308
+ # The Class SubFrames is including Enumerable module.
309
+ # So many methods in Enumerable are available.
310
+ #
311
+ # @overload each
312
+ # Returns a new Enumerator if no block given.
313
+ #
314
+ # @return [Enumerator]
315
+ # Enumerator of each elements.
316
+ #
317
+ # @overload each
318
+ # When a block given, passes each sub DataFrames to the block.
319
+ #
320
+ # @yieldparam subframe [DataFrame]
321
+ # passes sub DataFrame by a block parameter.
322
+ # @yieldreturn [Object]
323
+ # evaluated result value from the block.
324
+ # @return [self]
325
+ # returns self.
326
+ #
327
+ # @example Returns Enumerator
328
+ # sf.each
329
+ #
330
+ # # =>
331
+ # #<Enumerator: ...>
332
+ #
333
+ # @example `to_a` from Enumerable.
334
+ # sf.to_a
335
+ #
336
+ # # =>
337
+ # [#<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000002a120>
338
+ # x y z
339
+ # <uint8> <string> <boolean>
340
+ # 0 1 A false
341
+ # 1 2 A true
342
+ # ,
343
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000002a134>
344
+ # x y z
345
+ # <uint8> <string> <boolean>
346
+ # 0 3 B false
347
+ # 1 4 B (nil)
348
+ # 2 5 B true
349
+ # ,
350
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000002a148>
351
+ # x y z
352
+ # <uint8> <string> <boolean>
353
+ # 0 6 C false
354
+ # ]
355
+ #
356
+ # @example Concatenate SubFrames. This example is used in #concatenate.
357
+ # sf.reduce(&:concatenate)
358
+ #
359
+ # # =>
360
+ # #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000004883c>
361
+ # x y z
362
+ # <uint8> <string> <boolean>
363
+ # 0 1 A false
364
+ # 1 2 A true
365
+ # 2 3 B false
366
+ # 3 4 B (nil)
367
+ # 4 5 B true
368
+ # 5 6 C false
369
+ #
370
+ # @since 0.4.0
371
+ #
372
def each(&block)
  # Without a block, hand back a sized Enumerator over this same method.
  return enum_for(__method__) { size } unless block

  # Iterate over the memoized sub DataFrames.
  frames.each(&block)
  # Return the receiver, as the documented `@return [self]` states.
  self
end
378
+
379
+ # Aggregate SubFrames to create a DataFrame.
380
+ #
381
+ # This method will check if built-in aggregation function is used.
382
+ # @todo Support user-defined aggregation functions.
383
+ #
384
+ # @overload aggregate(group_keys, aggregations)
385
+ #
386
+ # Aggregate SubFrames for first values of the columns of
387
+ # `group_keys` and the aggregated results of key-function pairs.
388
+ #
389
+ # @param group_keys [Symbol, String, Array<Symbol, String>]
390
+ # group key name(s) to output values.
391
+ # @param aggregations [Hash<Array<Symbol, String> => Array<:Symbol>>]
392
+ # a Hash of variable (column) name and
393
+ # Vector aggregate function name to apply.
394
+ # @return [DataFrame]
395
+ # an aggregated DataFrame.
396
+ # @example
397
+ # subframes
398
+ #
399
+ # # =>
400
+ # #<RedAmber::SubFrames : 0x0000000000003980>
401
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x0000000000003994>
402
+ # 3 SubFrames: [2, 3, 1] in sizes.
403
+ # ---
404
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x00000000000039a8>
405
+ # x y z
406
+ # <uint8> <string> <boolean>
407
+ # 0 1 A false
408
+ # 1 2 A true
409
+ # ---
410
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000039bc>
411
+ # x y z
412
+ # <uint8> <string> <boolean>
413
+ # 0 3 B false
414
+ # 1 4 B (nil)
415
+ # 2 5 B true
416
+ # ---
417
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x00000000000039d0>
418
+ # x y z
419
+ # <uint8> <string> <boolean>
420
+ # 0 6 C false
421
+ #
422
+ # subframes.aggregate(:y, { x: :sum })
423
+ #
424
+ # # =>
425
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
426
+ # y sum_x
427
+ # <string> <uint8>
428
+ # 0 A 3
429
+ # 1 B 12
430
+ # 2 C 6
431
+ #
432
+ # @overload aggregate(group_keys, aggregations)
433
+ #
434
+ # Aggregate SubFrames for first values of the columns of
435
+ # `group_keys` and the aggregated results of all combinations
436
+ # of supplied keys and functions.
437
+ #
438
+ # @param group_keys [Symbol, String, Array<Symbol, String>]
439
+ # group key name(s) to output values.
440
+ # @param aggregations [Array[Array<Symbol, String>, Array<:Symbol>]]
441
+ # an Array of Array of variable (column) names and
442
+ # Array of Vector aggregate function names to apply.
443
+ # @return [DataFrame]
444
+ # an aggregated DataFrame.
445
+ # @example
446
+ # sf.aggregate(:y, [[:x, :z], [:count, :sum]])
447
+ #
448
+ # # =>
449
+ # #<RedAmber::DataFrame : 3 x 5 Vectors, 0x000000000000fcbc>
450
+ # y count_x count_z sum_x sum_z
451
+ # <string> <uint8> <uint8> <uint8> <uint8>
452
+ # 0 A 2 2 3 1
453
+ # 1 B 3 2 12 1
454
+ # 2 C 1 1 6 0
455
+ #
456
+ # @since 0.4.0
457
+ #
458
def aggregate(group_keys, aggregations)
  # Builds one output column: its name and the per-subframe aggregated values.
  build_column = lambda do |key, func|
    ["#{func}_#{key}", each.map { |df| df[key].send(func) }]
  end
  # Rejects anything Vector does not recognize as an aggregation function.
  ensure_aggregation = lambda do |func|
    next if Vector.aggregate?(func)

    raise SubFramesArgumentError, "not an aggregation function: #{func}"
  end

  aggregator =
    case aggregations
    in Hash
      # key => function pairs; each pair is checked just before it is computed.
      aggregations.map do |key, func|
        ensure_aggregation.call(func)
        build_column.call(key, func)
      end
    in [Array => keys, Array => functions]
      # All functions are checked first, then every function/key combination
      # is computed, functions varying slowest.
      functions.each { |func| ensure_aggregation.call(func) }
      functions.product(keys).map { |func, key| build_column.call(key, func) }
    else
      raise SubFramesArgumentError, "invalid argument: #{aggregations}"
    end

  return DataFrame.new(aggregator) if group_keys.empty?

  # Take the group-key values from the first row of each subset
  # and append the aggregated columns.
  first_rows = baseframe.pick(group_keys).slice(offset_indices)
  first_rows.assign(aggregator)
end
493
+
494
+ # Returns a SubFrames containing DataFrames returned by the block.
495
+ #
496
+ # @example Map as it is.
497
+ # subframes
498
+ #
499
+ # # =>
500
+ # #<RedAmber::SubFrames : 0x000000000001359c>
501
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x00000000000135b0>
502
+ # 3 SubFrames: [2, 3, 1] in sizes.
503
+ # ---
504
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x00000000000135c4>
505
+ # x y z
506
+ # <uint8> <string> <boolean>
507
+ # 0 1 A false
508
+ # 1 2 A true
509
+ # ---
510
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000135d8>
511
+ # x y z
512
+ # <uint8> <string> <boolean>
513
+ # 0 3 B false
514
+ # 1 4 B (nil)
515
+ # 2 5 B true
516
+ # ---
517
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x00000000000135ec>
518
+ # x y z
519
+ # <uint8> <string> <boolean>
520
+ # 0 6 C false
521
+ #
522
+ # subframes.map { _1 }
523
+ #
524
+ # # This will create a new SubFrame and a new baseframe,
525
+ # # But each element DataFrames are re-used.
526
+ # # =>
527
+ # #<RedAmber::SubFrames : 0x000000000001e6cc>
528
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000001e6e0>
529
+ # 3 SubFrames: [2, 3, 1] in sizes.
530
+ # ---
531
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x00000000000135c4>
532
+ # x y z
533
+ # <uint8> <string> <boolean>
534
+ # 0 1 A false
535
+ # 1 2 A true
536
+ # ---
537
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000135d8>
538
+ # x y z
539
+ # <uint8> <string> <boolean>
540
+ # 0 3 B false
541
+ # 1 4 B (nil)
542
+ # 2 5 B true
543
+ # ---
544
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x00000000000135ec>
545
+ # x y z
546
+ # <uint8> <string> <boolean>
547
+ # 0 6 C false
548
+ #
549
+ # @example Assign a new column.
550
+ # subframes.map { |df| df.assign(x_plus1: df[:x] + 1) }
551
+ #
552
+ # # =>
553
+ # #<RedAmber::SubFrames : 0x0000000000040948>
554
+ # @baseframe=#<RedAmber::DataFrame : 6 x 4 Vectors, 0x000000000004095c>
555
+ # 3 SubFrames: [2, 3, 1] in sizes.
556
+ # ---
557
+ # #<RedAmber::DataFrame : 2 x 4 Vectors, 0x0000000000040970>
558
+ # x y z x_plus1
559
+ # <uint8> <string> <boolean> <uint8>
560
+ # 0 1 A false 2
561
+ # 1 2 A true 3
562
+ # ---
563
+ # #<RedAmber::DataFrame : 3 x 4 Vectors, 0x0000000000040984>
564
+ # x y z x_plus1
565
+ # <uint8> <string> <boolean> <uint8>
566
+ # 0 3 B false 4
567
+ # 1 4 B (nil) 5
568
+ # 2 5 B true 6
569
+ # ---
570
+ # #<RedAmber::DataFrame : 1 x 4 Vectors, 0x0000000000040998>
571
+ # x y z x_plus1
572
+ # <uint8> <string> <boolean> <uint8>
573
+ # 0 6 C false 7
574
+ #
575
+ # @since 0.4.0
576
+ #
577
+ define_subframable_method :map
578
+ alias_method :collect, :map
579
+
580
+ # Update existing column(s) or create new columns(s) for each DataFrames in self.
581
+ #
582
+ # Column values are updated by an overloaded common operation.
583
+ #
584
+ # @overload assign(key)
585
+ # Assign a column by argument and block.
586
+ #
587
+ # @param key [Symbol, String]
588
+ # a key of column to assign.
589
+ # @yieldparam dataframe [DataFrame]
590
+ # gives overloaded dataframe in self to the block.
591
+ # @yieldreturn [Vector, Array, Arrow::Array]
592
+ # an updated column value which are overloaded.
593
+ # @return [SubFrames]
594
+ # a new SubFrames object with updated DataFrames.
595
+ # @example
596
+ # subframes
597
+ #
598
+ # # =>
599
+ # #<RedAmber::SubFrames : 0x000000000000c33c>
600
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000c350>
601
+ # 3 SubFrames: [2, 3, 1] in sizes.
602
+ # ---
603
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000c364>
604
+ # x y z
605
+ # <uint8> <string> <boolean>
606
+ # 0 1 A false
607
+ # 1 2 A true
608
+ # ---
609
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000c378>
610
+ # x y z
611
+ # <uint8> <string> <boolean>
612
+ # 0 3 B false
613
+ # 1 4 B (nil)
614
+ # 2 5 B true
615
+ # ---
616
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000c38c>
617
+ # x y z
618
+ # <uint8> <string> <boolean>
619
+ # 0 6 C false
620
+ #
621
+ # subframes.assign(:x_plus1) { x + 1 }
622
+ #
623
+ # # =>
624
+ # #<RedAmber::SubFrames : 0x000000000000c3a0>
625
+ # @baseframe=#<RedAmber::DataFrame : 6 x 4 Vectors, 0x000000000000c3b4>
626
+ # 3 SubFrames: [2, 3, 1] in sizes.
627
+ # ---
628
+ # #<RedAmber::DataFrame : 2 x 4 Vectors, 0x000000000000c3c8>
629
+ # x y z x_plus1
630
+ # <uint8> <string> <boolean> <uint8>
631
+ # 0 1 A false 2
632
+ # 1 2 A true 3
633
+ # ---
634
+ # #<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000c3dc>
635
+ # x y z x_plus1
636
+ # <uint8> <string> <boolean> <uint8>
637
+ # 0 3 B false 4
638
+ # 1 4 B (nil) 5
639
+ # 2 5 B true 6
640
+ # ---
641
+ # #<RedAmber::DataFrame : 1 x 4 Vectors, 0x000000000000c3f0>
642
+ # x y z x_plus1
643
+ # <uint8> <string> <boolean> <uint8>
644
+ # 0 6 C false 7
645
+ #
646
+ # @overload assign(keys)
647
+ # Assign columns by arguments and block.
648
+ #
649
+ # @param keys [Array<Symbol, String>]
650
+ # keys of columns to assign.
651
+ # @yieldparam dataframe [DataFrame]
652
+ # gives overloaded dataframes in self to the block.
653
+ # @yieldreturn [Array<Vector, Array, Arrow::Array>]
654
+ # an updated column values which are overloaded.
655
+ # @return [SubFrames]
656
+ # a new SubFrames object with updated DataFrames.
657
+ # @example
658
+ # subframes.assign(:sum_x, :frac_x) do
659
+ # group_sum = x.sum
660
+ # [[group_sum] * size, x / group_sum.to_f]
661
+ # end
662
+ #
663
+ # # =>
664
+ # #<RedAmber::SubFrames : 0x000000000000fce4>
665
+ # @baseframe=#<RedAmber::DataFrame : 6 x 5 Vectors, 0x000000000000fcf8>
666
+ # 3 SubFrames: [2, 3, 1] in sizes.
667
+ # ---
668
+ # #<RedAmber::DataFrame : 2 x 5 Vectors, 0x000000000000fd0c>
669
+ # x y z sum_x frac_x
670
+ # <uint8> <string> <boolean> <uint8> <double>
671
+ # 0 1 A false 3 0.33
672
+ # 1 2 A true 3 0.67
673
+ # ---
674
+ # #<RedAmber::DataFrame : 3 x 5 Vectors, 0x000000000000fd20>
675
+ # x y z sum_x frac_x
676
+ # <uint8> <string> <boolean> <uint8> <double>
677
+ # 0 3 B false 12 0.25
678
+ # 1 4 B (nil) 12 0.33
679
+ # 2 5 B true 12 0.42
680
+ # ---
681
+ # #<RedAmber::DataFrame : 1 x 5 Vectors, 0x000000000000fd34>
682
+ # x y z sum_x frac_x
683
+ # <uint8> <string> <boolean> <uint8> <double>
684
+ # 0 6 C false 6 1.0
685
+ #
686
+ # @overload assign
687
+ # Assign column(s) by block.
688
+ #
689
+ # @yieldparam dataframe [DataFrame]
690
+ # gives overloaded dataframes in self to the block.
691
+ # @yieldreturn [Hash, Array]
692
+ # pairs of keys and column values which are overloaded.
693
+ # @return [SubFrames]
694
+ # a new SubFrames object with updated DataFrames.
695
+ # @example Compute 'x * z' when (true, not_true) = (1, 0) in z
696
+ # subframes.assign do
697
+ # { 'x*z': x * z.if_else(1, 0) }
698
+ # end
699
+ #
700
+ # # =>
701
+ # #<RedAmber::SubFrames : 0x000000000000fd98>
702
+ # @baseframe=#<RedAmber::DataFrame : 6 x 4 Vectors, 0x000000000000fdac>
703
+ # 3 SubFrames: [2, 3, 1] in sizes.
704
+ # ---
705
+ # #<RedAmber::DataFrame : 2 x 4 Vectors, 0x000000000000fdc0>
706
+ # x y z x*z
707
+ # <uint8> <string> <boolean> <uint8>
708
+ # 0 1 A false 0
709
+ # 1 2 A true 2
710
+ # ---
711
+ # #<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000fdd4>
712
+ # x y z x*z
713
+ # <uint8> <string> <boolean> <uint8>
714
+ # 0 3 B false 0
715
+ # 1 4 B (nil) (nil)
716
+ # 2 5 B true 5
717
+ # ---
718
+ # #<RedAmber::DataFrame : 1 x 4 Vectors, 0x000000000000fde8>
719
+ # x y z x*z
720
+ # <uint8> <string> <boolean> <uint8>
721
+ # 0 6 C false 0
722
+ #
723
+ # @since 0.4.0
724
+ #
725
def assign(...)
  # Forward every argument (and the block) to DataFrame#assign of each
  # sub DataFrame; #map wraps the results into a new SubFrames.
  map do |subframe|
    subframe.assign(...)
  end
end
728
+
729
+ # Returns a SubFrames containing DataFrames selected by the block.
730
+ #
731
+ # With a block given, calls the block with successive DataFrames;
732
+ # returns a SubFrames of those DataFrames for
733
+ # which the block returns a truthy value.
734
+ #
735
+ # @example Select all.
736
+ # subframes.select { true }
737
+ #
738
+ # # =>
739
+ # #<RedAmber::SubFrames : 0x0000000000003a84>
740
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x0000000000003a98>
741
+ # 3 SubFrames: [2, 3, 1] in sizes.
742
+ # ---
743
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003a0c>
744
+ # x y z
745
+ # <uint8> <string> <boolean>
746
+ # 0 1 A false
747
+ # 1 2 A true
748
+ # ---
749
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003a20>
750
+ # x y z
751
+ # <uint8> <string> <boolean>
752
+ # 0 3 B false
753
+ # 1 4 B (nil)
754
+ # 2 5 B true
755
+ # ---
756
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x0000000000003a34>
757
+ # x y z
758
+ # <uint8> <string> <boolean>
759
+ # 0 6 C false
760
+ #
761
+ # @example Select nothing.
762
+ # subframes.select { false }
763
+ #
764
+ # # =>
765
+ # #<RedAmber::SubFrames : 0x00000000000238c0>
766
+ # @baseframe=#<RedAmber::DataFrame : (empty), 0x00000000000238d4>
767
+ # 0 SubFrame: [] in size.
768
+ # ---
769
+ #
770
+ # @example Select if Vector `:z` has any true.
771
+ # subframes.select { |df| df[:z].any? }
772
+ #
773
+ # # =>
774
+ # #<RedAmber::SubFrames : 0x000000000000fba4>
775
+ # @baseframe=#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000fbb8>
776
+ # 2 SubFrames: [2, 1] in sizes.
777
+ # ---
778
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003a0c>
779
+ # x y z
780
+ # <uint8> <string> <boolean>
781
+ # 0 1 A false
782
+ # 1 2 A true
783
+ # ---
784
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003a20>
785
+ # x y z
786
+ # <uint8> <string> <boolean>
787
+ # 0 3 B false
788
+ # 1 4 B (nil)
789
+ # 2 5 B true
790
+ #
791
+ # @since 0.4.0
792
+ #
793
+ define_subframable_method :select
794
+ alias_method :filter, :select
795
+ alias_method :find_all, :select
796
+
797
+ # Returns a SubFrames containing DataFrames rejected by the block.
798
+ #
799
+ # With a block given, calls the block with successive DataFrames;
800
+ # returns a SubFrames of those DataFrames for
801
+ # which the block returns nil or false.
802
+ # @example Reject all.
803
+ # subframes.reject { true }
804
+ #
805
+ # # =>
806
+ # #<RedAmber::SubFrames : 0x00000000000238c0>
807
+ # @baseframe=#<RedAmber::DataFrame : (empty), 0x00000000000238d4>
808
+ # 0 SubFrame: [] in size.
809
+ # ---
810
+ #
811
+ # @example Reject nothing.
812
+ # subframes.reject { false }
813
+ #
814
+ # # =>
815
+ # #<RedAmber::SubFrames : 0x0000000000003a84>
816
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x0000000000003a98>
817
+ # 3 SubFrames: [2, 3, 1] in sizes.
818
+ # ---
819
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003a0c>
820
+ # x y z
821
+ # <uint8> <string> <boolean>
822
+ # 0 1 A false
823
+ # 1 2 A true
824
+ # ---
825
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003a20>
826
+ # x y z
827
+ # <uint8> <string> <boolean>
828
+ # 0 3 B false
829
+ # 1 4 B (nil)
830
+ # 2 5 B true
831
+ # ---
832
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x0000000000003a34>
833
+ # x y z
834
+ # <uint8> <string> <boolean>
835
+ # 0 6 C false
836
+ #
837
+ # @example Reject if Vector `:z` has any true.
838
+ # subframes.reject { |df| df[:z].any? }
839
+ #
840
+ # # =>
841
+ # #<RedAmber::SubFrames : 0x0000000000038d74>
842
+ # @baseframe=#<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000001ad10>
843
+ # 1 SubFrame: [1] in size.
844
+ # ---
845
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000001ad10>
846
+ # x y z
847
+ # <uint8> <string> <boolean>
848
+ # 0 6 C false
849
+ #
850
+ # @since 0.4.0
851
+ #
852
+ define_subframable_method :reject
853
+
854
+ # Returns a SubFrames containing truthy DataFrames returned by the block.
855
+ #
856
+ # With a block given, calls the block with successive DataFrames;
857
+ # returns a SubFrames of the truthy DataFrames returned by the block;
858
+ # nil or false results are dropped.
859
+ # @example Filter for size is larger than 1 and append number to column 'y'.
860
+ # subframes.filter_map do |df|
861
+ # if df.size > 1
862
+ # df.assign(:y) do
863
+ # y.merge(indices('1'), sep: '')
864
+ # end
865
+ # end
866
+ # end
867
+ #
868
+ # # =>
869
+ # #<RedAmber::SubFrames : 0x000000000001da88>
870
+ # @baseframe=#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000001da9c>
871
+ # 2 SubFrames: [2, 3] in sizes.
872
+ # ---
873
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000001dab0>
874
+ # x y z
875
+ # <uint8> <string> <boolean>
876
+ # 0 1 A1 false
877
+ # 1 2 A2 true
878
+ # ---
879
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000001dac4>
880
+ # x y z
881
+ # <uint8> <string> <boolean>
882
+ # 0 3 B1 false
883
+ # 1 4 B2 (nil)
884
+ # 2 5 B3 true
885
+ #
886
+ # @since 0.4.0
887
+ #
888
+ define_subframable_method :filter_map
889
+
890
+ # Number of subsets.
891
+ #
892
+ # @return [Integer]
893
+ # number of subsets in self.
894
+ # @since 0.4.0
895
+ #
896
def size
  # The number of subsets is taken from the enumerator once and cached.
  return @size if @size

  @size = @enum.size
end
899
+
900
+ # Size list of subsets.
901
+ #
902
+ # @return [Array<Integer>]
903
+ # sizes of sub DataFrames.
904
+ # @since 0.4.0
905
+ #
906
def sizes
  # Row counts of every sub DataFrame, computed once and cached.
  return @sizes if @sizes

  @sizes = @enum.map(&:size)
end
909
+
910
+ # Indices at the top of each sub DataFrames.
911
+ #
912
+ # @return [Array<Integer>]
913
+ # indices of offset of each sub DataFrames.
914
+ # @example When `sizes` is [2, 3, 1].
915
+ # sf.offset_indices # => [0, 2, 5]
916
+ # @since 0.4.0
917
+ #
918
def offset_indices
  # Running total of the preceding sizes gives the start row of each subset.
  offsets = []
  sizes.inject(0) do |offset, length|
    offsets << offset
    offset + length
  end
  offsets
end
925
+
926
+ # Test if self contains no subsets.
927
+ #
928
+ # @return [true, false]
929
+ # true if self is an empty subset.
930
+ # @since 0.4.0
931
+ #
932
def empty?
  # Empty means the number of subsets is zero.
  subset_count = size
  subset_count.zero?
end
935
+
936
+ # Test if self has only one subset and it is comprehensive.
937
+ #
938
+ # @return [true, false]
939
+ # true if only member of self is equal to universal DataFrame.
940
+ # @since 0.4.0
941
+ #
942
def universal?
  # More or fewer than one subset can never be universal.
  return false unless size == 1

  # The single subset must equal the whole baseframe.
  @enum.first == baseframe
end
945
+
946
+ # Return string representation of self.
947
+ #
948
+ # @param limit [Integer]
949
+ # maximum number of DataFrames to show.
950
+ # @return [String]
951
+ # return string representation of each sub DataFrame.
952
+ # @example
953
+ # df
954
+ #
955
+ # # =>
956
+ # #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000caa8>
957
+ # x y z
958
+ # <uint8> <string> <boolean>
959
+ # 0 1 A false
960
+ # 1 2 A true
961
+ # 2 3 B false
962
+ # 3 4 B (nil)
963
+ # 4 5 B true
964
+ # 5 6 C false
965
+ #
966
+ # puts SubFrames.new(df, [[0, 1], [2, 3, 4], [5]])
967
+ #
968
+ # # =>
969
+ # x y z
970
+ # <uint8> <string> <boolean>
971
+ # 0 1 A false
972
+ # 1 2 A true
973
+ # ---
974
+ # x y z
975
+ # <uint8> <string> <boolean>
976
+ # 0 3 B false
977
+ # 1 4 B (nil)
978
+ # 2 5 B true
979
+ # ---
980
+ # x y z
981
+ # <uint8> <string> <boolean>
982
+ # 0 6 C false
983
+ #
984
+ # @since 0.4.0
985
+ #
986
def to_s(limit: 16)
  # Plain rendering: the shared formatter is called without id headers.
  _to_s(limit: limit, with_id: false)
end
989
+
990
+ # Return summary information of self.
991
+ #
992
+ # @param limit [Integer]
993
+ # maximum number of DataFrames to show.
994
+ # @return [String]
995
+ # return class name, object id, universal DataFrame,
996
+ # size and subset sizes in a String.
997
+ # @example
998
+ # df
999
+ #
1000
+ # # =>
1001
+ # #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000caa8>
1002
+ # x y z
1003
+ # <uint8> <string> <boolean>
1004
+ # 0 1 A false
1005
+ # 1 2 A true
1006
+ # 2 3 B false
1007
+ # 3 4 B (nil)
1008
+ # 4 5 B true
1009
+ # 5 6 C false
1010
+ #
1011
+ # SubFrames.new(df, [[0, 1], [2, 3, 4], [5]])
1012
+ #
1013
+ # # =>
1014
+ # #<RedAmber::SubFrames : 0x000000000000c1fc>
1015
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000c170>
1016
+ # 3 SubFrames: [2, 3, 1] in sizes.
1017
+ # ---
1018
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000002a120>
1019
+ # x y z
1020
+ # <uint8> <string> <boolean>
1021
+ # 0 1 A false
1022
+ # 1 2 A true
1023
+ # ---
1024
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000002a134>
1025
+ # x y z
1026
+ # <uint8> <string> <boolean>
1027
+ # 0 3 B false
1028
+ # 1 4 B (nil)
1029
+ # 2 5 B true
1030
+ # ---
1031
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000002a148>
1032
+ # x y z
1033
+ # <uint8> <string> <boolean>
1034
+ # 0 6 C false
1035
+ #
1036
+ # @since 0.4.0
1037
+ #
1038
def inspect(limit: 16)
  # Show at most `limit` sizes, marking the cut with '...'.
  shown_sizes = size > limit ? sizes.take(limit) << '...' : sizes
  size_list = shown_sizes.join(', ')

  # Header lines, then a separator, then the sub DataFrames with their ids.
  [
    "#<#{self.class} : #{format('0x%016x', object_id)}>",
    "@baseframe=#<#{baseframe.shape_str(with_id: true)}>",
    "#{size} SubFrame#{pl(size)}: [#{size_list}] in size#{pl(size)}.",
    "---\n#{_to_s(limit: limit, with_id: true)}"
  ].join("\n")
end
1046
+
1047
+ private
1048
+
1049
def frames
  # Materialize the enumerator once; later calls reuse the same Array.
  return @frames if @frames

  @frames = @enum.to_a
end
1052
+
1053
def _to_s(limit: 16, with_id: false)
  # Render at most `limit` sub DataFrames, each optionally preceded by
  # a shape/id header line.
  sections = take(limit).map do |df|
    header = with_id ? "#<#{df.shape_str(with_id: with_id)}>\n" : ''
    "#{header}#{df.to_s(head: 2, tail: 2)}"
  end

  # Mention how many DataFrames were left out.
  if size > limit
    omitted = size - limit
    sections << "+ #{omitted} more DataFrame#{pl(omitted)}.\n"
  end

  sections.join("---\n")
end
1065
+ end
1066
+ end