red_amber 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +39 -20
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +113 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +25 -26
  8. data/benchmark/basic.yml +2 -2
  9. data/benchmark/combine.yml +2 -2
  10. data/benchmark/dataframe.yml +2 -2
  11. data/benchmark/group.yml +2 -2
  12. data/benchmark/reshape.yml +2 -2
  13. data/benchmark/vector.yml +3 -0
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +429 -75
  20. data/lib/red_amber/data_frame_combinable.rb +516 -66
  21. data/lib/red_amber/data_frame_displayable.rb +244 -14
  22. data/lib/red_amber/data_frame_indexable.rb +121 -18
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +622 -66
  26. data/lib/red_amber/data_frame_variable_operation.rb +446 -34
  27. data/lib/red_amber/group.rb +187 -22
  28. data/lib/red_amber/helper.rb +70 -10
  29. data/lib/red_amber/refinements.rb +12 -5
  30. data/lib/red_amber/subframes.rb +1066 -0
  31. data/lib/red_amber/vector.rb +385 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +387 -0
  34. data/lib/red_amber/vector_selectable.rb +217 -12
  35. data/lib/red_amber/vector_unary_element_wise.rb +436 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -0,0 +1,1066 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedAmber
4
+ # class SubFrames treats a set of subsets of a DataFrame
5
+ # [Experimental feature] Class SubFrames may be removed or be changed in the future.
6
+ class SubFrames
7
+ include Enumerable # may change to use Forwardable.
8
+ include Helper
9
+
10
+ using RefineArray
11
+ using RefineArrayLike
12
+
13
+ class << self
14
+ # Create SubFrames from a Group.
15
+ #
16
+ # [Experimental feature] this method may be removed or be changed in the future.
17
+ # @param group [Group]
18
+ # a Group to be used to create SubFrames.
19
+ # @return [SubFrames]
20
+ # a created SubFrames.
21
+ # @example
22
+ # dataframe
23
+ #
24
+ # # =>
25
+ # #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
26
+ # x y z
27
+ # <uint8> <string> <boolean>
28
+ # 0 1 A false
29
+ # 1 2 A true
30
+ # 2 3 B false
31
+ # 3 4 B (nil)
32
+ # 4 5 B true
33
+ # 5 6 C false
34
+ #
35
+ # group = Group.new(dataframe, [:y])
36
+ # sf = SubFrames.by_group(group)
37
+ #
38
+ # # =>
39
+ # #<RedAmber::SubFrames : 0x000000000000fbb8>
40
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fb7c>
41
+ # 3 SubFrames: [2, 3, 1] in sizes.
42
+ # ---
43
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fbcc>
44
+ # x y z
45
+ # <uint8> <string> <boolean>
46
+ # 0 1 A false
47
+ # 1 2 A true
48
+ # ---
49
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fbe0>
50
+ # x y z
51
+ # <uint8> <string> <boolean>
52
+ # 0 3 B false
53
+ # 1 4 B (nil)
54
+ # 2 5 B true
55
+ # ---
56
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fbf4>
57
+ # x y z
58
+ # <uint8> <string> <boolean>
59
+ # 0 6 C false
60
+ #
61
+ # @since 0.4.0
62
+ #
63
def by_group(group)
  # A Group supplies both the source DataFrame and one boolean filter per group.
  source = group.dataframe
  filters = group.filters
  SubFrames.new(source, filters)
end
66
+
67
+ # Create a new SubFrames object from a DataFrame and an array of indices.
68
+ #
69
+ # @api private
70
+ # @note this method doesn't check arguments.
71
+ # @param dataframe [DataFrame]
72
+ # a source dataframe.
73
+ # @param subset_indices [Array, Array<Vector>]
74
+ # an Array of numeric indices to create subsets of DataFrame.
75
+ # @return [SubFrames]
76
+ # a new SubFrames object.
77
+ # @since 0.4.0
78
+ #
79
def by_indices(dataframe, subset_indices)
  # Subsets are produced lazily: each index list is applied with
  # DataFrame#take only when the enumerator is consumed.
  subsets =
    Enumerator.new(subset_indices.size) do |yielder|
      subset_indices.each { |indices| yielder.yield dataframe.take(indices) }
    end

  # Bypass #initialize (no argument checks) and set the state directly.
  allocate.tap do |subframes|
    subframes.instance_variable_set(:@baseframe, dataframe)
    subframes.instance_variable_set(:@enum, subsets)
  end
end
91
+
92
+ # Create a new SubFrames object from a DataFrame and an array of filters.
93
+ #
94
+ # @api private
95
+ # @note this method doesn't check arguments.
96
+ # @param dataframe [DataFrame]
97
+ # a source dataframe.
98
+ # @param subset_filters [Array, Array<Vector>]
99
+ # an Array of booleans to specify subsets of DataFrame.
100
+ # Each filter must have the same length as the dataframe.
101
+ # @return [SubFrames]
102
+ # a new SubFrames object.
103
+ # @since 0.4.0
104
+ #
105
def by_filters(dataframe, subset_filters)
  # Subsets are produced lazily: each boolean filter is applied with
  # DataFrame#filter only when the enumerator is consumed.
  subsets =
    Enumerator.new(subset_filters.size) do |yielder|
      subset_filters.each { |booleans| yielder.yield dataframe.filter(booleans) }
    end

  # Bypass #initialize (no argument checks) and set the state directly.
  allocate.tap do |subframes|
    subframes.instance_variable_set(:@baseframe, dataframe)
    subframes.instance_variable_set(:@enum, subsets)
  end
end
117
+
118
+ # Create a new SubFrames from an Array of DataFrames.
119
+ #
120
+ # @api private
121
+ # @note dataframes must have same schema.
122
+ # @param dataframes [Array<DataFrame>]
123
+ # an array of DataFrames which have same schema.
124
+ # @return [SubFrames]
125
+ # a new SubFrames object.
126
+ # @since 0.4.0
127
+ #
128
def by_dataframes(dataframes)
  instance = allocate
  case Array(dataframes)
  when [], [nil]
    # Nothing to hold: build an empty SubFrames.
    # NOTE: this used to read `when [] || [nil]`, which Ruby evaluates to
    # `when []` only, so `[nil]` slipped into the else branch and produced
    # a nil baseframe with a nil element.
    instance.instance_variable_set(:@baseframe, DataFrame.new)
    instance.instance_variable_set(:@frames, [])
    enum = [].each
  else
    enum =
      Enumerator.new(dataframes.size) do |y|
        dataframes.each do |i|
          y.yield i
        end
      end
    # The baseframe is the concatenation of all given DataFrames.
    instance.instance_variable_set(:@baseframe, enum.reduce(&:concatenate))
  end
  instance.instance_variable_set(:@enum, enum)
  instance
end
147
+
148
+ private
149
+
150
+ # This method upgrades an iterating method from Enumerable to return SubFrames.
151
+
152
+ # @!macro [attach] define_subframable_method
153
+ #
154
+ # [Returns SubFrames] Use `#each.$1` if you want to get DataFrames by Array.
155
+ # Returns an Enumerator with no block given.
156
+ # @yieldparam dataframe [DataFrame]
157
+ # gives each element.
158
+ # @yieldreturn [Array<DataFrame>]
159
+ # the block should return DataFrames with same schema.
160
+ # @return [SubFrames]
161
+ # a new SubFrames.
162
+ #
163
def define_subframable_method(method)
  define_method(method) do |&block|
    if block
      # Run the Enumerable implementation, then wrap the resulting
      # DataFrames back into a SubFrames.
      self.class.by_dataframes(super(&block))
    else
      enum_for(:each) { size } # rubocop:disable Lint/ToEnumArguments
    end
  end
end
170
+ end
171
+
172
+ # Create a new SubFrames object from a DataFrame and an array of indices or filters.
173
+ #
174
+ # @overload initialize(dataframe, subset_specifier)
175
+ # Create a new SubFrames object.
176
+ #
177
+ # @param dataframe [DataFrame]
178
+ # a source dataframe.
179
+ # @param subset_specifier [Array<Vector>, Array<array-like>]
180
+ # an Array of numeric indices or boolean filters
181
+ # to create subsets of DataFrame.
182
+ # @return [SubFrames]
183
+ # new SubFrames.
184
+ # @example
185
+ # dataframe
186
+ #
187
+ # # =>
188
+ # #<RedAmber::DataFrame : 6 x 3 Vectors, 0x00000000000039e4>
189
+ # x y z
190
+ # <uint8> <string> <boolean>
191
+ # 0 1 A false
192
+ # 1 2 A true
193
+ # 2 3 B false
194
+ # 3 4 B (nil)
195
+ # 4 5 B true
196
+ # 5 6 C false
197
+ #
198
+ # SubFrames.new(dataframe, [[0, 2, 3], [4, 1]])
199
+ #
200
+ # # =>
201
+ # #<RedAmber::SubFrames : 0x0000000000003a34>
202
+ # @baseframe=#<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000003a48>
203
+ # 2 SubFrames: [3, 2] in sizes.
204
+ # ---
205
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003a5c>
206
+ # x y z
207
+ # <uint8> <string> <boolean>
208
+ # 0 1 A false
209
+ # 1 3 B false
210
+ # 2 4 B (nil)
211
+ # ---
212
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003a70>
213
+ # x y z
214
+ # <uint8> <string> <boolean>
215
+ # 0 5 B true
216
+ # 1 2 A true
217
+ #
218
+ # @overload initialize(dataframe)
219
+ # Create a new SubFrames object by block.
220
+ #
221
+ # @param dataframe [DataFrame]
222
+ # a source dataframe.
223
+ # @yieldparam dataframe [DataFrame]
224
+ # the block is called with `dataframe`.
225
+ # @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
226
+ # an Array of index or boolean array-likes to create subsets of DataFrame.
227
+ # All array-likes must respond to #numeric? or #boolean?.
228
+ # @return [SubFrames]
229
+ # a new SubFrames object.
230
+ # @example
231
+ # SubFrames.new(dataframe) do |df|
232
+ # booleans = df[:z]
233
+ # [booleans, !booleans]
234
+ # end
235
+ #
236
+ # # =>
237
+ # #<RedAmber::SubFrames : 0x0000000000003aac>
238
+ # @baseframe=#<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000003ac0>
239
+ # 2 SubFrames: [2, 3] in sizes.
240
+ # ---
241
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003ad4>
242
+ # x y z
243
+ # <uint8> <string> <boolean>
244
+ # 0 2 A true
245
+ # 1 5 B true
246
+ # ---
247
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003ae8>
248
+ # x y z
249
+ # <uint8> <string> <boolean>
250
+ # 0 1 A false
251
+ # 1 3 B false
252
+ # 2 6 C false
253
+ #
254
+ # @since 0.4.0
255
+ #
256
def initialize(dataframe, subset_specifier = nil, &block)
  unless dataframe.is_a?(DataFrame)
    raise SubFramesArgumentError, "not a DataFrame: #{dataframe}"
  end

  if block
    unless subset_specifier.nil?
      raise SubFramesArgumentError, 'Must not specify both arguments and block.'
    end

    # The block receives the source dataframe and returns the specifiers.
    subset_specifier = yield(dataframe)
  end

  if dataframe.empty? || subset_specifier.nil? || subset_specifier.empty?
    # Nothing to subset: hold an empty baseframe and no frames.
    @baseframe = DataFrame.new
    @frames = []
    @enum = @frames.each
  else
    # Left nil on purpose; #baseframe builds it on first use.
    @baseframe = nil
    @enum =
      Enumerator.new(subset_specifier.size) do |yielder|
        # `each`, not `map`: the loop is run only for its side effect of
        # yielding, so no intermediate Array should be built.
        subset_specifier.each do |specifier|
          subframe =
            if specifier.numeric?
              dataframe.take(specifier)
            elsif specifier.boolean?
              dataframe.filter(specifier)
            else
              # Raised lazily, when the enumerator reaches this specifier.
              raise SubFramesArgumentError, "illegal type: #{specifier}"
            end
          yielder.yield subframe
        end
      end
  end
end
291
+
292
+ # Return concatenated SubFrames as a DataFrame.
293
+ #
294
+ # Once evaluated, memorize it as @baseframe.
295
+ # @return [DataFrame]
296
+ # a concatenated DataFrame.
297
+ # @since 0.4.0
298
+ #
299
def baseframe
  # Memoized: concatenate the sub DataFrames only on the first call.
  return @baseframe if @baseframe

  @baseframe = reduce(&:concatenate)
end
alias_method :concatenate, :baseframe
alias_method :concat, :baseframe
304
+
305
+ # Iterates over sub DataFrames or returns an Enumerator.
306
+ #
307
+ # This method will memorize sub DataFrames and always returns the same object.
308
+ # The Class SubFrames is including Enumerable module.
309
+ # So many methods in Enumerable are available.
310
+ #
311
+ # @overload each
312
+ # Returns a new Enumerator if no block given.
313
+ #
314
+ # @return [Enumerator]
315
+ # Enumerator of each elements.
316
+ #
317
+ # @overload each
318
+ # When a block given, passes each sub DataFrames to the block.
319
+ #
320
+ # @yieldparam subframe [DataFrame]
321
+ # passes sub DataFrame by a block parameter.
322
+ # @yieldreturn [Object]
323
+ # evaluated result value from the block.
324
+ # @return [self]
325
+ # returns self.
326
+ #
327
+ # @example Returns Enumerator
328
+ # sf.each
329
+ #
330
+ # # =>
331
+ # #<Enumerator: ...>
332
+ #
333
+ # @example `to_a` from Enumerable.
334
+ # sf.to_a
335
+ #
336
+ # # =>
337
+ # [#<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000002a120>
338
+ # x y z
339
+ # <uint8> <string> <boolean>
340
+ # 0 1 A false
341
+ # 1 2 A true
342
+ # ,
343
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000002a134>
344
+ # x y z
345
+ # <uint8> <string> <boolean>
346
+ # 0 3 B false
347
+ # 1 4 B (nil)
348
+ # 2 5 B true
349
+ # ,
350
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000002a148>
351
+ # x y z
352
+ # <uint8> <string> <boolean>
353
+ # 0 6 C false
354
+ # ]
355
+ #
356
+ # @example Concatenate SubFrames. This example is used in #concatenate.
357
+ # sf.reduce(&:concatenate)
358
+ #
359
+ # # =>
360
+ # #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000004883c>
361
+ # x y z
362
+ # <uint8> <string> <boolean>
363
+ # 0 1 A false
364
+ # 1 2 A true
365
+ # 2 3 B false
366
+ # 3 4 B (nil)
367
+ # 4 5 B true
368
+ # 5 6 C false
369
+ #
370
+ # @since 0.4.0
371
+ #
372
def each(&block)
  # Without a block, hand back an Enumerator that knows its size.
  return enum_for(__method__) { size } unless block

  frames.each(&block)
  # Return the receiver, as documented (`@return [self]`) and as Ruby
  # collections conventionally do; this previously returned nil.
  self
end
378
+
379
+ # Aggregate SubFrames to create a DataFrame.
380
+ #
381
+ # This method will check if built-in aggregation function is used.
382
+ # @todo Support user-defined aggregation functions.
383
+ #
384
+ # @overload aggregate(group_keys, aggregations)
385
+ #
386
+ # Aggregate SubFrames for first values of the columns of
387
+ # `group_keys` and the aggregated results of key-function pairs.
388
+ #
389
+ # @param group_keys [Symbol, String, Array<Symbol, String>]
390
+ # group key name(s) to output values.
391
+ # @param aggregations [Hash<Array<Symbol, String> => Array<:Symbol>>]
392
+ # a Hash of variable (column) name and
393
+ # Vector aggregate function name to apply.
394
+ # @return [DataFrame]
395
+ # an aggregated DataFrame.
396
+ # @example
397
+ # subframes
398
+ #
399
+ # # =>
400
+ # #<RedAmber::SubFrames : 0x0000000000003980>
401
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x0000000000003994>
402
+ # 3 SubFrames: [2, 3, 1] in sizes.
403
+ # ---
404
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x00000000000039a8>
405
+ # x y z
406
+ # <uint8> <string> <boolean>
407
+ # 0 1 A false
408
+ # 1 2 A true
409
+ # ---
410
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000039bc>
411
+ # x y z
412
+ # <uint8> <string> <boolean>
413
+ # 0 3 B false
414
+ # 1 4 B (nil)
415
+ # 2 5 B true
416
+ # ---
417
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x00000000000039d0>
418
+ # x y z
419
+ # <uint8> <string> <boolean>
420
+ # 0 6 C false
421
+ #
422
+ # subframes.aggregate(:y, { x: :sum })
423
+ #
424
+ # # =>
425
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
426
+ # y sum_x
427
+ # <string> <uint8>
428
+ # 0 A 3
429
+ # 1 B 12
430
+ # 2 C 6
431
+ #
432
+ # @overload aggregate(group_keys, aggregations)
433
+ #
434
+ # Aggregate SubFrames for first values of the columns of
435
+ # `group_keys` and the aggregated results of all combinations
436
+ # of supplied keys and functions.
437
+ #
438
+ # @param group_keys [Symbol, String, Array<Symbol, String>]
439
+ # group key name(s) to output values.
440
+ # @param aggregations [Array[Array<Symbol, String>, Array<:Symbol>]]
441
+ # an Array of Array of variable (column) names and
442
+ # Array of Vector aggregate function names to apply.
443
+ # @return [DataFrame]
444
+ # an aggregated DataFrame.
445
+ # @example
446
+ # sf.aggregate(:y, [[:x, :z], [:count, :sum]])
447
+ #
448
+ # # =>
449
+ # #<RedAmber::DataFrame : 3 x 5 Vectors, 0x000000000000fcbc>
450
+ # y count_x count_z sum_x sum_z
451
+ # <string> <uint8> <uint8> <uint8> <uint8>
452
+ # 0 A 2 2 3 1
453
+ # 1 B 3 2 12 1
454
+ # 2 C 1 1 6 0
455
+ #
456
+ # @since 0.4.0
457
+ #
458
def aggregate(group_keys, aggregations)
  # Raises unless `func` is accepted by Vector.aggregate?.
  ensure_aggregation = lambda do |func|
    next if Vector.aggregate?(func)

    raise SubFramesArgumentError, "not an aggregation function: #{func}"
  end

  # Builds one output column: [name, aggregated value per sub DataFrame].
  # `each.map` is used instead of `map` so the result is a plain Array.
  column_for = lambda do |func, key|
    ["#{func}_#{key}", each.map { |df| df[key].send(func) }]
  end

  aggregator =
    case aggregations
    in Hash
      # key => function pairs; each function is checked just before use.
      aggregations.map do |key, func|
        ensure_aggregation.call(func)
        column_for.call(func, key)
      end
    in [Array => keys, Array => functions]
      # All functions are checked first, then every function/key combination.
      functions.each { |func| ensure_aggregation.call(func) }
      functions.product(keys).map { |func, key| column_for.call(func, key) }
    else
      raise SubFramesArgumentError, "invalid argument: #{aggregations}"
    end

  return DataFrame.new(aggregator) if group_keys.empty?

  # Take the first row of each sub DataFrame for the group key columns,
  # then append the aggregated columns.
  baseframe.pick(group_keys).slice(offset_indices).assign(aggregator)
end
493
+
494
+ # Returns a SubFrames containing DataFrames returned by the block.
495
+ #
496
+ # @example Map as it is.
497
+ # subframes
498
+ #
499
+ # # =>
500
+ # #<RedAmber::SubFrames : 0x000000000001359c>
501
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x00000000000135b0>
502
+ # 3 SubFrames: [2, 3, 1] in sizes.
503
+ # ---
504
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x00000000000135c4>
505
+ # x y z
506
+ # <uint8> <string> <boolean>
507
+ # 0 1 A false
508
+ # 1 2 A true
509
+ # ---
510
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000135d8>
511
+ # x y z
512
+ # <uint8> <string> <boolean>
513
+ # 0 3 B false
514
+ # 1 4 B (nil)
515
+ # 2 5 B true
516
+ # ---
517
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x00000000000135ec>
518
+ # x y z
519
+ # <uint8> <string> <boolean>
520
+ # 0 6 C false
521
+ #
522
+ # subframes.map { _1 }
523
+ #
524
+ # # This will create a new SubFrame and a new baseframe,
525
+ # # But each element DataFrames are re-used.
526
+ # # =>
527
+ # #<RedAmber::SubFrames : 0x000000000001e6cc>
528
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000001e6e0>
529
+ # 3 SubFrames: [2, 3, 1] in sizes.
530
+ # ---
531
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x00000000000135c4>
532
+ # x y z
533
+ # <uint8> <string> <boolean>
534
+ # 0 1 A false
535
+ # 1 2 A true
536
+ # ---
537
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000135d8>
538
+ # x y z
539
+ # <uint8> <string> <boolean>
540
+ # 0 3 B false
541
+ # 1 4 B (nil)
542
+ # 2 5 B true
543
+ # ---
544
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x00000000000135ec>
545
+ # x y z
546
+ # <uint8> <string> <boolean>
547
+ # 0 6 C false
548
+ #
549
+ # @example Assign a new column.
550
+ # subframes.map { |df| df.assign(x_plus1: df[:x] + 1) }
551
+ #
552
+ # # =>
553
+ # #<RedAmber::SubFrames : 0x0000000000040948>
554
+ # @baseframe=#<RedAmber::DataFrame : 6 x 4 Vectors, 0x000000000004095c>
555
+ # 3 SubFrames: [2, 3, 1] in sizes.
556
+ # ---
557
+ # #<RedAmber::DataFrame : 2 x 4 Vectors, 0x0000000000040970>
558
+ # x y z x_plus1
559
+ # <uint8> <string> <boolean> <uint8>
560
+ # 0 1 A false 2
561
+ # 1 2 A true 3
562
+ # ---
563
+ # #<RedAmber::DataFrame : 3 x 4 Vectors, 0x0000000000040984>
564
+ # x y z x_plus1
565
+ # <uint8> <string> <boolean> <uint8>
566
+ # 0 3 B false 4
567
+ # 1 4 B (nil) 5
568
+ # 2 5 B true 6
569
+ # ---
570
+ # #<RedAmber::DataFrame : 1 x 4 Vectors, 0x0000000000040998>
571
+ # x y z x_plus1
572
+ # <uint8> <string> <boolean> <uint8>
573
+ # 0 6 C false 7
574
+ #
575
+ # @since 0.4.0
576
+ #
577
+ define_subframable_method :map
578
+ alias_method :collect, :map
579
+
580
+ # Update existing column(s) or create new columns(s) for each DataFrames in self.
581
+ #
582
+ # Column values are updated by an overloaded common operation.
583
+ #
584
+ # @overload assign(key)
585
+ # Assign a column by argument and block.
586
+ #
587
+ # @param key [Symbol, String]
588
+ # a key of column to assign.
589
+ # @yieldparam dataframe [DataFrame]
590
+ # gives overloaded dataframe in self to the block.
591
+ # @yieldreturn [Vector, Array, Arrow::Array]
592
+ # an updated column value which are overloaded.
593
+ # @return [SubFrames]
594
+ # a new SubFrames object with updated DataFrames.
595
+ # @example
596
+ # subframes
597
+ #
598
+ # # =>
599
+ # #<RedAmber::SubFrames : 0x000000000000c33c>
600
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000c350>
601
+ # 3 SubFrames: [2, 3, 1] in sizes.
602
+ # ---
603
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000c364>
604
+ # x y z
605
+ # <uint8> <string> <boolean>
606
+ # 0 1 A false
607
+ # 1 2 A true
608
+ # ---
609
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000c378>
610
+ # x y z
611
+ # <uint8> <string> <boolean>
612
+ # 0 3 B false
613
+ # 1 4 B (nil)
614
+ # 2 5 B true
615
+ # ---
616
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000c38c>
617
+ # x y z
618
+ # <uint8> <string> <boolean>
619
+ # 0 6 C false
620
+ #
621
+ # subframes.assign(:x_plus1) { x + 1 }
622
+ #
623
+ # # =>
624
+ # #<RedAmber::SubFrames : 0x000000000000c3a0>
625
+ # @baseframe=#<RedAmber::DataFrame : 6 x 4 Vectors, 0x000000000000c3b4>
626
+ # 3 SubFrames: [2, 3, 1] in sizes.
627
+ # ---
628
+ # #<RedAmber::DataFrame : 2 x 4 Vectors, 0x000000000000c3c8>
629
+ # x y z x_plus1
630
+ # <uint8> <string> <boolean> <uint8>
631
+ # 0 1 A false 2
632
+ # 1 2 A true 3
633
+ # ---
634
+ # #<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000c3dc>
635
+ # x y z x_plus1
636
+ # <uint8> <string> <boolean> <uint8>
637
+ # 0 3 B false 4
638
+ # 1 4 B (nil) 5
639
+ # 2 5 B true 6
640
+ # ---
641
+ # #<RedAmber::DataFrame : 1 x 4 Vectors, 0x000000000000c3f0>
642
+ # x y z x_plus1
643
+ # <uint8> <string> <boolean> <uint8>
644
+ # 0 6 C false 7
645
+ #
646
+ # @overload assign(keys)
647
+ # Assign columns by arguments and block.
648
+ #
649
+ # @param keys [Array<Symbol, String>]
650
+ # keys of columns to assign.
651
+ # @yieldparam dataframe [DataFrame]
652
+ # gives overloaded dataframes in self to the block.
653
+ # @yieldreturn [Array<Vector, Array, Arrow::Array>]
654
+ # an updated column values which are overloaded.
655
+ # @return [SubFrames]
656
+ # a new SubFrames object with updated DataFrames.
657
+ # @example
658
+ # subframes.assign(:sum_x, :frac_x) do
659
+ # group_sum = x.sum
660
+ # [[group_sum] * size, x / group_sum.to_f]
661
+ # end
662
+ #
663
+ # # =>
664
+ # #<RedAmber::SubFrames : 0x000000000000fce4>
665
+ # @baseframe=#<RedAmber::DataFrame : 6 x 5 Vectors, 0x000000000000fcf8>
666
+ # 3 SubFrames: [2, 3, 1] in sizes.
667
+ # ---
668
+ # #<RedAmber::DataFrame : 2 x 5 Vectors, 0x000000000000fd0c>
669
+ # x y z sum_x frac_x
670
+ # <uint8> <string> <boolean> <uint8> <double>
671
+ # 0 1 A false 3 0.33
672
+ # 1 2 A true 3 0.67
673
+ # ---
674
+ # #<RedAmber::DataFrame : 3 x 5 Vectors, 0x000000000000fd20>
675
+ # x y z sum_x frac_x
676
+ # <uint8> <string> <boolean> <uint8> <double>
677
+ # 0 3 B false 12 0.25
678
+ # 1 4 B (nil) 12 0.33
679
+ # 2 5 B true 12 0.42
680
+ # ---
681
+ # #<RedAmber::DataFrame : 1 x 5 Vectors, 0x000000000000fd34>
682
+ # x y z sum_x frac_x
683
+ # <uint8> <string> <boolean> <uint8> <double>
684
+ # 0 6 C false 6 1.0
685
+ #
686
+ # @overload assign
687
+ # Assign column(s) by block.
688
+ #
689
+ # @yieldparam dataframe [DataFrame]
690
+ # gives overloaded dataframes in self to the block.
691
+ # @yieldreturn [Hash, Array]
692
+ # pairs of keys and column values which are overloaded.
693
+ # @return [SubFrames]
694
+ # a new SubFrames object with updated DataFrames.
695
+ # @example Compute 'x * z' when (true, not_true) = (1, 0) in z
696
+ # subframes.assign do
697
+ # { 'x*z': x * z.if_else(1, 0) }
698
+ # end
699
+ #
700
+ # # =>
701
+ # #<RedAmber::SubFrames : 0x000000000000fd98>
702
+ # @baseframe=#<RedAmber::DataFrame : 6 x 4 Vectors, 0x000000000000fdac>
703
+ # 3 SubFrames: [2, 3, 1] in sizes.
704
+ # ---
705
+ # #<RedAmber::DataFrame : 2 x 4 Vectors, 0x000000000000fdc0>
706
+ # x y z x*z
707
+ # <uint8> <string> <boolean> <uint8>
708
+ # 0 1 A false 0
709
+ # 1 2 A true 2
710
+ # ---
711
+ # #<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000fdd4>
712
+ # x y z x*z
713
+ # <uint8> <string> <boolean> <uint8>
714
+ # 0 3 B false 0
715
+ # 1 4 B (nil) (nil)
716
+ # 2 5 B true 5
717
+ # ---
718
+ # #<RedAmber::DataFrame : 1 x 4 Vectors, 0x000000000000fde8>
719
+ # x y z x*z
720
+ # <uint8> <string> <boolean> <uint8>
721
+ # 0 6 C false 0
722
+ #
723
+ # @since 0.4.0
724
+ #
725
def assign(...)
  # Forward every argument (and block) to DataFrame#assign for each
  # sub DataFrame; #map collects the results.
  map do |df|
    df.assign(...)
  end
end
728
+
729
+ # Returns a SubFrames containing DataFrames selected by the block.
730
+ #
731
+ # With a block given, calls the block with successive DataFrames;
732
+ # returns a SubFrames of those DataFrames for
733
+ # which the block returns a truthy value.
734
+ #
735
+ # @example Select all.
736
+ # subframes.select { true }
737
+ #
738
+ # # =>
739
+ # #<RedAmber::SubFrames : 0x0000000000003a84>
740
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x0000000000003a98>
741
+ # 3 SubFrames: [2, 3, 1] in sizes.
742
+ # ---
743
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003a0c>
744
+ # x y z
745
+ # <uint8> <string> <boolean>
746
+ # 0 1 A false
747
+ # 1 2 A true
748
+ # ---
749
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003a20>
750
+ # x y z
751
+ # <uint8> <string> <boolean>
752
+ # 0 3 B false
753
+ # 1 4 B (nil)
754
+ # 2 5 B true
755
+ # ---
756
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x0000000000003a34>
757
+ # x y z
758
+ # <uint8> <string> <boolean>
759
+ # 0 6 C false
760
+ #
761
+ # @example Select nothing.
762
+ # subframes.select { false }
763
+ #
764
+ # # =>
765
+ # #<RedAmber::SubFrames : 0x00000000000238c0>
766
+ # @baseframe=#<RedAmber::DataFrame : (empty), 0x00000000000238d4>
767
+ # 0 SubFrame: [] in size.
768
+ # ---
769
+ #
770
+ # @example Select if Vector `:z` has any true.
771
+ # subframes.select { |df| df[:z].any? }
772
+ #
773
+ # # =>
774
+ # #<RedAmber::SubFrames : 0x000000000000fba4>
775
+ # @baseframe=#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000fbb8>
776
+ # 2 SubFrames: [2, 3] in sizes.
777
+ # ---
778
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003a0c>
779
+ # x y z
780
+ # <uint8> <string> <boolean>
781
+ # 0 1 A false
782
+ # 1 2 A true
783
+ # ---
784
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003a20>
785
+ # x y z
786
+ # <uint8> <string> <boolean>
787
+ # 0 3 B false
788
+ # 1 4 B (nil)
789
+ # 2 5 B true
790
+ #
791
+ # @since 0.4.0
792
+ #
793
+ define_subframable_method :select
794
+ alias_method :filter, :select
795
+ alias_method :find_all, :select
796
+
797
+ # Returns a SubFrames containing DataFrames rejected by the block.
798
+ #
799
+ # With a block given, calls the block with successive DataFrames;
800
+ # returns a SubFrames of those DataFrames for
801
+ # which the block returns nil or false.
802
+ # @example Reject all.
803
+ # subframes.reject { true }
804
+ #
805
+ # # =>
806
+ # #<RedAmber::SubFrames : 0x00000000000238c0>
807
+ # @baseframe=#<RedAmber::DataFrame : (empty), 0x00000000000238d4>
808
+ # 0 SubFrame: [] in size.
809
+ # ---
810
+ #
811
+ # @example Reject nothing.
812
+ # subframes.reject { false }
813
+ #
814
+ # # =>
815
+ # #<RedAmber::SubFrames : 0x0000000000003a84>
816
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x0000000000003a98>
817
+ # 3 SubFrames: [2, 3, 1] in sizes.
818
+ # ---
819
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003a0c>
820
+ # x y z
821
+ # <uint8> <string> <boolean>
822
+ # 0 1 A false
823
+ # 1 2 A true
824
+ # ---
825
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003a20>
826
+ # x y z
827
+ # <uint8> <string> <boolean>
828
+ # 0 3 B false
829
+ # 1 4 B (nil)
830
+ # 2 5 B true
831
+ # ---
832
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x0000000000003a34>
833
+ # x y z
834
+ # <uint8> <string> <boolean>
835
+ # 0 6 C false
836
+ #
837
+ # @example Reject if Vector `:z` has any true.
838
+ # subframes.reject { |df| df[:z].any? }
839
+ #
840
+ # # =>
841
+ # #<RedAmber::SubFrames : 0x0000000000038d74>
842
+ # @baseframe=#<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000001ad10>
843
+ # 1 SubFrame: [1] in size.
844
+ # ---
845
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000001ad10>
846
+ # x y z
847
+ # <uint8> <string> <boolean>
848
+ # 0 6 C false
849
+ #
850
+ # @since 0.4.0
851
+ #
852
+ define_subframable_method :reject
853
+
854
+ # Returns a SubFrames containing truthy DataFrames returned by the block.
855
+ #
856
+ # With a block given, calls the block with successive DataFrames;
857
+ # returns a SubFrames of the DataFrames returned by the block,
857
+ # omitting block results that are nil or false.
859
+ # @example Filter for size is larger than 1 and append number to column 'y'.
860
+ # subframes.filter_map do |df|
861
+ # if df.size > 1
862
+ # df.assign(:y) do
863
+ # y.merge(indices('1'), sep: '')
864
+ # end
865
+ # end
866
+ # end
867
+ #
868
+ # # =>
869
+ # #<RedAmber::SubFrames : 0x000000000001da88>
870
+ # @baseframe=#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000001da9c>
871
+ # 2 SubFrames: [2, 3] in sizes.
872
+ # ---
873
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000001dab0>
874
+ # x y z
875
+ # <uint8> <string> <boolean>
876
+ # 0 1 A1 false
877
+ # 1 2 A2 true
878
+ # ---
879
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000001dac4>
880
+ # x y z
881
+ # <uint8> <string> <boolean>
882
+ # 0 3 B1 false
883
+ # 1 4 B2 (nil)
884
+ # 2 5 B3 true
885
+ #
886
+ # @since 0.4.0
887
+ #
888
+ define_subframable_method :filter_map
889
+
890
+ # Number of subsets.
891
+ #
892
+ # @return [Integer]
893
+ # number of subsets in self.
894
+ # @since 0.4.0
895
+ #
896
def size
  # Memoized count of subsets, taken from the enumerator.
  return @size if @size

  @size = @enum.size
end
899
+
900
+ # Size list of subsets.
901
+ #
902
+ # @return [Array<Integer>]
903
+ # sizes of sub DataFrames.
904
+ # @since 0.4.0
905
+ #
906
def sizes
  # Memoized list holding the size of every subset, in order.
  return @sizes if @sizes

  @sizes = @enum.map { |subframe| subframe.size }
end
909
+
910
+ # Indices at the top of each sub DataFrames.
911
+ #
912
+ # @return [Array<Integer>]
913
+ # indices of offset of each sub DataFrames.
914
+ # @example When `sizes` is [2, 3, 1].
915
+ # sf.offset_indices # => [0, 2, 5]
916
+ # @since 0.4.0
917
+ #
918
def offset_indices
  # Running total of the sizes seen so far; the value before adding the
  # current size is the starting offset of that subset.
  next_offset = 0
  sizes.map do |length|
    start = next_offset
    next_offset += length
    start
  end
end
925
+
926
+ # Test if subset is empty?.
927
+ #
928
+ # @return [true, false]
929
+ # true if self is an empty subset.
930
+ # @since 0.4.0
931
+ #
932
def empty?
  # Empty means there are no subsets at all.
  subset_count = size
  subset_count.zero?
end
935
+
936
+ # Test if self has only one subset and it is comprehensive.
937
+ #
938
+ # @return [true, false]
939
+ # true if only member of self is equal to universal DataFrame.
940
+ # @since 0.4.0
941
+ #
942
def universal?
  # Only a single subset can cover the whole baseframe.
  return false unless size == 1

  @enum.first == baseframe
end
945
+
946
+ # Return string representation of self.
947
+ #
948
+ # @param limit [Integer]
949
+ # maximum number of DataFrames to show.
950
+ # @return [String]
951
+ # return string representation of each sub DataFrame.
952
+ # @example
953
+ # df
954
+ #
955
+ # # =>
956
+ # #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000caa8>
957
+ # x y z
958
+ # <uint8> <string> <boolean>
959
+ # 0 1 A false
960
+ # 1 2 A true
961
+ # 2 3 B false
962
+ # 3 4 B (nil)
963
+ # 4 5 B true
964
+ # 5 6 C false
965
+ #
966
+ # puts SubFrames.new(df, [[0, 1], [2, 3, 4], [5]])
967
+ #
968
+ # # =>
969
+ # x y z
970
+ # <uint8> <string> <boolean>
971
+ # 0 1 A false
972
+ # 1 2 A true
973
+ # ---
974
+ # x y z
975
+ # <uint8> <string> <boolean>
976
+ # 0 3 B false
977
+ # 1 4 B (nil)
978
+ # 2 5 B true
979
+ # ---
980
+ # x y z
981
+ # <uint8> <string> <boolean>
982
+ # 0 6 C false
983
+ #
984
+ # @since 0.4.0
985
+ #
986
+ def to_s(limit: 16)
987
+ _to_s(limit: limit)
988
+ end
989
+
990
+ # Return summary information of self.
991
+ #
992
+ # @param limit [Integer]
993
+ # maximum number of DataFrames to show.
994
+ # @return [String]
995
+ # return class name, object id, universal DataFrame,
996
+ # size and subset sizes in a String.
997
+ # @example
998
+ # df
999
+ #
1000
+ # # =>
1001
+ # #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000caa8>
1002
+ # x y z
1003
+ # <uint8> <string> <boolean>
1004
+ # 0 1 A false
1005
+ # 1 2 A true
1006
+ # 2 3 B false
1007
+ # 3 4 B (nil)
1008
+ # 4 5 B true
1009
+ # 5 6 C false
1010
+ #
1011
+ # SubFrames.new(df, [[0, 1], [2, 3, 4], [5]])
1012
+ #
1013
+ # # =>
1014
+ # #<RedAmber::SubFrames : 0x000000000000c1fc>
1015
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000c170>
1016
+ # 3 SubFrames: [2, 3, 1] in sizes.
1017
+ # ---
1018
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000002a120>
1019
+ # x y z
1020
+ # <uint8> <string> <boolean>
1021
+ # 0 1 A false
1022
+ # 1 2 A true
1023
+ # ---
1024
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000002a134>
1025
+ # x y z
1026
+ # <uint8> <string> <boolean>
1027
+ # 0 3 B false
1028
+ # 1 4 B (nil)
1029
+ # 2 5 B true
1030
+ # ---
1031
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000002a148>
1032
+ # x y z
1033
+ # <uint8> <string> <boolean>
1034
+ # 0 6 C false
1035
+ #
1036
+ # @since 0.4.0
1037
+ #
1038
def inspect(limit: 16)
  # Show at most `limit` sizes; mark the cut with '...'.
  # Array#take returns a new Array, so the memoized sizes are not mutated.
  shown_sizes = size > limit ? sizes.take(limit).push('...') : sizes
  header = [
    "#<#{self.class} : #{format('0x%016x', object_id)}>",
    "@baseframe=#<#{baseframe.shape_str(with_id: true)}>",
    "#{size} SubFrame#{pl(size)}: [#{shown_sizes.join(', ')}] in size#{pl(size)}.",
    '---'
  ]
  "#{header.join("\n")}\n#{_to_s(limit: limit, with_id: true)}"
end
1046
+
1047
+ private
1048
+
1049
def frames
  # Materialize the enumerator once and reuse the same Array afterwards.
  return @frames if @frames

  @frames = @enum.to_a
end
1052
+
1053
def _to_s(limit: 16, with_id: false)
  # Render at most `limit` sub DataFrames, each optionally preceded by a
  # shape line that includes its object id.
  parts =
    take(limit).map do |df|
      shape_line = "#<#{df.shape_str(with_id: with_id)}>\n" if with_id
      "#{shape_line}#{df.to_s(head: 2, tail: 2)}"
    end

  # Tell the reader how many DataFrames were left out.
  if size > limit
    omitted = size - limit
    parts.push("+ #{omitted} more DataFrame#{pl(omitted)}.\n")
  end
  parts.join("---\n")
end
1065
+ end
1066
+ end