red_amber 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +39 -20
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +113 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +25 -26
  8. data/benchmark/basic.yml +2 -2
  9. data/benchmark/combine.yml +2 -2
  10. data/benchmark/dataframe.yml +2 -2
  11. data/benchmark/group.yml +2 -2
  12. data/benchmark/reshape.yml +2 -2
  13. data/benchmark/vector.yml +3 -0
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +429 -75
  20. data/lib/red_amber/data_frame_combinable.rb +516 -66
  21. data/lib/red_amber/data_frame_displayable.rb +244 -14
  22. data/lib/red_amber/data_frame_indexable.rb +121 -18
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +622 -66
  26. data/lib/red_amber/data_frame_variable_operation.rb +446 -34
  27. data/lib/red_amber/group.rb +187 -22
  28. data/lib/red_amber/helper.rb +70 -10
  29. data/lib/red_amber/refinements.rb +12 -5
  30. data/lib/red_amber/subframes.rb +1066 -0
  31. data/lib/red_amber/vector.rb +385 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +387 -0
  34. data/lib/red_amber/vector_selectable.rb +217 -12
  35. data/lib/red_amber/vector_unary_element_wise.rb +436 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -4,7 +4,7 @@ module RedAmber
4
4
  # Class to represent a data frame.
5
5
  # Variable @table holds an Arrow::Table object.
6
6
  class DataFrame
7
- # mix-in
7
+ # Mix-in
8
8
  include DataFrameCombinable
9
9
  include DataFrameDisplayable
10
10
  include DataFrameIndexable
@@ -17,26 +17,62 @@ module RedAmber
17
17
  using RefineArrowTable
18
18
  using RefineHash
19
19
 
20
- # Quicker DataFrame construction from a `Arrow::Table`.
21
- #
22
- # @param table [Arrow::Table] A table to have in the DataFrame.
23
- # @return [DataFrame] Initialized DataFrame.
24
- #
25
- # @note This method will allocate table directly and may be used in the method.
26
- # @note `table` must have unique keys.
27
- def self.create(table)
28
- instance = allocate
29
- instance.instance_variable_set(:@table, table)
30
- instance
20
+ class << self
21
+ # Quicker DataFrame constructor from a `Arrow::Table`.
22
+ #
23
+ # @param table [Arrow::Table]
24
+ # A table to have in the DataFrame.
25
+ # @return [DataFrame]
26
+ # Initialized DataFrame.
27
+ #
28
+ # @note This method will allocate table directly and may be used in the method.
29
+ # @note `table` must have unique keys.
30
+ #
31
+ def create(table)
32
+ instance = allocate
33
+ instance.instance_variable_set(:@table, table)
34
+ instance
35
+ end
31
36
  end
32
37
 
33
38
  # Creates a new DataFrame.
34
39
  #
40
+ # @overload initialize(hash)
41
+ # Initialize a DataFrame by a Hash.
42
+ #
43
+ # @param hash [Hash<key => <Array, Arrow::Array, #to_arrow_array>>]
44
+ # a Hash of `key` with array-like for column values.
45
+ # `key`s are Symbol or String.
46
+ # @example Initialize by a Hash
47
+ # hash = { x: [1, 2, 3], y: %w[A B C] }
48
+ # DataFrame.new(hash)
49
+ # @example Initialize by a Hash like arguments.
50
+ # DataFrame.new(x: [1, 2, 3], y: %w[A B C])
51
+ # @example Initialize from #to_arrow_array responsibles.
52
+ # # #to_arrow_array responsible `array-like` is also available.
53
+ # require 'arrow-numo-narray'
54
+ # DataFrame.new(numo: Numo::DFloat.new(3).rand)
55
+ #
35
56
  # @overload initialize(table)
36
- # Initialize DataFrame by an `Arrow::Table`
57
+ # Initialize a DataFrame by an `Arrow::Table`.
37
58
  #
38
59
  # @param table [Arrow::Table]
39
- # A table to have in the DataFrame.
60
+ # a table to have in the DataFrame.
61
+ # @example Initialize by a Table
62
+ # table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
63
+ # DataFrame.new(table)
64
+ #
65
+ # @overload initialize(schema, row_oriented_array)
66
+ # Initialize a DataFrame by schema and row_oriented_array.
67
+ #
68
+ # @param schema [Hash<key => type>]
69
+ # a schema of key and data type.
70
+ # @param row_oriented_array [Array]
71
+ # an Array of rows.
72
+ # @example Initialize by a schema and a row_oriented_array.
73
+ # schema = { x: :uint8, y: :string }
74
+ # row_oriented_array = [[1, 'A'], [2, 'B'], [3, 'C']]
75
+ # DataFrame.new(schema, row_oriented_array)
40
76
  #
41
77
  # @overload initialize(arrowable)
42
78
  # Initialize DataFrame by a `#to_arrow` responsible object.
@@ -47,6 +83,11 @@ module RedAmber
47
83
  #
48
84
  # @note `RedAmber::DataFrame` itself is readable by this.
49
85
  # @note Hash is refined to respond to `#to_arrow` in this class.
86
+ # @example Initialize by Red Dataset object.
87
+ # require 'datasets-arrow'
88
+ # dataset = Datasets::Penguins.new
89
+ # penguins = DataFrame.new(dataset)
90
+ # @since 0.2.2
50
91
  #
51
92
  # @overload initialize(rover_like)
52
93
  # Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
@@ -60,20 +101,18 @@ module RedAmber
60
101
  # @overload initialize()
61
102
  # Create empty DataFrame
62
103
  #
63
- # @example DataFrame.new
104
+ # @example
105
+ # DataFrame.new
64
106
  #
65
107
  # @overload initialize(empty)
66
108
  # Create empty DataFrame
67
109
  #
68
110
  # @param empty [nil, [], {}]
69
111
  #
70
- # @example DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
71
- #
72
- # @overload initialize(args)
73
- #
74
- # @param args [values]
75
- # Accepts any argments which is valid for `Arrow::Table.new(args)`. See
76
- # {https://github.com/apache/arrow/blob/master/ruby/red-arrow/lib/arrow/table.rb
112
+ # @example Return empty DataFrame.
113
+ # DataFrame.new([])
114
+ # DataFrame.new({})
115
+ # DataFrame.new(nil)
77
116
  #
78
117
  def initialize(*args)
79
118
  case args
@@ -109,15 +148,16 @@ module RedAmber
109
148
 
110
149
  # Returns the table having within.
111
150
  #
112
- # @return [Arrow::Table] The table within.
151
+ # @return [Arrow::Table]
152
+ # the table within.
113
153
  #
114
154
  attr_reader :table
115
-
116
155
  alias_method :to_arrow, :table
117
156
 
118
- # Returns the number of rows.
157
+ # Returns the number of records (rows).
119
158
  #
120
- # @return [Integer] Number of rows.
159
+ # @return [Integer]
160
+ # number of records (rows).
121
161
  #
122
162
  def size
123
163
  @table.n_rows
@@ -126,9 +166,10 @@ module RedAmber
126
166
  alias_method :n_obs, :size
127
167
  alias_method :n_rows, :size
128
168
 
129
- # Returns the number of columns.
169
+ # Returns the number of variables (columns).
130
170
  #
131
- # @return [Integer] Number of columns.
171
+ # @return [Integer]
172
+ # number of variables (columns).
132
173
  #
133
174
  def n_keys
134
175
  @table.n_columns
@@ -140,7 +181,7 @@ module RedAmber
140
181
  # Returns the numbers of rows and columns.
141
182
  #
142
183
  # @return [Array]
143
- # Number of rows and number of columns in an array.
184
+ # number of rows and number of columns in an array.
144
185
  # Same as [size, n_keys].
145
186
  #
146
187
  def shape
@@ -160,7 +201,7 @@ module RedAmber
160
201
  # Returns an Array of keys.
161
202
  #
162
203
  # @return [Array]
163
- # Keys in an Array.
204
+ # keys in an Array.
164
205
  #
165
206
  def keys
166
207
  @keys || @keys = init_instance_vars(:keys)
@@ -170,9 +211,10 @@ module RedAmber
170
211
 
171
212
  # Returns true if self has a specified key in the argument.
172
213
  #
173
- # @param key [Symbol, String] Key to test.
214
+ # @param key [Symbol, String]
215
+ # key to test.
174
216
  # @return [Boolean]
175
- # Returns true if self has key in Symbol.
217
+ # returns true if self has key in Symbol.
176
218
  #
177
219
  def key?(key)
178
220
  keys.include?(key.to_sym)
@@ -181,9 +223,10 @@ module RedAmber
181
223
 
182
224
  # Returns index of specified key in the Array keys.
183
225
  #
184
- # @param key [Symbol, String] key to know.
226
+ # @param key [Symbol, String]
227
+ # key to know.
185
228
  # @return [Integer]
186
- # Index of key in the Array keys.
229
+ # index of key in the Array keys.
187
230
  #
188
231
  def key_index(key)
189
232
  keys.find_index(key.to_sym)
@@ -194,7 +237,7 @@ module RedAmber
194
237
  # Returns abbreviated type names in an Array.
195
238
  #
196
239
  # @return [Array]
197
- # Abbreviated Red Arrow data type names.
240
+ # abbreviated Red Arrow data type names.
198
241
  #
199
242
  def types
200
243
  @types || @types = @table.columns.map do |column|
@@ -205,7 +248,7 @@ module RedAmber
205
248
  # Returns an Array of Classes of data type.
206
249
  #
207
250
  # @return [Array]
208
- # An Array of Red Arrow data type Classes.
251
+ # an Array of Red Arrow data type Classes.
209
252
  #
210
253
  def type_classes
211
254
  @data_types || @data_types = @table.columns.map { |column| column.data_type.class }
@@ -214,34 +257,16 @@ module RedAmber
214
257
  # Returns Vectors in an Array.
215
258
  #
216
259
  # @return [Array]
217
- # An Array of `RedAmber::Vector`s.
260
+ # an Array of Vector.
218
261
  #
219
262
  def vectors
220
263
  @vectors || @vectors = init_instance_vars(:vectors)
221
264
  end
222
265
 
223
- # Returns row indices (start...(size+start)) in a Vector.
224
- #
225
- # @param start [Object]
226
- # Object which have `#succ` method.
227
- #
228
- # @return [Array]
229
- # A Vector of row indices.
230
- #
231
- # @example
232
- # (when self.size == 5)
233
- # - indices #=> Vector[0, 1, 2, 3, 4]
234
- # - indices(1) #=> Vector[1, 2, 3, 4, 5]
235
- # - indices('a') #=> Vector['a', 'b', 'c', 'd', 'e']
236
- #
237
- def indices(start = 0)
238
- Vector.new((start..).take(size))
239
- end
240
- alias_method :indexes, :indices
241
-
242
266
  # Returns column-oriented data in a Hash.
243
267
  #
244
- # @return [Hash] A Hash of 'key => column_in_an_array'.
268
+ # @return [Hash]
269
+ # a Hash of 'key => column_in_an_array'.
245
270
  #
246
271
  def to_h
247
272
  variables.transform_values(&:to_a)
@@ -249,7 +274,8 @@ module RedAmber
249
274
 
250
275
  # Returns a row-oriented array without header.
251
276
  #
252
- # @return [Array] Row-oriented data without header.
277
+ # @return [Array]
278
+ # row-oriented data without header.
253
279
  #
254
280
  # @note If you need column-oriented array, use `.to_h.to_a`.
255
281
  #
@@ -260,7 +286,8 @@ module RedAmber
260
286
 
261
287
  # Returns column name and data type in a Hash.
262
288
  #
263
- # @return [Hash] Column name and data type.
289
+ # @return [Hash]
290
+ # column name and data type.
264
291
  #
265
292
  # @example
266
293
  # RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
@@ -273,7 +300,7 @@ module RedAmber
273
300
  # Compare DataFrames.
274
301
  #
275
302
  # @return [true, false]
276
- # True if other is a DataFrame and table is same.
303
+ # true if other is a DataFrame and table is same.
277
304
  # Otherwise return false.
278
305
  #
279
306
  def ==(other)
@@ -282,7 +309,8 @@ module RedAmber
282
309
 
283
310
  # Check if it is an empty DataFrame.
284
311
  #
285
- # @return [true, false] True if it has no columns.
312
+ # @return [true, false]
313
+ # true if it has no columns.
286
314
  #
287
315
  def empty?
288
316
  variables.empty?
@@ -293,14 +321,18 @@ module RedAmber
293
321
  # @overload each_row
294
322
  # Returns Enumerator when no block given.
295
323
  #
296
- # @return [Enumerator] Enumerator of each rows.
324
+ # @return [Enumerator]
325
+ # enumerator of each rows.
297
326
  #
298
327
  # @overload each_row(&block)
299
328
  # Yields with key and row pairs.
300
329
  #
301
- # @yield [key_row_pairs] Yields with key and row pairs.
302
- # @yieldparam [Hash] Key and row pairs.
303
- # @yieldreturn [Integer] Size of the DataFrame.
330
+ # @yieldparam key_row_pairs [Hash]
331
+ # key and row pairs.
332
+ # @yieldreturn [Integer]
333
+ # size of the DataFrame.
334
+ # @return [Integer]
335
+ # returns size.
304
336
  #
305
337
  def each_row
306
338
  return enum_for(:each_row) unless block_given?
@@ -316,25 +348,346 @@ module RedAmber
316
348
 
317
349
  # Returns self in a `Rover::DataFrame`.
318
350
  #
319
- # @return [Rover::DataFrame] A `Rover::DataFrame`.
351
+ # @return [Rover::DataFrame]
352
+ # a `Rover::DataFrame`.
320
353
  #
321
354
  def to_rover
322
355
  require 'rover'
323
356
  Rover::DataFrame.new(to_h)
324
357
  end
325
358
 
359
+ # Create a Group object. Or create a Group and summarize it.
360
+ #
361
+ # @overload group(*group_keys)
362
+ # Create a Group object.
363
+ #
364
+ # @param group_keys [Array<Symbol, String>]
365
+ # keys for grouping.
366
+ # @return [Group]
367
+ # Group object.
368
+ # @example Create a Group
369
+ # penguins.group(:species)
370
+ #
371
+ # # =>
372
+ # #<RedAmber::Group : 0x000000000000c3c8>
373
+ # species group_count
374
+ # <string> <uint8>
375
+ # 0 Adelie 152
376
+ # 1 Chinstrap 68
377
+ # 2 Gentoo 124
378
+ #
379
+ # @overload group(*group_keys)
380
+ # Create a Group and summarize it by aggregation functions from the block.
381
+ #
382
+ # @yieldparam group [Group]
383
+ # passes Group object.
384
+ # @yieldreturn [DataFrame, Array<DataFrame>]
385
+ # an aggregated DataFrame or an array of aggregated DataFrames.
386
+ # @return [DataFrame]
387
+ # summarized DataFrame.
388
+ # @example Create a group and summarize it.
389
+ # penguins.group(:species) { mean(:bill_length_mm) }
390
+ #
391
+ # # =>
392
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f3fc>
393
+ # species mean(bill_length_mm)
394
+ # <string> <double>
395
+ # 0 Adelie 38.79
396
+ # 1 Chinstrap 48.83
397
+ # 2 Gentoo 47.5
398
+ #
326
399
  def group(*group_keys, &block)
327
400
  g = Group.new(self, group_keys)
328
401
  g = g.summarize(&block) if block
329
402
  g
330
403
  end
331
404
 
405
+ # Create SubFrames by value grouping.
406
+ #
407
+ # [Experimental feature] this method may be removed or be changed in the future.
408
+ # @param keys [Symbol, String, Array<Symbol, String>]
409
+ # grouping keys.
410
+ # @return [SubFrames]
411
+ # a created SubFrames grouped by column values on `keys`.
412
+ # @example
413
+ # df.sub_by_value(keys: :y)
414
+ #
415
+ # # =>
416
+ # #<RedAmber::SubFrames : 0x000000000000fc08>
417
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
418
+ # 3 SubFrames: [2, 3, 1] in sizes.
419
+ # ---
420
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>
421
+ # x y z
422
+ # <uint8> <string> <boolean>
423
+ # 0 1 A false
424
+ # 1 2 A true
425
+ # ---
426
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fc30>
427
+ # x y z
428
+ # <uint8> <string> <boolean>
429
+ # 0 3 B false
430
+ # 1 4 B (nil)
431
+ # 2 5 B true
432
+ # ---
433
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>
434
+ # x y z
435
+ # <uint8> <string> <boolean>
436
+ # 0 6 C false
437
+ #
438
+ # @since 0.4.0
439
+ #
440
+ def sub_by_value(keys: nil)
441
+ SubFrames.new(self, group(keys).filters)
442
+ end
443
+ alias_method :subframes_by_value, :sub_by_value
444
+
445
+ # Create SubFrames by Windowing with `from`, `size` and `step`.
446
+ #
447
+ # [Experimental feature] this method may be removed or be changed in the future.
448
+ # @param from [Integer]
449
+ # start position of window.
450
+ # @param size [Integer]
451
+ # window size.
452
+ # @param step [Integer]
453
+ # moving step of window.
454
+ # @return [SubFrames]
455
+ # a created SubFrames.
456
+ # @example
457
+ # df.sub_by_window(size: 4, step: 2)
458
+ #
459
+ # # =>
460
+ # #<RedAmber::SubFrames : 0x000000000000fc58>
461
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
462
+ # 2 SubFrames: [4, 4] in sizes.
463
+ # ---
464
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc6c>
465
+ # x y z
466
+ # <uint8> <string> <boolean>
467
+ # 0 1 A false
468
+ # 1 2 A true
469
+ # 2 3 B false
470
+ # 3 4 B (nil)
471
+ # ---
472
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc80>
473
+ # x y z
474
+ # <uint8> <string> <boolean>
475
+ # 0 3 B false
476
+ # 1 4 B (nil)
477
+ # 2 5 B true
478
+ # 3 6 C false
479
+ #
480
+ # @since 0.4.0
481
+ #
482
+ def sub_by_window(from: 0, size: nil, step: 1)
483
+ SubFrames.new(self) do
484
+ from.step(by: step, to: (size() - size)).map do |i| # rubocop:disable Style/MethodCallWithoutArgsParentheses
485
+ [*i...(i + size)]
486
+ end
487
+ end
488
+ end
489
+ alias_method :subframes_by_window, :sub_by_window
490
+
491
+ # Create SubFrames by Grouping/Windowing by position from an enumerator method.
492
+ #
493
+ # This method will process the indices of self by enumerator.
494
+ # [Experimental feature] this method may be removed or be changed in the future.
495
+ # @param enumerator_method [Symbol]
496
+ # Enumerator name.
497
+ # @param args [<Object>]
498
+ # arguments for the enumerator method.
499
+ # @return [SubFrames]
500
+ # a created SubFrames.
501
+ # @example Create a SubFrames object sliced by 3 rows.
502
+ # df.sub_by_enum(:each_slice, 3)
503
+ #
504
+ # # =>
505
+ # #<RedAmber::SubFrames : 0x000000000000fd20>
506
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
507
+ # 2 SubFrames: [3, 3] in sizes.
508
+ # ---
509
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd34>
510
+ # x y z
511
+ # <uint8> <string> <boolean>
512
+ # 0 1 A false
513
+ # 1 2 A true
514
+ # 2 3 B false
515
+ # ---
516
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd48>
517
+ # x y z
518
+ # <uint8> <string> <boolean>
519
+ # 0 4 B (nil)
520
+ # 1 5 B true
521
+ # 2 6 C false
522
+ #
523
+ # @example Create a SubFrames object for each consecutive 4 rows.
524
+ # df.sub_by_enum(:each_cons, 4)
525
+ #
526
+ # # =>
527
+ # #<RedAmber::SubFrames : 0x000000000000fd98>
528
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
529
+ # 3 SubFrames: [4, 4, 4] in sizes.
530
+ # ---
531
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdac>
532
+ # x y z
533
+ # <uint8> <string> <boolean>
534
+ # 0 1 A false
535
+ # 1 2 A true
536
+ # 2 3 B false
537
+ # 3 4 B (nil)
538
+ # ---
539
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdc0>
540
+ # x y z
541
+ # <uint8> <string> <boolean>
542
+ # 0 2 A true
543
+ # 1 3 B false
544
+ # 2 4 B (nil)
545
+ # 3 5 B true
546
+ # ---
547
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdd4>
548
+ # x y z
549
+ # <uint8> <string> <boolean>
550
+ # 0 3 B false
551
+ # 1 4 B (nil)
552
+ # 2 5 B true
553
+ # 3 6 C false
554
+ #
555
+ # @since 0.4.0
556
+ #
557
+ def sub_by_enum(enumerator_method, *args)
558
+ SubFrames.new(self, indices.send(enumerator_method, *args).to_a)
559
+ end
560
+ alias_method :subframes_by_enum, :sub_by_enum
561
+
562
+ # Create SubFrames by windowing with a kernel (i.e. masked window) and step.
563
+ #
564
+ # [Experimental feature] this method may be removed or be changed in the future.
565
+ # @param kernel [Array<true, false>, Vector]
566
+ # boolean array-like to pick records in the window.
567
+ # Kernel is a boolean Array and it behaves like a masked window.
568
+ # @param step [Integer]
569
+ # moving step of window.
570
+ # @return [SubFrames]
571
+ # a created SubFrames.
572
+ # @example
573
+ # kernel = [true, false, false, true]
574
+ # df.sub_by_kernel(kernel, step: 2)
575
+ #
576
+ # # =>
577
+ # #<RedAmber::SubFrames : 0x000000000000fde8>
578
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
579
+ # 2 SubFrames: [2, 2] in sizes.
580
+ # ---
581
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fdfc>
582
+ # x y z
583
+ # <uint8> <string> <boolean>
584
+ # 0 1 A false
585
+ # 1 4 B (nil)
586
+ # ---
587
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fe10>
588
+ # x y z
589
+ # <uint8> <string> <boolean>
590
+ # 0 3 B false
591
+ # 1 6 C false
592
+ #
593
+ # @since 0.4.0
594
+ #
595
+ def sub_by_kernel(kernel, step: 1)
596
+ limit_size = size - kernel.size
597
+ kernel_vector = Vector.new(kernel.concat([nil] * limit_size))
598
+ SubFrames.new(self) do
599
+ 0.step(by: step, to: limit_size).map do |i|
600
+ kernel_vector.shift(i)
601
+ end
602
+ end
603
+ end
604
+ alias_method :subframes_by_kernel, :sub_by_kernel
605
+
606
+ # Generic builder of sub-dataframes from self.
607
+ #
608
+ # [Experimental feature] this method may be removed or be changed in the future.
609
+ # @overload build_subframes(subset_specifier)
610
+ # Create a new SubFrames object.
611
+ #
612
+ # @param subset_specifier [Array<Vector>, Array<array-like>]
613
+ # an Array of numeric indices or boolean filters
614
+ # to create subsets of DataFrame.
615
+ # @return [SubFrames]
616
+ # new SubFrames.
617
+ # @example
618
+ # df.build_subframes([[0, 2, 4], [1, 3, 5]])
619
+ #
620
+ # # =>
621
+ # #<RedAmber::SubFrames : 0x000000000000fe9c>
622
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
623
+ # 2 SubFrames: [3, 3] in sizes.
624
+ # ---
625
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000feb0>
626
+ # x y z
627
+ # <uint8> <string> <boolean>
628
+ # 0 1 A false
629
+ # 1 3 B false
630
+ # 2 5 B true
631
+ # ---
632
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fec4>
633
+ # x y z
634
+ # <uint8> <string> <boolean>
635
+ # 0 2 A true
636
+ # 1 4 B (nil)
637
+ # 2 6 C false
638
+ #
639
+ # @overload build_subframes
640
+ # Create a new SubFrames object by block.
641
+ #
642
+ # @yield [self]
643
+ # the block is called within the context of self.
644
+ # (Block is called by instance_eval(&block). )
645
+ # @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
646
+ # an Array of index or boolean array-likes to create subsets of DataFrame.
647
+ # All array-likes are responsible to #numeric? or #boolean?.
648
+ # @example
649
+ # dataframe.build_subframes do
650
+ # even = indices.map(&:even?)
651
+ # [even, !even]
652
+ # end
653
+ #
654
+ # # =>
655
+ # #<RedAmber::SubFrames : 0x000000000000fe60>
656
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
657
+ # 2 SubFrames: [3, 3] in sizes.
658
+ # ---
659
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe74>
660
+ # x y z
661
+ # <uint8> <string> <boolean>
662
+ # 0 1 A false
663
+ # 1 3 B false
664
+ # 2 5 B true
665
+ # ---
666
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe88>
667
+ # x y z
668
+ # <uint8> <string> <boolean>
669
+ # 0 2 A true
670
+ # 1 4 B (nil)
671
+ # 2 6 C false
672
+ #
673
+ # @since 0.4.0
674
+ #
675
+ def build_subframes(subset_specifier = nil, &block)
676
+ if block
677
+ SubFrames.new(self, instance_eval(&block))
678
+ else
679
+ SubFrames.new(self, subset_specifier)
680
+ end
681
+ end
682
+
683
+ # Catch variable (column) key as method name.
332
684
  def method_missing(name, *args, &block)
333
685
  return v(name) if args.empty? && key?(name)
334
686
 
335
687
  super
336
688
  end
337
689
 
690
+ # Catch variable (column) key as method name.
338
691
  def respond_to_missing?(name, include_private)
339
692
  return true if key?(name)
340
693
 
@@ -346,15 +699,16 @@ module RedAmber
346
699
  # initialize @variable, @keys, @vectors and return one of them
347
700
  def init_instance_vars(var)
348
701
  ary =
349
- @table.columns
350
- .each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
351
- v = Vector.create(column.data)
352
- k = column.name.to_sym
353
- v.key = k
354
- variables[k] = v
355
- keys << k
356
- vectors << v
357
- end
702
+ @table
703
+ .columns
704
+ .each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
705
+ v = Vector.create(column.data)
706
+ k = column.name.to_sym
707
+ v.key = k
708
+ variables[k] = v
709
+ keys << k
710
+ vectors << v
711
+ end
358
712
 
359
713
  @variables, @keys, @vectors = ary
360
714
  ary[%i[variables keys vectors].index(var)]