red_amber 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +39 -20
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +113 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +25 -26
  8. data/benchmark/basic.yml +2 -2
  9. data/benchmark/combine.yml +2 -2
  10. data/benchmark/dataframe.yml +2 -2
  11. data/benchmark/group.yml +2 -2
  12. data/benchmark/reshape.yml +2 -2
  13. data/benchmark/vector.yml +3 -0
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +429 -75
  20. data/lib/red_amber/data_frame_combinable.rb +516 -66
  21. data/lib/red_amber/data_frame_displayable.rb +244 -14
  22. data/lib/red_amber/data_frame_indexable.rb +121 -18
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +622 -66
  26. data/lib/red_amber/data_frame_variable_operation.rb +446 -34
  27. data/lib/red_amber/group.rb +187 -22
  28. data/lib/red_amber/helper.rb +70 -10
  29. data/lib/red_amber/refinements.rb +12 -5
  30. data/lib/red_amber/subframes.rb +1066 -0
  31. data/lib/red_amber/vector.rb +385 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +387 -0
  34. data/lib/red_amber/vector_selectable.rb +217 -12
  35. data/lib/red_amber/vector_unary_element_wise.rb +436 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -4,7 +4,7 @@ module RedAmber
4
4
  # Class to represent a data frame.
5
5
  # Variable @table holds an Arrow::Table object.
6
6
  class DataFrame
7
- # mix-in
7
+ # Mix-in
8
8
  include DataFrameCombinable
9
9
  include DataFrameDisplayable
10
10
  include DataFrameIndexable
@@ -17,26 +17,62 @@ module RedAmber
17
17
  using RefineArrowTable
18
18
  using RefineHash
19
19
 
20
- # Quicker DataFrame construction from a `Arrow::Table`.
21
- #
22
- # @param table [Arrow::Table] A table to have in the DataFrame.
23
- # @return [DataFrame] Initialized DataFrame.
24
- #
25
- # @note This method will allocate table directly and may be used in the method.
26
- # @note `table` must have unique keys.
27
- def self.create(table)
28
- instance = allocate
29
- instance.instance_variable_set(:@table, table)
30
- instance
20
+ class << self
21
+ # Quicker DataFrame constructor from a `Arrow::Table`.
22
+ #
23
+ # @param table [Arrow::Table]
24
+ # A table to have in the DataFrame.
25
+ # @return [DataFrame]
26
+ # Initialized DataFrame.
27
+ #
28
+ # @note This method will allocate table directly and may be used in the method.
29
+ # @note `table` must have unique keys.
30
+ #
31
+ def create(table)
32
+ instance = allocate
33
+ instance.instance_variable_set(:@table, table)
34
+ instance
35
+ end
31
36
  end
32
37
 
33
38
  # Creates a new DataFrame.
34
39
  #
40
+ # @overload initialize(hash)
41
+ # Initialize a DataFrame by a Hash.
42
+ #
43
+ # @param hash [Hash<key => <Array, Arrow::Array, #to_arrow_array>>]
44
+ # a Hash of `key` with array-like for column values.
45
+ # `key`s are Symbol or String.
46
+ # @example Initialize by a Hash
47
+ # hash = { x: [1, 2, 3], y: %w[A B C] }
48
+ # DataFrame.new(hash)
49
+ # @example Initialize by a Hash like arguments.
50
+ # DataFrame.new(x: [1, 2, 3], y: %w[A B C])
51
+ # @example Initialize from objects that respond to #to_arrow_array.
52
+ # # #to_arrow_array responsible `array-like` is also available.
53
+ # require 'arrow-numo-narray'
54
+ # DataFrame.new(numo: Numo::DFloat.new(3).rand)
55
+ #
35
56
  # @overload initialize(table)
36
- # Initialize DataFrame by an `Arrow::Table`
57
+ # Initialize a DataFrame by an `Arrow::Table`.
37
58
  #
38
59
  # @param table [Arrow::Table]
39
- # A table to have in the DataFrame.
60
+ # a table to have in the DataFrame.
61
+ # @example Initialize by a Table
62
+ # table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
63
+ # DataFrame.new(table)
64
+ #
65
+ # @overload initialize(schema, row_oriented_array)
66
+ # Initialize a DataFrame by schema and row_oriented_array.
67
+ #
68
+ # @param schema [Hash<key => type>]
69
+ # a schema of key and data type.
70
+ # @param row_oriented_array [Array]
71
+ # an Array of rows.
72
+ # @example Initialize by a schema and a row_oriented_array.
73
+ # schema = { x: :uint8, y: :string }
74
+ # row_oriented_array = [[1, 'A'], [2, 'B'], [3, 'C']]
75
+ # DataFrame.new(schema, row_oriented_array)
40
76
  #
41
77
  # @overload initialize(arrowable)
42
78
  # Initialize DataFrame by a `#to_arrow` responsible object.
@@ -47,6 +83,11 @@ module RedAmber
47
83
  #
48
84
  # @note `RedAmber::DataFrame` itself is readable by this.
49
85
  # @note Hash is refined to respond to `#to_arrow` in this class.
86
+ # @example Initialize by Red Dataset object.
87
+ # require 'datasets-arrow'
88
+ # dataset = Datasets::Penguins.new
89
+ # penguins = DataFrame.new(dataset)
90
+ # @since 0.2.2
50
91
  #
51
92
  # @overload initialize(rover_like)
52
93
  # Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
@@ -60,20 +101,18 @@ module RedAmber
60
101
  # @overload initialize()
61
102
  # Create empty DataFrame
62
103
  #
63
- # @example DataFrame.new
104
+ # @example
105
+ # DataFrame.new
64
106
  #
65
107
  # @overload initialize(empty)
66
108
  # Create empty DataFrame
67
109
  #
68
110
  # @param empty [nil, [], {}]
69
111
  #
70
- # @example DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
71
- #
72
- # @overload initialize(args)
73
- #
74
- # @param args [values]
75
- # Accepts any argments which is valid for `Arrow::Table.new(args)`. See
76
- # {https://github.com/apache/arrow/blob/master/ruby/red-arrow/lib/arrow/table.rb
112
+ # @example Return empty DataFrame.
113
+ # DataFrame.new([])
114
+ # DataFrame.new({})
115
+ # DataFrame.new(nil)
77
116
  #
78
117
  def initialize(*args)
79
118
  case args
@@ -109,15 +148,16 @@ module RedAmber
109
148
 
110
149
  # Returns the table having within.
111
150
  #
112
- # @return [Arrow::Table] The table within.
151
+ # @return [Arrow::Table]
152
+ # the table within.
113
153
  #
114
154
  attr_reader :table
115
-
116
155
  alias_method :to_arrow, :table
117
156
 
118
- # Returns the number of rows.
157
+ # Returns the number of records (rows).
119
158
  #
120
- # @return [Integer] Number of rows.
159
+ # @return [Integer]
160
+ # number of records (rows).
121
161
  #
122
162
  def size
123
163
  @table.n_rows
@@ -126,9 +166,10 @@ module RedAmber
126
166
  alias_method :n_obs, :size
127
167
  alias_method :n_rows, :size
128
168
 
129
- # Returns the number of columns.
169
+ # Returns the number of variables (columns).
130
170
  #
131
- # @return [Integer] Number of columns.
171
+ # @return [Integer]
172
+ # number of variables (columns).
132
173
  #
133
174
  def n_keys
134
175
  @table.n_columns
@@ -140,7 +181,7 @@ module RedAmber
140
181
  # Returns the numbers of rows and columns.
141
182
  #
142
183
  # @return [Array]
143
- # Number of rows and number of columns in an array.
184
+ # number of rows and number of columns in an array.
144
185
  # Same as [size, n_keys].
145
186
  #
146
187
  def shape
@@ -160,7 +201,7 @@ module RedAmber
160
201
  # Returns an Array of keys.
161
202
  #
162
203
  # @return [Array]
163
- # Keys in an Array.
204
+ # keys in an Array.
164
205
  #
165
206
  def keys
166
207
  @keys || @keys = init_instance_vars(:keys)
@@ -170,9 +211,10 @@ module RedAmber
170
211
 
171
212
  # Returns true if self has a specified key in the argument.
172
213
  #
173
- # @param key [Symbol, String] Key to test.
214
+ # @param key [Symbol, String]
215
+ # key to test.
174
216
  # @return [Boolean]
175
- # Returns true if self has key in Symbol.
217
+ # returns true if self has key in Symbol.
176
218
  #
177
219
  def key?(key)
178
220
  keys.include?(key.to_sym)
@@ -181,9 +223,10 @@ module RedAmber
181
223
 
182
224
  # Returns index of specified key in the Array keys.
183
225
  #
184
- # @param key [Symbol, String] key to know.
226
+ # @param key [Symbol, String]
227
+ # key to know.
185
228
  # @return [Integer]
186
- # Index of key in the Array keys.
229
+ # index of key in the Array keys.
187
230
  #
188
231
  def key_index(key)
189
232
  keys.find_index(key.to_sym)
@@ -194,7 +237,7 @@ module RedAmber
194
237
  # Returns abbreviated type names in an Array.
195
238
  #
196
239
  # @return [Array]
197
- # Abbreviated Red Arrow data type names.
240
+ # abbreviated Red Arrow data type names.
198
241
  #
199
242
  def types
200
243
  @types || @types = @table.columns.map do |column|
@@ -205,7 +248,7 @@ module RedAmber
205
248
  # Returns an Array of Classes of data type.
206
249
  #
207
250
  # @return [Array]
208
- # An Array of Red Arrow data type Classes.
251
+ # an Array of Red Arrow data type Classes.
209
252
  #
210
253
  def type_classes
211
254
  @data_types || @data_types = @table.columns.map { |column| column.data_type.class }
@@ -214,34 +257,16 @@ module RedAmber
214
257
  # Returns Vectors in an Array.
215
258
  #
216
259
  # @return [Array]
217
- # An Array of `RedAmber::Vector`s.
260
+ # an Array of Vector.
218
261
  #
219
262
  def vectors
220
263
  @vectors || @vectors = init_instance_vars(:vectors)
221
264
  end
222
265
 
223
- # Returns row indices (start...(size+start)) in a Vector.
224
- #
225
- # @param start [Object]
226
- # Object which have `#succ` method.
227
- #
228
- # @return [Array]
229
- # A Vector of row indices.
230
- #
231
- # @example
232
- # (when self.size == 5)
233
- # - indices #=> Vector[0, 1, 2, 3, 4]
234
- # - indices(1) #=> Vector[1, 2, 3, 4, 5]
235
- # - indices('a') #=> Vector['a', 'b', 'c', 'd', 'e']
236
- #
237
- def indices(start = 0)
238
- Vector.new((start..).take(size))
239
- end
240
- alias_method :indexes, :indices
241
-
242
266
  # Returns column-oriented data in a Hash.
243
267
  #
244
- # @return [Hash] A Hash of 'key => column_in_an_array'.
268
+ # @return [Hash]
269
+ # a Hash of 'key => column_in_an_array'.
245
270
  #
246
271
  def to_h
247
272
  variables.transform_values(&:to_a)
@@ -249,7 +274,8 @@ module RedAmber
249
274
 
250
275
  # Returns a row-oriented array without header.
251
276
  #
252
- # @return [Array] Row-oriented data without header.
277
+ # @return [Array]
278
+ # row-oriented data without header.
253
279
  #
254
280
  # @note If you need column-oriented array, use `.to_h.to_a`.
255
281
  #
@@ -260,7 +286,8 @@ module RedAmber
260
286
 
261
287
  # Returns column name and data type in a Hash.
262
288
  #
263
- # @return [Hash] Column name and data type.
289
+ # @return [Hash]
290
+ # column name and data type.
264
291
  #
265
292
  # @example
266
293
  # RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
@@ -273,7 +300,7 @@ module RedAmber
273
300
  # Compare DataFrames.
274
301
  #
275
302
  # @return [true, false]
276
- # True if other is a DataFrame and table is same.
303
+ # true if other is a DataFrame and table is same.
277
304
  # Otherwise return false.
278
305
  #
279
306
  def ==(other)
@@ -282,7 +309,8 @@ module RedAmber
282
309
 
283
310
  # Check if it is an empty DataFrame.
284
311
  #
285
- # @return [true, false] True if it has no columns.
312
+ # @return [true, false]
313
+ # true if it has no columns.
286
314
  #
287
315
  def empty?
288
316
  variables.empty?
@@ -293,14 +321,18 @@ module RedAmber
293
321
  # @overload each_row
294
322
  # Returns Enumerator when no block given.
295
323
  #
296
- # @return [Enumerator] Enumerator of each rows.
324
+ # @return [Enumerator]
325
+ # enumerator of each row.
297
326
  #
298
327
  # @overload each_row(&block)
299
328
  # Yields with key and row pairs.
300
329
  #
301
- # @yield [key_row_pairs] Yields with key and row pairs.
302
- # @yieldparam [Hash] Key and row pairs.
303
- # @yieldreturn [Integer] Size of the DataFrame.
330
+ # @yieldparam key_row_pairs [Hash]
331
+ # key and row pairs.
332
+ # @yieldreturn [Integer]
333
+ # size of the DataFrame.
334
+ # @return [Integer]
335
+ # returns size.
304
336
  #
305
337
  def each_row
306
338
  return enum_for(:each_row) unless block_given?
@@ -316,25 +348,346 @@ module RedAmber
316
348
 
317
349
  # Returns self in a `Rover::DataFrame`.
318
350
  #
319
- # @return [Rover::DataFrame] A `Rover::DataFrame`.
351
+ # @return [Rover::DataFrame]
352
+ # a `Rover::DataFrame`.
320
353
  #
321
354
  def to_rover
322
355
  require 'rover'
323
356
  Rover::DataFrame.new(to_h)
324
357
  end
325
358
 
359
+ # Create a Group object. Or create a Group and summarize it.
360
+ #
361
+ # @overload group(*group_keys)
362
+ # Create a Group object.
363
+ #
364
+ # @param group_keys [Array<Symbol, String>]
365
+ # keys for grouping.
366
+ # @return [Group]
367
+ # Group object.
368
+ # @example Create a Group
369
+ # penguins.group(:species)
370
+ #
371
+ # # =>
372
+ # #<RedAmber::Group : 0x000000000000c3c8>
373
+ # species group_count
374
+ # <string> <uint8>
375
+ # 0 Adelie 152
376
+ # 1 Chinstrap 68
377
+ # 2 Gentoo 124
378
+ #
379
+ # @overload group(*group_keys)
380
+ # Create a Group and summarize it by aggregation functions from the block.
381
+ #
382
+ # @yieldparam group [Group]
383
+ # passes Group object.
384
+ # @yieldreturn [DataFrame, Array<DataFrame>]
385
+ # an aggregated DataFrame or an array of aggregated DataFrames.
386
+ # @return [DataFrame]
387
+ # summarized DataFrame.
388
+ # @example Create a group and summarize it.
389
+ # penguins.group(:species) { mean(:bill_length_mm) }
390
+ #
391
+ # # =>
392
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f3fc>
393
+ # species mean(bill_length_mm)
394
+ # <string> <double>
395
+ # 0 Adelie 38.79
396
+ # 1 Chinstrap 48.83
397
+ # 2 Gentoo 47.5
398
+ #
326
399
  def group(*group_keys, &block)
327
400
  g = Group.new(self, group_keys)
328
401
  g = g.summarize(&block) if block
329
402
  g
330
403
  end
331
404
 
405
+ # Create SubFrames by value grouping.
406
+ #
407
+ # [Experimental feature] this method may be removed or be changed in the future.
408
+ # @param keys [Symbol, String, Array<Symbol, String>]
409
+ # grouping keys.
410
+ # @return [SubFrames]
411
+ # a created SubFrames grouped by column values on `keys`.
412
+ # @example
413
+ # df.sub_by_value(keys: :y)
414
+ #
415
+ # # =>
416
+ # #<RedAmber::SubFrames : 0x000000000000fc08>
417
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
418
+ # 3 SubFrames: [2, 3, 1] in sizes.
419
+ # ---
420
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>
421
+ # x y z
422
+ # <uint8> <string> <boolean>
423
+ # 0 1 A false
424
+ # 1 2 A true
425
+ # ---
426
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fc30>
427
+ # x y z
428
+ # <uint8> <string> <boolean>
429
+ # 0 3 B false
430
+ # 1 4 B (nil)
431
+ # 2 5 B true
432
+ # ---
433
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>
434
+ # x y z
435
+ # <uint8> <string> <boolean>
436
+ # 0 6 C false
437
+ #
438
+ # @since 0.4.0
439
+ #
440
+ def sub_by_value(keys: nil)
441
+ SubFrames.new(self, group(keys).filters)
442
+ end
443
+ alias_method :subframes_by_value, :sub_by_value
444
+
445
+ # Create SubFrames by Windowing with `from`, `size` and `step`.
446
+ #
447
+ # [Experimental feature] this method may be removed or be changed in the future.
448
+ # @param from [Integer]
449
+ # start position of window.
450
+ # @param size [Integer]
451
+ # window size.
452
+ # @param step [Integer]
453
+ # moving step of window.
454
+ # @return [SubFrames]
455
+ # a created SubFrames.
456
+ # @example
457
+ # df.sub_by_window(size: 4, step: 2)
458
+ #
459
+ # # =>
460
+ # #<RedAmber::SubFrames : 0x000000000000fc58>
461
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
462
+ # 2 SubFrames: [4, 4] in sizes.
463
+ # ---
464
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc6c>
465
+ # x y z
466
+ # <uint8> <string> <boolean>
467
+ # 0 1 A false
468
+ # 1 2 A true
469
+ # 2 3 B false
470
+ # 3 4 B (nil)
471
+ # ---
472
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc80>
473
+ # x y z
474
+ # <uint8> <string> <boolean>
475
+ # 0 3 B false
476
+ # 1 4 B (nil)
477
+ # 2 5 B true
478
+ # 3 6 C false
479
+ #
480
+ # @since 0.4.0
481
+ #
482
+ def sub_by_window(from: 0, size: nil, step: 1)
483
+ SubFrames.new(self) do
484
+ from.step(by: step, to: (size() - size)).map do |i| # rubocop:disable Style/MethodCallWithoutArgsParentheses
485
+ [*i...(i + size)]
486
+ end
487
+ end
488
+ end
489
+ alias_method :subframes_by_window, :sub_by_window
490
+
491
+ # Create SubFrames by Grouping/Windowing by position from an enumerator method.
492
+ #
493
+ # This method will process the indices of self by enumerator.
494
+ # [Experimental feature] this method may be removed or be changed in the future.
495
+ # @param enumerator_method [Symbol]
496
+ # Enumerator name.
497
+ # @param args [<Object>]
498
+ # arguments for the enumerator method.
499
+ # @return [SubFrames]
500
+ # a created SubFrames.
501
+ # @example Create a SubFrames object sliced by 3 rows.
502
+ # df.sub_by_enum(:each_slice, 3)
503
+ #
504
+ # # =>
505
+ # #<RedAmber::SubFrames : 0x000000000000fd20>
506
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
507
+ # 2 SubFrames: [3, 3] in sizes.
508
+ # ---
509
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd34>
510
+ # x y z
511
+ # <uint8> <string> <boolean>
512
+ # 0 1 A false
513
+ # 1 2 A true
514
+ # 2 3 B false
515
+ # ---
516
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd48>
517
+ # x y z
518
+ # <uint8> <string> <boolean>
519
+ # 0 4 B (nil)
520
+ # 1 5 B true
521
+ # 2 6 C false
522
+ #
523
+ # @example Create a SubFrames object for each consecutive 4 rows.
524
+ # df.sub_by_enum(:each_cons, 4)
525
+ #
526
+ # # =>
527
+ # #<RedAmber::SubFrames : 0x000000000000fd98>
528
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
529
+ # 3 SubFrames: [4, 4, 4] in sizes.
530
+ # ---
531
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdac>
532
+ # x y z
533
+ # <uint8> <string> <boolean>
534
+ # 0 1 A false
535
+ # 1 2 A true
536
+ # 2 3 B false
537
+ # 3 4 B (nil)
538
+ # ---
539
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdc0>
540
+ # x y z
541
+ # <uint8> <string> <boolean>
542
+ # 0 2 A true
543
+ # 1 3 B false
544
+ # 2 4 B (nil)
545
+ # 3 5 B true
546
+ # ---
547
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdd4>
548
+ # x y z
549
+ # <uint8> <string> <boolean>
550
+ # 0 3 B false
551
+ # 1 4 B (nil)
552
+ # 2 5 B true
553
+ # 3 6 C false
554
+ #
555
+ # @since 0.4.0
556
+ #
557
+ def sub_by_enum(enumerator_method, *args)
558
+ SubFrames.new(self, indices.send(enumerator_method, *args).to_a)
559
+ end
560
+ alias_method :subframes_by_enum, :sub_by_enum
561
+
562
+ # Create SubFrames by windowing with a kernel (i.e. masked window) and step.
563
+ #
564
+ # [Experimental feature] this method may be removed or be changed in the future.
565
+ # @param kernel [Array<true, false>, Vector]
566
+ # boolean array-like to pick records in the window.
567
+ # Kernel is a boolean Array and it behaves like a masked window.
568
+ # @param step [Integer]
569
+ # moving step of window.
570
+ # @return [SubFrames]
571
+ # a created SubFrames.
572
+ # @example
573
+ # kernel = [true, false, false, true]
574
+ # df.sub_by_kernel(kernel, step: 2)
575
+ #
576
+ # # =>
577
+ # #<RedAmber::SubFrames : 0x000000000000fde8>
578
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
579
+ # 2 SubFrames: [2, 2] in sizes.
580
+ # ---
581
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fdfc>
582
+ # x y z
583
+ # <uint8> <string> <boolean>
584
+ # 0 1 A false
585
+ # 1 4 B (nil)
586
+ # ---
587
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fe10>
588
+ # x y z
589
+ # <uint8> <string> <boolean>
590
+ # 0 3 B false
591
+ # 1 6 C false
592
+ #
593
+ # @since 0.4.0
594
+ #
595
+ def sub_by_kernel(kernel, step: 1)
596
+ limit_size = size - kernel.size
597
+ kernel_vector = Vector.new(kernel.concat([nil] * limit_size))
598
+ SubFrames.new(self) do
599
+ 0.step(by: step, to: limit_size).map do |i|
600
+ kernel_vector.shift(i)
601
+ end
602
+ end
603
+ end
604
+ alias_method :subframes_by_kernel, :sub_by_kernel
605
+
606
+ # Generic builder of sub-dataframes from self.
607
+ #
608
+ # [Experimental feature] this method may be removed or be changed in the future.
609
+ # @overload build_subframes(subset_specifier)
610
+ # Create a new SubFrames object.
611
+ #
612
+ # @param subset_specifier [Array<Vector>, Array<array-like>]
613
+ # an Array of numeric indices or boolean filters
614
+ # to create subsets of DataFrame.
615
+ # @return [SubFrames]
616
+ # new SubFrames.
617
+ # @example
618
+ # df.build_subframes([[0, 2, 4], [1, 3, 5]])
619
+ #
620
+ # # =>
621
+ # #<RedAmber::SubFrames : 0x000000000000fe9c>
622
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
623
+ # 2 SubFrames: [3, 3] in sizes.
624
+ # ---
625
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000feb0>
626
+ # x y z
627
+ # <uint8> <string> <boolean>
628
+ # 0 1 A false
629
+ # 1 3 B false
630
+ # 2 5 B true
631
+ # ---
632
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fec4>
633
+ # x y z
634
+ # <uint8> <string> <boolean>
635
+ # 0 2 A true
636
+ # 1 4 B (nil)
637
+ # 2 6 C false
638
+ #
639
+ # @overload build_subframes
640
+ # Create a new SubFrames object by block.
641
+ #
642
+ # @yield [self]
643
+ # the block is called within the context of self.
644
+ # (Block is called by instance_eval(&block). )
645
+ # @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
646
+ # an Array of index or boolean array-likes to create subsets of DataFrame.
647
+ # All array-likes must respond to #numeric? or #boolean?.
648
+ # @example
649
+ # dataframe.build_subframes do
650
+ # even = indices.map(&:even?)
651
+ # [even, !even]
652
+ # end
653
+ #
654
+ # # =>
655
+ # #<RedAmber::SubFrames : 0x000000000000fe60>
656
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
657
+ # 2 SubFrames: [3, 3] in sizes.
658
+ # ---
659
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe74>
660
+ # x y z
661
+ # <uint8> <string> <boolean>
662
+ # 0 1 A false
663
+ # 1 3 B false
664
+ # 2 5 B true
665
+ # ---
666
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe88>
667
+ # x y z
668
+ # <uint8> <string> <boolean>
669
+ # 0 2 A true
670
+ # 1 4 B (nil)
671
+ # 2 6 C false
672
+ #
673
+ # @since 0.4.0
674
+ #
675
+ def build_subframes(subset_specifier = nil, &block)
676
+ if block
677
+ SubFrames.new(self, instance_eval(&block))
678
+ else
679
+ SubFrames.new(self, subset_specifier)
680
+ end
681
+ end
682
+
683
+ # Catch variable (column) key as method name.
332
684
  def method_missing(name, *args, &block)
333
685
  return v(name) if args.empty? && key?(name)
334
686
 
335
687
  super
336
688
  end
337
689
 
690
+ # Catch variable (column) key as method name.
338
691
  def respond_to_missing?(name, include_private)
339
692
  return true if key?(name)
340
693
 
@@ -346,15 +699,16 @@ module RedAmber
346
699
  # initialize @variables, @keys, @vectors and return one of them
347
700
  def init_instance_vars(var)
348
701
  ary =
349
- @table.columns
350
- .each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
351
- v = Vector.create(column.data)
352
- k = column.name.to_sym
353
- v.key = k
354
- variables[k] = v
355
- keys << k
356
- vectors << v
357
- end
702
+ @table
703
+ .columns
704
+ .each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
705
+ v = Vector.create(column.data)
706
+ k = column.name.to_sym
707
+ v.key = k
708
+ variables[k] = v
709
+ keys << k
710
+ vectors << v
711
+ end
358
712
 
359
713
  @variables, @keys, @vectors = ary
360
714
  ary[%i[variables keys vectors].index(var)]