red_amber 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +56 -22
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +29 -30
  8. data/benchmark/basic.yml +7 -7
  9. data/benchmark/combine.yml +3 -3
  10. data/benchmark/dataframe.yml +15 -9
  11. data/benchmark/group.yml +6 -6
  12. data/benchmark/reshape.yml +6 -6
  13. data/benchmark/vector.yml +6 -3
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +454 -85
  20. data/lib/red_amber/data_frame_combinable.rb +609 -115
  21. data/lib/red_amber/data_frame_displayable.rb +313 -34
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +623 -70
  26. data/lib/red_amber/data_frame_variable_operation.rb +452 -35
  27. data/lib/red_amber/group.rb +186 -22
  28. data/lib/red_amber/helper.rb +74 -14
  29. data/lib/red_amber/refinements.rb +26 -6
  30. data/lib/red_amber/subframes.rb +1101 -0
  31. data/lib/red_amber/vector.rb +362 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +506 -0
  34. data/lib/red_amber/vector_selectable.rb +265 -23
  35. data/lib/red_amber/vector_unary_element_wise.rb +529 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -4,7 +4,7 @@ module RedAmber
4
4
  # Class to represent a data frame.
5
5
  # Variable @table holds an Arrow::Table object.
6
6
  class DataFrame
7
- # mix-in
7
+ # Mix-in
8
8
  include DataFrameCombinable
9
9
  include DataFrameDisplayable
10
10
  include DataFrameIndexable
@@ -17,26 +17,79 @@ module RedAmber
17
17
  using RefineArrowTable
18
18
  using RefineHash
19
19
 
20
- # Quicker DataFrame construction from a `Arrow::Table`.
21
- #
22
- # @param table [Arrow::Table] A table to have in the DataFrame.
23
- # @return [DataFrame] Initialized DataFrame.
24
- #
25
- # @note This method will allocate table directly and may be used in the method.
26
- # @note `table` must have unique keys.
27
- def self.create(table)
28
- instance = allocate
29
- instance.instance_variable_set(:@table, table)
30
- instance
20
+ class << self
21
+ # Quicker DataFrame constructor from a `Arrow::Table`.
22
+ #
23
+ # @param table [Arrow::Table]
24
+ # A table to have in the DataFrame.
25
+ # @return [DataFrame]
26
+ # Initialized DataFrame.
27
+ #
28
+ # @note This method will allocate table directly and may be used in the method.
29
+ # @note `table` must have unique keys.
30
+ #
31
+ def create(table)
32
+ instance = allocate
33
+ instance.instance_variable_set(:@table, table)
34
+ instance
35
+ end
36
+
37
+ # Return new DataFrame for specified schema and value.
38
+ #
39
+ # @param dataframe_for_schema [DataFrame]
40
+ # schema of this dataframe will be used.
41
+ # @param dataframe_for_value [DataFrame]
42
+ # column values of this dataframe will be used.
43
+ # @return [DataFrame]
44
+ # created DataFrame.
45
+ # @since 0.4.1
46
+ #
47
+ def new_dataframe_with_schema(dataframe_for_schema, dataframe_for_value)
48
+ DataFrame.create(
49
+ Arrow::Table.new(dataframe_for_schema.table.schema,
50
+ dataframe_for_value.table.columns)
51
+ )
52
+ end
31
53
  end
32
54
 
33
55
  # Creates a new DataFrame.
34
56
  #
57
+ # @overload initialize(hash)
58
+ # Initialize a DataFrame by a Hash.
59
+ #
60
+ # @param hash [Hash<key => <Array, Arrow::Array, #to_arrow_array>>]
61
+ # a Hash of `key` with array-like for column values.
62
+ # `key`s are Symbol or String.
63
+ # @example Initialize by a Hash
64
+ # hash = { x: [1, 2, 3], y: %w[A B C] }
65
+ # DataFrame.new(hash)
66
+ # @example Initialize by a Hash like arguments.
67
+ # DataFrame.new(x: [1, 2, 3], y: %w[A B C])
68
+ # @example Initialize from objects that respond to #to_arrow_array.
69
+ # # An array-like object that responds to #to_arrow_array is also accepted.
70
+ # require 'arrow-numo-narray'
71
+ # DataFrame.new(numo: Numo::DFloat.new(3).rand)
72
+ #
35
73
  # @overload initialize(table)
36
- # Initialize DataFrame by an `Arrow::Table`
74
+ # Initialize a DataFrame by an `Arrow::Table`.
37
75
  #
38
76
  # @param table [Arrow::Table]
39
- # A table to have in the DataFrame.
77
+ # a table to have in the DataFrame.
78
+ # @example Initialize by a Table
79
+ # table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
80
+ # DataFrame.new(table)
81
+ #
82
+ # @overload initialize(schema, row_oriented_array)
83
+ # Initialize a DataFrame by schema and row_oriented_array.
84
+ #
85
+ # @param schema [Hash<key => type>]
86
+ # a schema of key and data type.
87
+ # @param row_oriented_array [Array]
88
+ # an Array of rows.
89
+ # @example Initialize by a schema and a row_oriented_array.
90
+ # schema = { x: :uint8, y: :string }
91
+ # row_oriented_array = [[1, 'A'], [2, 'B'], [3, 'C']]
92
+ # DataFrame.new(schema, row_oriented_array)
40
93
  #
41
94
  # @overload initialize(arrowable)
42
95
  # Initialize DataFrame by a `#to_arrow` responsible object.
@@ -47,6 +100,11 @@ module RedAmber
47
100
  #
48
101
  # @note `RedAmber::DataFrame` itself is readable by this.
49
102
  # @note Hash is refined to respond to `#to_arrow` in this class.
103
+ # @example Initialize by Red Dataset object.
104
+ # require 'datasets-arrow'
105
+ # dataset = Datasets::Penguins.new
106
+ # penguins = DataFrame.new(dataset)
107
+ # @since 0.2.2
50
108
  #
51
109
  # @overload initialize(rover_like)
52
110
  # Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
@@ -60,20 +118,18 @@ module RedAmber
60
118
  # @overload initialize()
61
119
  # Create empty DataFrame
62
120
  #
63
- # @example DataFrame.new
121
+ # @example
122
+ # DataFrame.new
64
123
  #
65
124
  # @overload initialize(empty)
66
125
  # Create empty DataFrame
67
126
  #
68
127
  # @param empty [nil, [], {}]
69
128
  #
70
- # @example DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
71
- #
72
- # @overload initialize(args)
73
- #
74
- # @param args [values]
75
- # Accepts any argments which is valid for `Arrow::Table.new(args)`. See
76
- # {https://github.com/apache/arrow/blob/master/ruby/red-arrow/lib/arrow/table.rb
129
+ # @example Return empty DataFrame.
130
+ # DataFrame.new([])
131
+ # DataFrame.new({})
132
+ # DataFrame.new(nil)
77
133
  #
78
134
  def initialize(*args)
79
135
  case args
@@ -109,15 +165,16 @@ module RedAmber
109
165
 
110
166
  # Returns the table having within.
111
167
  #
112
- # @return [Arrow::Table] The table within.
168
+ # @return [Arrow::Table]
169
+ # the table within.
113
170
  #
114
171
  attr_reader :table
115
-
116
172
  alias_method :to_arrow, :table
117
173
 
118
- # Returns the number of rows.
174
+ # Returns the number of records (rows).
119
175
  #
120
- # @return [Integer] Number of rows.
176
+ # @return [Integer]
177
+ # number of records (rows).
121
178
  #
122
179
  def size
123
180
  @table.n_rows
@@ -126,9 +183,10 @@ module RedAmber
126
183
  alias_method :n_obs, :size
127
184
  alias_method :n_rows, :size
128
185
 
129
- # Returns the number of columns.
186
+ # Returns the number of variables (columns).
130
187
  #
131
- # @return [Integer] Number of columns.
188
+ # @return [Integer]
189
+ # number of variables (columns).
132
190
  #
133
191
  def n_keys
134
192
  @table.n_columns
@@ -140,7 +198,7 @@ module RedAmber
140
198
  # Returns the numbers of rows and columns.
141
199
  #
142
200
  # @return [Array]
143
- # Number of rows and number of columns in an array.
201
+ # number of rows and number of columns in an array.
144
202
  # Same as [size, n_keys].
145
203
  #
146
204
  def shape
@@ -153,26 +211,27 @@ module RedAmber
153
211
  # `key => Vector` pairs for each column.
154
212
  #
155
213
  def variables
156
- @variables || @variables = init_instance_vars(:variables)
214
+ @variables ||= init_instance_vars(:variables)
157
215
  end
158
216
  alias_method :vars, :variables
159
217
 
160
218
  # Returns an Array of keys.
161
219
  #
162
220
  # @return [Array]
163
- # Keys in an Array.
221
+ # keys in an Array.
164
222
  #
165
223
  def keys
166
- @keys || @keys = init_instance_vars(:keys)
224
+ @keys ||= init_instance_vars(:keys)
167
225
  end
168
226
  alias_method :column_names, :keys
169
227
  alias_method :var_names, :keys
170
228
 
171
229
  # Returns true if self has a specified key in the argument.
172
230
  #
173
- # @param key [Symbol, String] Key to test.
231
+ # @param key [Symbol, String]
232
+ # key to test.
174
233
  # @return [Boolean]
175
- # Returns true if self has key in Symbol.
234
+ # returns true if self has key in Symbol.
176
235
  #
177
236
  def key?(key)
178
237
  keys.include?(key.to_sym)
@@ -181,9 +240,10 @@ module RedAmber
181
240
 
182
241
  # Returns index of specified key in the Array keys.
183
242
  #
184
- # @param key [Symbol, String] key to know.
243
+ # @param key [Symbol, String]
244
+ # key to know.
185
245
  # @return [Integer]
186
- # Index of key in the Array keys.
246
+ # index of key in the Array keys.
187
247
  #
188
248
  def key_index(key)
189
249
  keys.find_index(key.to_sym)
@@ -194,10 +254,10 @@ module RedAmber
194
254
  # Returns abbreviated type names in an Array.
195
255
  #
196
256
  # @return [Array]
197
- # Abbreviated Red Arrow data type names.
257
+ # abbreviated Red Arrow data type names.
198
258
  #
199
259
  def types
200
- @types || @types = @table.columns.map do |column|
260
+ @types ||= @table.columns.map do |column|
201
261
  column.data.value_type.nick.to_sym
202
262
  end
203
263
  end
@@ -205,43 +265,25 @@ module RedAmber
205
265
  # Returns an Array of Classes of data type.
206
266
  #
207
267
  # @return [Array]
208
- # An Array of Red Arrow data type Classes.
268
+ # an Array of Red Arrow data type Classes.
209
269
  #
210
270
  def type_classes
211
- @data_types || @data_types = @table.columns.map { |column| column.data_type.class }
271
+ @type_classes ||= @table.columns.map { |column| column.data_type.class }
212
272
  end
213
273
 
214
274
  # Returns Vectors in an Array.
215
275
  #
216
276
  # @return [Array]
217
- # An Array of `RedAmber::Vector`s.
277
+ # an Array of Vector.
218
278
  #
219
279
  def vectors
220
- @vectors || @vectors = init_instance_vars(:vectors)
280
+ @vectors ||= init_instance_vars(:vectors)
221
281
  end
222
282
 
223
- # Returns row indices (start...(size+start)) in a Vector.
224
- #
225
- # @param start [Object]
226
- # Object which have `#succ` method.
227
- #
228
- # @return [Array]
229
- # A Vector of row indices.
230
- #
231
- # @example
232
- # (when self.size == 5)
233
- # - indices #=> Vector[0, 1, 2, 3, 4]
234
- # - indices(1) #=> Vector[1, 2, 3, 4, 5]
235
- # - indices('a') #=> Vector['a', 'b', 'c', 'd', 'e']
236
- #
237
- def indices(start = 0)
238
- Vector.new((start..).take(size))
239
- end
240
- alias_method :indexes, :indices
241
-
242
283
  # Returns column-oriented data in a Hash.
243
284
  #
244
- # @return [Hash] A Hash of 'key => column_in_an_array'.
285
+ # @return [Hash]
286
+ # a Hash of 'key => column_in_an_array'.
245
287
  #
246
288
  def to_h
247
289
  variables.transform_values(&:to_a)
@@ -249,7 +291,8 @@ module RedAmber
249
291
 
250
292
  # Returns a row-oriented array without header.
251
293
  #
252
- # @return [Array] Row-oriented data without header.
294
+ # @return [Array]
295
+ # row-oriented data without header.
253
296
  #
254
297
  # @note If you need column-oriented array, use `.to_h.to_a`.
255
298
  #
@@ -260,7 +303,8 @@ module RedAmber
260
303
 
261
304
  # Returns column name and data type in a Hash.
262
305
  #
263
- # @return [Hash] Column name and data type.
306
+ # @return [Hash]
307
+ # column name and data type.
264
308
  #
265
309
  # @example
266
310
  # RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
@@ -273,7 +317,7 @@ module RedAmber
273
317
  # Compare DataFrames.
274
318
  #
275
319
  # @return [true, false]
276
- # True if other is a DataFrame and table is same.
320
+ # true if other is a DataFrame and table is same.
277
321
  # Otherwise return false.
278
322
  #
279
323
  def ==(other)
@@ -282,7 +326,8 @@ module RedAmber
282
326
 
283
327
  # Check if it is an empty DataFrame.
284
328
  #
285
- # @return [true, false] True if it has no columns.
329
+ # @return [true, false]
330
+ # true if it has no columns.
286
331
  #
287
332
  def empty?
288
333
  variables.empty?
@@ -293,14 +338,18 @@ module RedAmber
293
338
  # @overload each_row
294
339
  # Returns Enumerator when no block given.
295
340
  #
296
- # @return [Enumerator] Enumerator of each rows.
341
+ # @return [Enumerator]
342
+ # enumerator over each row.
297
343
  #
298
344
  # @overload each_row(&block)
299
345
  # Yields with key and row pairs.
300
346
  #
301
- # @yield [key_row_pairs] Yields with key and row pairs.
302
- # @yieldparam [Hash] Key and row pairs.
303
- # @yieldreturn [Integer] Size of the DataFrame.
347
+ # @yieldparam key_row_pairs [Hash]
348
+ # key and row pairs.
349
+ # @yieldreturn [Integer]
350
+ # size of the DataFrame.
351
+ # @return [Integer]
352
+ # returns size.
304
353
  #
305
354
  def each_row
306
355
  return enum_for(:each_row) unless block_given?
@@ -316,25 +365,346 @@ module RedAmber
316
365
 
317
366
  # Returns self in a `Rover::DataFrame`.
318
367
  #
319
- # @return [Rover::DataFrame] A `Rover::DataFrame`.
368
+ # @return [Rover::DataFrame]
369
+ # a `Rover::DataFrame`.
320
370
  #
321
371
  def to_rover
322
372
  require 'rover'
323
373
  Rover::DataFrame.new(to_h)
324
374
  end
325
375
 
376
+ # Create a Group object. Or create a Group and summarize it.
377
+ #
378
+ # @overload group(*group_keys)
379
+ # Create a Group object.
380
+ #
381
+ # @param group_keys [Array<Symbol, String>]
382
+ # keys for grouping.
383
+ # @return [Group]
384
+ # Group object.
385
+ # @example Create a Group
386
+ # penguins.group(:species)
387
+ #
388
+ # # =>
389
+ # #<RedAmber::Group : 0x000000000000c3c8>
390
+ # species group_count
391
+ # <string> <uint8>
392
+ # 0 Adelie 152
393
+ # 1 Chinstrap 68
394
+ # 2 Gentoo 124
395
+ #
396
+ # @overload group(*group_keys)
397
+ # Create a Group and summarize it by aggregation functions from the block.
398
+ #
399
+ # @yieldparam group [Group]
400
+ # passes Group object.
401
+ # @yieldreturn [DataFrame, Array<DataFrame>]
402
+ # an aggregated DataFrame or an array of aggregated DataFrames.
403
+ # @return [DataFrame]
404
+ # summarized DataFrame.
405
+ # @example Create a group and summarize it.
406
+ # penguins.group(:species) { mean(:bill_length_mm) }
407
+ #
408
+ # # =>
409
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f3fc>
410
+ # species mean(bill_length_mm)
411
+ # <string> <double>
412
+ # 0 Adelie 38.79
413
+ # 1 Chinstrap 48.83
414
+ # 2 Gentoo 47.5
415
+ #
326
416
  def group(*group_keys, &block)
327
417
  g = Group.new(self, group_keys)
328
418
  g = g.summarize(&block) if block
329
419
  g
330
420
  end
331
421
 
422
+ # Create SubFrames by value grouping.
423
+ #
424
+ # [Experimental feature] this method may be removed or be changed in the future.
425
+ # @param keys [Symbol, String, Array<Symbol, String>]
426
+ # grouping keys.
427
+ # @return [SubFrames]
428
+ # a created SubFrames grouped by column values on `keys`.
429
+ # @example
430
+ # df.sub_by_value(keys: :y)
431
+ #
432
+ # # =>
433
+ # #<RedAmber::SubFrames : 0x000000000000fc08>
434
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
435
+ # 3 SubFrames: [2, 3, 1] in sizes.
436
+ # ---
437
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>
438
+ # x y z
439
+ # <uint8> <string> <boolean>
440
+ # 0 1 A false
441
+ # 1 2 A true
442
+ # ---
443
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fc30>
444
+ # x y z
445
+ # <uint8> <string> <boolean>
446
+ # 0 3 B false
447
+ # 1 4 B (nil)
448
+ # 2 5 B true
449
+ # ---
450
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>
451
+ # x y z
452
+ # <uint8> <string> <boolean>
453
+ # 0 6 C false
454
+ #
455
+ # @since 0.4.0
456
+ #
457
+ def sub_by_value(keys: nil)
458
+ SubFrames.new(self, group(keys).filters)
459
+ end
460
+ alias_method :subframes_by_value, :sub_by_value
461
+
462
+ # Create SubFrames by Windowing with `from`, `size` and `step`.
463
+ #
464
+ # [Experimental feature] this method may be removed or be changed in the future.
465
+ # @param from [Integer]
466
+ # start position of window.
467
+ # @param size [Integer]
468
+ # window size.
469
+ # @param step [Integer]
470
+ # moving step of window.
471
+ # @return [SubFrames]
472
+ # a created SubFrames.
473
+ # @example
474
+ # df.sub_by_window(size: 4, step: 2)
475
+ #
476
+ # # =>
477
+ # #<RedAmber::SubFrames : 0x000000000000fc58>
478
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
479
+ # 2 SubFrames: [4, 4] in sizes.
480
+ # ---
481
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc6c>
482
+ # x y z
483
+ # <uint8> <string> <boolean>
484
+ # 0 1 A false
485
+ # 1 2 A true
486
+ # 2 3 B false
487
+ # 3 4 B (nil)
488
+ # ---
489
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc80>
490
+ # x y z
491
+ # <uint8> <string> <boolean>
492
+ # 0 3 B false
493
+ # 1 4 B (nil)
494
+ # 2 5 B true
495
+ # 3 6 C false
496
+ #
497
+ # @since 0.4.0
498
+ #
499
+ def sub_by_window(from: 0, size: nil, step: 1)
500
+ SubFrames.new(self) do
501
+ from.step(by: step, to: (size() - size)).map do |i| # rubocop:disable Style/MethodCallWithoutArgsParentheses
502
+ [*i...(i + size)]
503
+ end
504
+ end
505
+ end
506
+ alias_method :subframes_by_window, :sub_by_window
507
+
508
+ # Create SubFrames by Grouping/Windowing by position from an enumerator method.
509
+ #
510
+ # This method will process the indices of self by enumerator.
511
+ # [Experimental feature] this method may be removed or be changed in the future.
512
+ # @param enumerator_method [Symbol]
513
+ # Enumerator name.
514
+ # @param args [<Object>]
515
+ # arguments for the enumerator method.
516
+ # @return [SubFrames]
517
+ # a created SubFrames.
518
+ # @example Create a SubFrames object sliced by 3 rows.
519
+ # df.sub_by_enum(:each_slice, 3)
520
+ #
521
+ # # =>
522
+ # #<RedAmber::SubFrames : 0x000000000000fd20>
523
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
524
+ # 2 SubFrames: [3, 3] in sizes.
525
+ # ---
526
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd34>
527
+ # x y z
528
+ # <uint8> <string> <boolean>
529
+ # 0 1 A false
530
+ # 1 2 A true
531
+ # 2 3 B false
532
+ # ---
533
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd48>
534
+ # x y z
535
+ # <uint8> <string> <boolean>
536
+ # 0 4 B (nil)
537
+ # 1 5 B true
538
+ # 2 6 C false
539
+ #
540
+ # @example Create a SubFrames object for each consecutive 4 rows.
541
+ # df.sub_by_enum(:each_cons, 4)
542
+ #
543
+ # # =>
544
+ # #<RedAmber::SubFrames : 0x000000000000fd98>
545
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
546
+ # 3 SubFrames: [4, 4, 4] in sizes.
547
+ # ---
548
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdac>
549
+ # x y z
550
+ # <uint8> <string> <boolean>
551
+ # 0 1 A false
552
+ # 1 2 A true
553
+ # 2 3 B false
554
+ # 3 4 B (nil)
555
+ # ---
556
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdc0>
557
+ # x y z
558
+ # <uint8> <string> <boolean>
559
+ # 0 2 A true
560
+ # 1 3 B false
561
+ # 2 4 B (nil)
562
+ # 3 5 B true
563
+ # ---
564
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdd4>
565
+ # x y z
566
+ # <uint8> <string> <boolean>
567
+ # 0 3 B false
568
+ # 1 4 B (nil)
569
+ # 2 5 B true
570
+ # 3 6 C false
571
+ #
572
+ # @since 0.4.0
573
+ #
574
+ def sub_by_enum(enumerator_method, *args)
575
+ SubFrames.new(self, indices.send(enumerator_method, *args).to_a)
576
+ end
577
+ alias_method :subframes_by_enum, :sub_by_enum
578
+
579
+ # Create SubFrames by windowing with a kernel (i.e. masked window) and step.
580
+ #
581
+ # [Experimental feature] this method may be removed or be changed in the future.
582
+ # @param kernel [Array<true, false>, Vector]
583
+ # boolean array-like to pick records in the window.
584
+ # Kernel is a boolean Array and it behaves like a masked window.
585
+ # @param step [Integer]
586
+ # moving step of window.
587
+ # @return [SubFrames]
588
+ # a created SubFrames.
589
+ # @example
590
+ # kernel = [true, false, false, true]
591
+ # df.sub_by_kernel(kernel, step: 2)
592
+ #
593
+ # # =>
594
+ # #<RedAmber::SubFrames : 0x000000000000fde8>
595
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
596
+ # 2 SubFrames: [2, 2] in sizes.
597
+ # ---
598
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fdfc>
599
+ # x y z
600
+ # <uint8> <string> <boolean>
601
+ # 0 1 A false
602
+ # 1 4 B (nil)
603
+ # ---
604
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fe10>
605
+ # x y z
606
+ # <uint8> <string> <boolean>
607
+ # 0 3 B false
608
+ # 1 6 C false
609
+ #
610
+ # @since 0.4.0
611
+ #
612
+ def sub_by_kernel(kernel, step: 1)
613
+ limit_size = size - kernel.size
614
+ kernel_vector = Vector.new(kernel.concat([nil] * limit_size))
615
+ SubFrames.new(self) do
616
+ 0.step(by: step, to: limit_size).map do |i|
617
+ kernel_vector.shift(i)
618
+ end
619
+ end
620
+ end
621
+ alias_method :subframes_by_kernel, :sub_by_kernel
622
+
623
+ # Generic builder of sub-dataframes from self.
624
+ #
625
+ # [Experimental feature] this method may be removed or be changed in the future.
626
+ # @overload build_subframes(subset_specifier)
627
+ # Create a new SubFrames object.
628
+ #
629
+ # @param subset_specifier [Array<Vector>, Array<array-like>]
630
+ # an Array of numeric indices or boolean filters
631
+ # to create subsets of DataFrame.
632
+ # @return [SubFrames]
633
+ # new SubFrames.
634
+ # @example
635
+ # df.build_subframes([[0, 2, 4], [1, 3, 5]])
636
+ #
637
+ # # =>
638
+ # #<RedAmber::SubFrames : 0x000000000000fe9c>
639
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
640
+ # 2 SubFrames: [3, 3] in sizes.
641
+ # ---
642
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000feb0>
643
+ # x y z
644
+ # <uint8> <string> <boolean>
645
+ # 0 1 A false
646
+ # 1 3 B false
647
+ # 2 5 B true
648
+ # ---
649
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fec4>
650
+ # x y z
651
+ # <uint8> <string> <boolean>
652
+ # 0 2 A true
653
+ # 1 4 B (nil)
654
+ # 2 6 C false
655
+ #
656
+ # @overload build_subframes
657
+ # Create a new SubFrames object by block.
658
+ #
659
+ # @yield [self]
660
+ # the block is called within the context of self.
661
+ # (Block is called by instance_eval(&block). )
662
+ # @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
663
+ # an Array of index or boolean array-likes to create subsets of DataFrame.
664
+ # Every array-like must respond to #numeric? or #boolean?.
665
+ # @example
666
+ # dataframe.build_subframes do
667
+ # even = indices.map(&:even?)
668
+ # [even, !even]
669
+ # end
670
+ #
671
+ # # =>
672
+ # #<RedAmber::SubFrames : 0x000000000000fe60>
673
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
674
+ # 2 SubFrames: [3, 3] in sizes.
675
+ # ---
676
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe74>
677
+ # x y z
678
+ # <uint8> <string> <boolean>
679
+ # 0 1 A false
680
+ # 1 3 B false
681
+ # 2 5 B true
682
+ # ---
683
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe88>
684
+ # x y z
685
+ # <uint8> <string> <boolean>
686
+ # 0 2 A true
687
+ # 1 4 B (nil)
688
+ # 2 6 C false
689
+ #
690
+ # @since 0.4.0
691
+ #
692
+ def build_subframes(subset_specifier = nil, &block)
693
+ if block
694
+ SubFrames.new(self, instance_eval(&block))
695
+ else
696
+ SubFrames.new(self, subset_specifier)
697
+ end
698
+ end
699
+
700
+ # Catch variable (column) key as method name.
332
701
  def method_missing(name, *args, &block)
333
- return v(name) if args.empty? && key?(name)
702
+ return variables[name] if args.empty? && key?(name)
334
703
 
335
704
  super
336
705
  end
337
706
 
707
+ # Catch variable (column) key as method name.
338
708
  def respond_to_missing?(name, include_private)
339
709
  return true if key?(name)
340
710
 
@@ -346,15 +716,16 @@ module RedAmber
346
716
  # initialize @variables, @keys, @vectors and return one of them
347
717
  def init_instance_vars(var)
348
718
  ary =
349
- @table.columns
350
- .each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
351
- v = Vector.create(column.data)
352
- k = column.name.to_sym
353
- v.key = k
354
- variables[k] = v
355
- keys << k
356
- vectors << v
357
- end
719
+ @table
720
+ .columns
721
+ .each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
722
+ v = Vector.create(column.data)
723
+ k = column.name.to_sym
724
+ v.key = k
725
+ variables[k] = v
726
+ keys << k
727
+ vectors << v
728
+ end
358
729
 
359
730
  @variables, @keys, @vectors = ary
360
731
  ary[%i[variables keys vectors].index(var)]
@@ -369,11 +740,9 @@ module RedAmber
369
740
  end
370
741
 
371
742
  def name_unnamed_keys
372
- return unless @table.key?('')
743
+ return unless @table.key?(:'')
373
744
 
374
- # We can't use #keys because it causes mismatch of @table and @keys
375
- keys = @table.schema.fields.map { |f| f.name.to_sym }
376
- unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
745
+ unnamed = (:unnamed1..).find { |name| !@table.key?(name) }
377
746
  fields =
378
747
  @table.schema.fields.map do |field|
379
748
  if field.name.empty?