red_amber 0.3.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +56 -22
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +29 -30
  8. data/benchmark/basic.yml +7 -7
  9. data/benchmark/combine.yml +3 -3
  10. data/benchmark/dataframe.yml +15 -9
  11. data/benchmark/group.yml +6 -6
  12. data/benchmark/reshape.yml +6 -6
  13. data/benchmark/vector.yml +6 -3
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +454 -85
  20. data/lib/red_amber/data_frame_combinable.rb +609 -115
  21. data/lib/red_amber/data_frame_displayable.rb +313 -34
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +623 -70
  26. data/lib/red_amber/data_frame_variable_operation.rb +452 -35
  27. data/lib/red_amber/group.rb +186 -22
  28. data/lib/red_amber/helper.rb +74 -14
  29. data/lib/red_amber/refinements.rb +26 -6
  30. data/lib/red_amber/subframes.rb +1101 -0
  31. data/lib/red_amber/vector.rb +362 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +506 -0
  34. data/lib/red_amber/vector_selectable.rb +265 -23
  35. data/lib/red_amber/vector_unary_element_wise.rb +529 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -4,7 +4,7 @@ module RedAmber
4
4
  # Class to represent a data frame.
5
5
  # Variable @table holds an Arrow::Table object.
6
6
  class DataFrame
7
- # mix-in
7
+ # Mix-in
8
8
  include DataFrameCombinable
9
9
  include DataFrameDisplayable
10
10
  include DataFrameIndexable
@@ -17,26 +17,79 @@ module RedAmber
17
17
  using RefineArrowTable
18
18
  using RefineHash
19
19
 
20
- # Quicker DataFrame construction from a `Arrow::Table`.
21
- #
22
- # @param table [Arrow::Table] A table to have in the DataFrame.
23
- # @return [DataFrame] Initialized DataFrame.
24
- #
25
- # @note This method will allocate table directly and may be used in the method.
26
- # @note `table` must have unique keys.
27
- def self.create(table)
28
- instance = allocate
29
- instance.instance_variable_set(:@table, table)
30
- instance
20
+ class << self
21
+ # Quicker DataFrame constructor from a `Arrow::Table`.
22
+ #
23
+ # @param table [Arrow::Table]
24
+ # A table to have in the DataFrame.
25
+ # @return [DataFrame]
26
+ # Initialized DataFrame.
27
+ #
28
+ # @note This method will allocate table directly and may be used in the method.
29
+ # @note `table` must have unique keys.
30
+ #
31
+ def create(table)
32
+ instance = allocate
33
+ instance.instance_variable_set(:@table, table)
34
+ instance
35
+ end
36
+
37
+ # Return new DataFrame for specified schema and value.
38
+ #
39
+ # @param dataframe_for_schema [DataFrame]
40
+ # schema of this dataframe will be used.
41
+ # @param dataframe_for_value [DataFrame]
42
+ # column values of this dataframe will be used.
43
+ # @return [DataFrame]
44
+ # created DataFrame.
45
+ # @since 0.4.1
46
+ #
47
+ def new_dataframe_with_schema(dataframe_for_schema, dataframe_for_value)
48
+ DataFrame.create(
49
+ Arrow::Table.new(dataframe_for_schema.table.schema,
50
+ dataframe_for_value.table.columns)
51
+ )
52
+ end
31
53
  end
32
54
 
33
55
  # Creates a new DataFrame.
34
56
  #
57
+ # @overload initialize(hash)
58
+ # Initialize a DataFrame by a Hash.
59
+ #
60
+ # @param hash [Hash<key => <Array, Arrow::Array, #to_arrow_array>>]
61
+ # a Hash of `key` with array-like for column values.
62
+ # `key`s are Symbol or String.
63
+ # @example Initialize by a Hash
64
+ # hash = { x: [1, 2, 3], y: %w[A B C] }
65
+ # DataFrame.new(hash)
66
+ # @example Initialize by a Hash like arguments.
67
+ # DataFrame.new(x: [1, 2, 3], y: %w[A B C])
68
+ # @example Initialize from objects that respond to #to_arrow_array.
69
+ # # #to_arrow_array responsible `array-like` is also available.
70
+ # require 'arrow-numo-narray'
71
+ # DataFrame.new(numo: Numo::DFloat.new(3).rand)
72
+ #
35
73
  # @overload initialize(table)
36
- # Initialize DataFrame by an `Arrow::Table`
74
+ # Initialize a DataFrame by an `Arrow::Table`.
37
75
  #
38
76
  # @param table [Arrow::Table]
39
- # A table to have in the DataFrame.
77
+ # a table to have in the DataFrame.
78
+ # @example Initialize by a Table
79
+ # table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
80
+ # DataFrame.new(table)
81
+ #
82
+ # @overload initialize(schema, row_oriented_array)
83
+ # Initialize a DataFrame by schema and row_oriented_array.
84
+ #
85
+ # @param schema [Hash<key => type>]
86
+ # a schema of key and data type.
87
+ # @param row_oriented_array [Array]
88
+ # an Array of rows.
89
+ # @example Initialize by a schema and a row_oriented_array.
90
+ # schema = { x: :uint8, y: :string }
91
+ # row_oriented_array = [[1, 'A'], [2, 'B'], [3, 'C']]
92
+ # DataFrame.new(schema, row_oriented_array)
40
93
  #
41
94
  # @overload initialize(arrowable)
42
95
  # Initialize DataFrame by a `#to_arrow` responsible object.
@@ -47,6 +100,11 @@ module RedAmber
47
100
  #
48
101
  # @note `RedAmber::DataFrame` itself is readable by this.
49
102
  # @note Hash is refined to respond to `#to_arrow` in this class.
103
+ # @example Initialize by Red Dataset object.
104
+ # require 'datasets-arrow'
105
+ # dataset = Datasets::Penguins.new
106
+ # penguins = DataFrame.new(dataset)
107
+ # @since 0.2.2
50
108
  #
51
109
  # @overload initialize(rover_like)
52
110
  # Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
@@ -60,20 +118,18 @@ module RedAmber
60
118
  # @overload initialize()
61
119
  # Create empty DataFrame
62
120
  #
63
- # @example DataFrame.new
121
+ # @example
122
+ # DataFrame.new
64
123
  #
65
124
  # @overload initialize(empty)
66
125
  # Create empty DataFrame
67
126
  #
68
127
  # @param empty [nil, [], {}]
69
128
  #
70
- # @example DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
71
- #
72
- # @overload initialize(args)
73
- #
74
- # @param args [values]
75
- # Accepts any argments which is valid for `Arrow::Table.new(args)`. See
76
- # {https://github.com/apache/arrow/blob/master/ruby/red-arrow/lib/arrow/table.rb
129
+ # @example Return empty DataFrame.
130
+ # DataFrame.new([])
131
+ # DataFrame.new({})
132
+ # DataFrame.new(nil)
77
133
  #
78
134
  def initialize(*args)
79
135
  case args
@@ -109,15 +165,16 @@ module RedAmber
109
165
 
110
166
  # Returns the table having within.
111
167
  #
112
- # @return [Arrow::Table] The table within.
168
+ # @return [Arrow::Table]
169
+ # the table within.
113
170
  #
114
171
  attr_reader :table
115
-
116
172
  alias_method :to_arrow, :table
117
173
 
118
- # Returns the number of rows.
174
+ # Returns the number of records (rows).
119
175
  #
120
- # @return [Integer] Number of rows.
176
+ # @return [Integer]
177
+ # number of records (rows).
121
178
  #
122
179
  def size
123
180
  @table.n_rows
@@ -126,9 +183,10 @@ module RedAmber
126
183
  alias_method :n_obs, :size
127
184
  alias_method :n_rows, :size
128
185
 
129
- # Returns the number of columns.
186
+ # Returns the number of variables (columns).
130
187
  #
131
- # @return [Integer] Number of columns.
188
+ # @return [Integer]
189
+ # number of variables (columns).
132
190
  #
133
191
  def n_keys
134
192
  @table.n_columns
@@ -140,7 +198,7 @@ module RedAmber
140
198
  # Returns the numbers of rows and columns.
141
199
  #
142
200
  # @return [Array]
143
- # Number of rows and number of columns in an array.
201
+ # number of rows and number of columns in an array.
144
202
  # Same as [size, n_keys].
145
203
  #
146
204
  def shape
@@ -153,26 +211,27 @@ module RedAmber
153
211
  # `key => Vector` pairs for each column.
154
212
  #
155
213
  def variables
156
- @variables || @variables = init_instance_vars(:variables)
214
+ @variables ||= init_instance_vars(:variables)
157
215
  end
158
216
  alias_method :vars, :variables
159
217
 
160
218
  # Returns an Array of keys.
161
219
  #
162
220
  # @return [Array]
163
- # Keys in an Array.
221
+ # keys in an Array.
164
222
  #
165
223
  def keys
166
- @keys || @keys = init_instance_vars(:keys)
224
+ @keys ||= init_instance_vars(:keys)
167
225
  end
168
226
  alias_method :column_names, :keys
169
227
  alias_method :var_names, :keys
170
228
 
171
229
  # Returns true if self has a specified key in the argument.
172
230
  #
173
- # @param key [Symbol, String] Key to test.
231
+ # @param key [Symbol, String]
232
+ # key to test.
174
233
  # @return [Boolean]
175
- # Returns true if self has key in Symbol.
234
+ # returns true if self has key in Symbol.
176
235
  #
177
236
  def key?(key)
178
237
  keys.include?(key.to_sym)
@@ -181,9 +240,10 @@ module RedAmber
181
240
 
182
241
  # Returns index of specified key in the Array keys.
183
242
  #
184
- # @param key [Symbol, String] key to know.
243
+ # @param key [Symbol, String]
244
+ # key to know.
185
245
  # @return [Integer]
186
- # Index of key in the Array keys.
246
+ # index of key in the Array keys.
187
247
  #
188
248
  def key_index(key)
189
249
  keys.find_index(key.to_sym)
@@ -194,10 +254,10 @@ module RedAmber
194
254
  # Returns abbreviated type names in an Array.
195
255
  #
196
256
  # @return [Array]
197
- # Abbreviated Red Arrow data type names.
257
+ # abbreviated Red Arrow data type names.
198
258
  #
199
259
  def types
200
- @types || @types = @table.columns.map do |column|
260
+ @types ||= @table.columns.map do |column|
201
261
  column.data.value_type.nick.to_sym
202
262
  end
203
263
  end
@@ -205,43 +265,25 @@ module RedAmber
205
265
  # Returns an Array of Classes of data type.
206
266
  #
207
267
  # @return [Array]
208
- # An Array of Red Arrow data type Classes.
268
+ # an Array of Red Arrow data type Classes.
209
269
  #
210
270
  def type_classes
211
- @data_types || @data_types = @table.columns.map { |column| column.data_type.class }
271
+ @type_classes ||= @table.columns.map { |column| column.data_type.class }
212
272
  end
213
273
 
214
274
  # Returns Vectors in an Array.
215
275
  #
216
276
  # @return [Array]
217
- # An Array of `RedAmber::Vector`s.
277
+ # an Array of Vector.
218
278
  #
219
279
  def vectors
220
- @vectors || @vectors = init_instance_vars(:vectors)
280
+ @vectors ||= init_instance_vars(:vectors)
221
281
  end
222
282
 
223
- # Returns row indices (start...(size+start)) in a Vector.
224
- #
225
- # @param start [Object]
226
- # Object which have `#succ` method.
227
- #
228
- # @return [Array]
229
- # A Vector of row indices.
230
- #
231
- # @example
232
- # (when self.size == 5)
233
- # - indices #=> Vector[0, 1, 2, 3, 4]
234
- # - indices(1) #=> Vector[1, 2, 3, 4, 5]
235
- # - indices('a') #=> Vector['a', 'b', 'c', 'd', 'e']
236
- #
237
- def indices(start = 0)
238
- Vector.new((start..).take(size))
239
- end
240
- alias_method :indexes, :indices
241
-
242
283
  # Returns column-oriented data in a Hash.
243
284
  #
244
- # @return [Hash] A Hash of 'key => column_in_an_array'.
285
+ # @return [Hash]
286
+ # a Hash of 'key => column_in_an_array'.
245
287
  #
246
288
  def to_h
247
289
  variables.transform_values(&:to_a)
@@ -249,7 +291,8 @@ module RedAmber
249
291
 
250
292
  # Returns a row-oriented array without header.
251
293
  #
252
- # @return [Array] Row-oriented data without header.
294
+ # @return [Array]
295
+ # row-oriented data without header.
253
296
  #
254
297
  # @note If you need column-oriented array, use `.to_h.to_a`.
255
298
  #
@@ -260,7 +303,8 @@ module RedAmber
260
303
 
261
304
  # Returns column name and data type in a Hash.
262
305
  #
263
- # @return [Hash] Column name and data type.
306
+ # @return [Hash]
307
+ # column name and data type.
264
308
  #
265
309
  # @example
266
310
  # RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
@@ -273,7 +317,7 @@ module RedAmber
273
317
  # Compare DataFrames.
274
318
  #
275
319
  # @return [true, false]
276
- # True if other is a DataFrame and table is same.
320
+ # true if other is a DataFrame and table is same.
277
321
  # Otherwise return false.
278
322
  #
279
323
  def ==(other)
@@ -282,7 +326,8 @@ module RedAmber
282
326
 
283
327
  # Check if it is an empty DataFrame.
284
328
  #
285
- # @return [true, false] True if it has no columns.
329
+ # @return [true, false]
330
+ # true if it has no columns.
286
331
  #
287
332
  def empty?
288
333
  variables.empty?
@@ -293,14 +338,18 @@ module RedAmber
293
338
  # @overload each_row
294
339
  # Returns Enumerator when no block given.
295
340
  #
296
- # @return [Enumerator] Enumerator of each rows.
341
+ # @return [Enumerator]
342
+ # enumerator of each row.
297
343
  #
298
344
  # @overload each_row(&block)
299
345
  # Yields with key and row pairs.
300
346
  #
301
- # @yield [key_row_pairs] Yields with key and row pairs.
302
- # @yieldparam [Hash] Key and row pairs.
303
- # @yieldreturn [Integer] Size of the DataFrame.
347
+ # @yieldparam key_row_pairs [Hash]
348
+ # key and row pairs.
349
+ # @yieldreturn [Integer]
350
+ # size of the DataFrame.
351
+ # @return [Integer]
352
+ # returns size.
304
353
  #
305
354
  def each_row
306
355
  return enum_for(:each_row) unless block_given?
@@ -316,25 +365,346 @@ module RedAmber
316
365
 
317
366
  # Returns self in a `Rover::DataFrame`.
318
367
  #
319
- # @return [Rover::DataFrame] A `Rover::DataFrame`.
368
+ # @return [Rover::DataFrame]
369
+ # a `Rover::DataFrame`.
320
370
  #
321
371
  def to_rover
322
372
  require 'rover'
323
373
  Rover::DataFrame.new(to_h)
324
374
  end
325
375
 
376
+ # Create a Group object. Or create a Group and summarize it.
377
+ #
378
+ # @overload group(*group_keys)
379
+ # Create a Group object.
380
+ #
381
+ # @param group_keys [Array<Symbol, String>]
382
+ # keys for grouping.
383
+ # @return [Group]
384
+ # Group object.
385
+ # @example Create a Group
386
+ # penguins.group(:species)
387
+ #
388
+ # # =>
389
+ # #<RedAmber::Group : 0x000000000000c3c8>
390
+ # species group_count
391
+ # <string> <uint8>
392
+ # 0 Adelie 152
393
+ # 1 Chinstrap 68
394
+ # 2 Gentoo 124
395
+ #
396
+ # @overload group(*group_keys)
397
+ # Create a Group and summarize it by aggregation functions from the block.
398
+ #
399
+ # @yieldparam group [Group]
400
+ # passes Group object.
401
+ # @yieldreturn [DataFrame, Array<DataFrame>]
402
+ # an aggregated DataFrame or an array of aggregated DataFrames.
403
+ # @return [DataFrame]
404
+ # summarized DataFrame.
405
+ # @example Create a group and summarize it.
406
+ # penguins.group(:species) { mean(:bill_length_mm) }
407
+ #
408
+ # # =>
409
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f3fc>
410
+ # species mean(bill_length_mm)
411
+ # <string> <double>
412
+ # 0 Adelie 38.79
413
+ # 1 Chinstrap 48.83
414
+ # 2 Gentoo 47.5
415
+ #
326
416
  def group(*group_keys, &block)
327
417
  g = Group.new(self, group_keys)
328
418
  g = g.summarize(&block) if block
329
419
  g
330
420
  end
331
421
 
422
+ # Create SubFrames by value grouping.
423
+ #
424
+ # [Experimental feature] this method may be removed or be changed in the future.
425
+ # @param keys [Symbol, String, Array<Symbol, String>]
426
+ # grouping keys.
427
+ # @return [SubFrames]
428
+ # a created SubFrames grouped by column values on `keys`.
429
+ # @example
430
+ # df.sub_by_value(keys: :y)
431
+ #
432
+ # # =>
433
+ # #<RedAmber::SubFrames : 0x000000000000fc08>
434
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
435
+ # 3 SubFrames: [2, 3, 1] in sizes.
436
+ # ---
437
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>
438
+ # x y z
439
+ # <uint8> <string> <boolean>
440
+ # 0 1 A false
441
+ # 1 2 A true
442
+ # ---
443
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fc30>
444
+ # x y z
445
+ # <uint8> <string> <boolean>
446
+ # 0 3 B false
447
+ # 1 4 B (nil)
448
+ # 2 5 B true
449
+ # ---
450
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>
451
+ # x y z
452
+ # <uint8> <string> <boolean>
453
+ # 0 6 C false
454
+ #
455
+ # @since 0.4.0
456
+ #
457
+ def sub_by_value(keys: nil)
458
+ SubFrames.new(self, group(keys).filters)
459
+ end
460
+ alias_method :subframes_by_value, :sub_by_value
461
+
462
+ # Create SubFrames by Windowing with `from`, `size` and `step`.
463
+ #
464
+ # [Experimental feature] this method may be removed or be changed in the future.
465
+ # @param from [Integer]
466
+ # start position of window.
467
+ # @param size [Integer]
468
+ # window size.
469
+ # @param step [Integer]
470
+ # moving step of window.
471
+ # @return [SubFrames]
472
+ # a created SubFrames.
473
+ # @example
474
+ # df.sub_by_window(size: 4, step: 2)
475
+ #
476
+ # # =>
477
+ # #<RedAmber::SubFrames : 0x000000000000fc58>
478
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
479
+ # 2 SubFrames: [4, 4] in sizes.
480
+ # ---
481
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc6c>
482
+ # x y z
483
+ # <uint8> <string> <boolean>
484
+ # 0 1 A false
485
+ # 1 2 A true
486
+ # 2 3 B false
487
+ # 3 4 B (nil)
488
+ # ---
489
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc80>
490
+ # x y z
491
+ # <uint8> <string> <boolean>
492
+ # 0 3 B false
493
+ # 1 4 B (nil)
494
+ # 2 5 B true
495
+ # 3 6 C false
496
+ #
497
+ # @since 0.4.0
498
+ #
499
+ def sub_by_window(from: 0, size: nil, step: 1)
500
+ SubFrames.new(self) do
501
+ from.step(by: step, to: (size() - size)).map do |i| # rubocop:disable Style/MethodCallWithoutArgsParentheses
502
+ [*i...(i + size)]
503
+ end
504
+ end
505
+ end
506
+ alias_method :subframes_by_window, :sub_by_window
507
+
508
+ # Create SubFrames by Grouping/Windowing by position from an enumerator method.
509
+ #
510
+ # This method will process the indices of self by enumerator.
511
+ # [Experimental feature] this method may be removed or be changed in the future.
512
+ # @param enumerator_method [Symbol]
513
+ # Enumerator name.
514
+ # @param args [<Object>]
515
+ # arguments for the enumerator method.
516
+ # @return [SubFrames]
517
+ # a created SubFrames.
518
+ # @example Create a SubFrames object sliced by 3 rows.
519
+ # df.sub_by_enum(:each_slice, 3)
520
+ #
521
+ # # =>
522
+ # #<RedAmber::SubFrames : 0x000000000000fd20>
523
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
524
+ # 2 SubFrames: [3, 3] in sizes.
525
+ # ---
526
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd34>
527
+ # x y z
528
+ # <uint8> <string> <boolean>
529
+ # 0 1 A false
530
+ # 1 2 A true
531
+ # 2 3 B false
532
+ # ---
533
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd48>
534
+ # x y z
535
+ # <uint8> <string> <boolean>
536
+ # 0 4 B (nil)
537
+ # 1 5 B true
538
+ # 2 6 C false
539
+ #
540
+ # @example Create a SubFrames object for each consecutive 4 rows.
541
+ # df.sub_by_enum(:each_cons, 4)
542
+ #
543
+ # # =>
544
+ # #<RedAmber::SubFrames : 0x000000000000fd98>
545
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
546
+ # 3 SubFrames: [4, 4, 4] in sizes.
547
+ # ---
548
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdac>
549
+ # x y z
550
+ # <uint8> <string> <boolean>
551
+ # 0 1 A false
552
+ # 1 2 A true
553
+ # 2 3 B false
554
+ # 3 4 B (nil)
555
+ # ---
556
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdc0>
557
+ # x y z
558
+ # <uint8> <string> <boolean>
559
+ # 0 2 A true
560
+ # 1 3 B false
561
+ # 2 4 B (nil)
562
+ # 3 5 B true
563
+ # ---
564
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdd4>
565
+ # x y z
566
+ # <uint8> <string> <boolean>
567
+ # 0 3 B false
568
+ # 1 4 B (nil)
569
+ # 2 5 B true
570
+ # 3 6 C false
571
+ #
572
+ # @since 0.4.0
573
+ #
574
+ def sub_by_enum(enumerator_method, *args)
575
+ SubFrames.new(self, indices.send(enumerator_method, *args).to_a)
576
+ end
577
+ alias_method :subframes_by_enum, :sub_by_enum
578
+
579
+ # Create SubFrames by windowing with a kernel (i.e. masked window) and step.
580
+ #
581
+ # [Experimental feature] this method may be removed or be changed in the future.
582
+ # @param kernel [Array<true, false>, Vector]
583
+ # boolean array-like to pick records in the window.
584
+ # Kernel is a boolean Array and it behaves like a masked window.
585
+ # @param step [Integer]
586
+ # moving step of window.
587
+ # @return [SubFrames]
588
+ # a created SubFrames.
589
+ # @example
590
+ # kernel = [true, false, false, true]
591
+ # df.sub_by_kernel(kernel, step: 2)
592
+ #
593
+ # # =>
594
+ # #<RedAmber::SubFrames : 0x000000000000fde8>
595
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
596
+ # 2 SubFrames: [2, 2] in sizes.
597
+ # ---
598
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fdfc>
599
+ # x y z
600
+ # <uint8> <string> <boolean>
601
+ # 0 1 A false
602
+ # 1 4 B (nil)
603
+ # ---
604
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fe10>
605
+ # x y z
606
+ # <uint8> <string> <boolean>
607
+ # 0 3 B false
608
+ # 1 6 C false
609
+ #
610
+ # @since 0.4.0
611
+ #
612
+ def sub_by_kernel(kernel, step: 1)
613
+ limit_size = size - kernel.size
614
+ kernel_vector = Vector.new(kernel.concat([nil] * limit_size))
615
+ SubFrames.new(self) do
616
+ 0.step(by: step, to: limit_size).map do |i|
617
+ kernel_vector.shift(i)
618
+ end
619
+ end
620
+ end
621
+ alias_method :subframes_by_kernel, :sub_by_kernel
622
+
623
+ # Generic builder of sub-dataframes from self.
624
+ #
625
+ # [Experimental feature] this method may be removed or be changed in the future.
626
+ # @overload build_subframes(subset_specifier)
627
+ # Create a new SubFrames object.
628
+ #
629
+ # @param subset_specifier [Array<Vector>, Array<array-like>]
630
+ # an Array of numeric indices or boolean filters
631
+ # to create subsets of DataFrame.
632
+ # @return [SubFrames]
633
+ # new SubFrames.
634
+ # @example
635
+ # df.build_subframes([[0, 2, 4], [1, 3, 5]])
636
+ #
637
+ # # =>
638
+ # #<RedAmber::SubFrames : 0x000000000000fe9c>
639
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
640
+ # 2 SubFrames: [3, 3] in sizes.
641
+ # ---
642
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000feb0>
643
+ # x y z
644
+ # <uint8> <string> <boolean>
645
+ # 0 1 A false
646
+ # 1 3 B false
647
+ # 2 5 B true
648
+ # ---
649
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fec4>
650
+ # x y z
651
+ # <uint8> <string> <boolean>
652
+ # 0 2 A true
653
+ # 1 4 B (nil)
654
+ # 2 6 C false
655
+ #
656
+ # @overload build_subframes
657
+ # Create a new SubFrames object by block.
658
+ #
659
+ # @yield [self]
660
+ # the block is called within the context of self.
661
+ # (Block is called by instance_eval(&block). )
662
+ # @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
663
+ # an Array of index or boolean array-likes to create subsets of DataFrame.
664
+ # All array-likes must respond to #numeric? or #boolean?.
665
+ # @example
666
+ # dataframe.build_subframes do
667
+ # even = indices.map(&:even?)
668
+ # [even, !even]
669
+ # end
670
+ #
671
+ # # =>
672
+ # #<RedAmber::SubFrames : 0x000000000000fe60>
673
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
674
+ # 2 SubFrames: [3, 3] in sizes.
675
+ # ---
676
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe74>
677
+ # x y z
678
+ # <uint8> <string> <boolean>
679
+ # 0 1 A false
680
+ # 1 3 B false
681
+ # 2 5 B true
682
+ # ---
683
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe88>
684
+ # x y z
685
+ # <uint8> <string> <boolean>
686
+ # 0 2 A true
687
+ # 1 4 B (nil)
688
+ # 2 6 C false
689
+ #
690
+ # @since 0.4.0
691
+ #
692
+ def build_subframes(subset_specifier = nil, &block)
693
+ if block
694
+ SubFrames.new(self, instance_eval(&block))
695
+ else
696
+ SubFrames.new(self, subset_specifier)
697
+ end
698
+ end
699
+
700
+ # Catch variable (column) key as method name.
332
701
  def method_missing(name, *args, &block)
333
- return v(name) if args.empty? && key?(name)
702
+ return variables[name] if args.empty? && key?(name)
334
703
 
335
704
  super
336
705
  end
337
706
 
707
+ # Catch variable (column) key as method name.
338
708
  def respond_to_missing?(name, include_private)
339
709
  return true if key?(name)
340
710
 
@@ -346,15 +716,16 @@ module RedAmber
346
716
  # initialize @variables, @keys, @vectors and return one of them
347
717
  def init_instance_vars(var)
348
718
  ary =
349
- @table.columns
350
- .each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
351
- v = Vector.create(column.data)
352
- k = column.name.to_sym
353
- v.key = k
354
- variables[k] = v
355
- keys << k
356
- vectors << v
357
- end
719
+ @table
720
+ .columns
721
+ .each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
722
+ v = Vector.create(column.data)
723
+ k = column.name.to_sym
724
+ v.key = k
725
+ variables[k] = v
726
+ keys << k
727
+ vectors << v
728
+ end
358
729
 
359
730
  @variables, @keys, @vectors = ary
360
731
  ary[%i[variables keys vectors].index(var)]
@@ -369,11 +740,9 @@ module RedAmber
369
740
  end
370
741
 
371
742
  def name_unnamed_keys
372
- return unless @table.key?('')
743
+ return unless @table.key?(:'')
373
744
 
374
- # We can't use #keys because it causes mismatch of @table and @keys
375
- keys = @table.schema.fields.map { |f| f.name.to_sym }
376
- unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
745
+ unnamed = (:unnamed1..).find { |name| !@table.key?(name) }
377
746
  fields =
378
747
  @table.schema.fields.map do |field|
379
748
  if field.name.empty?