red_amber 0.2.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +133 -51
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +203 -1
  5. data/Gemfile +2 -1
  6. data/LICENSE +1 -1
  7. data/README.md +61 -45
  8. data/benchmark/basic.yml +11 -4
  9. data/benchmark/combine.yml +3 -4
  10. data/benchmark/dataframe.yml +62 -0
  11. data/benchmark/group.yml +7 -1
  12. data/benchmark/reshape.yml +6 -2
  13. data/benchmark/vector.yml +63 -0
  14. data/doc/DataFrame.md +35 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +295 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +537 -68
  20. data/lib/red_amber/data_frame_combinable.rb +776 -123
  21. data/lib/red_amber/data_frame_displayable.rb +248 -18
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +81 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +216 -21
  25. data/lib/red_amber/data_frame_selectable.rb +781 -120
  26. data/lib/red_amber/data_frame_variable_operation.rb +561 -85
  27. data/lib/red_amber/group.rb +195 -21
  28. data/lib/red_amber/helper.rb +114 -32
  29. data/lib/red_amber/refinements.rb +206 -0
  30. data/lib/red_amber/subframes.rb +1066 -0
  31. data/lib/red_amber/vector.rb +435 -58
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +387 -0
  34. data/lib/red_amber/vector_selectable.rb +321 -69
  35. data/lib/red_amber/vector_unary_element_wise.rb +436 -0
  36. data/lib/red_amber/vector_updatable.rb +397 -24
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +15 -1
  39. data/red_amber.gemspec +4 -3
  40. metadata +19 -11
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -294
@@ -4,7 +4,7 @@ module RedAmber
4
4
  # Class to represent a data frame.
5
5
  # Variable @table holds an Arrow::Table object.
6
6
  class DataFrame
7
- # mix-in
7
+ # Mix-in
8
8
  include DataFrameCombinable
9
9
  include DataFrameDisplayable
10
10
  include DataFrameIndexable
@@ -14,65 +14,151 @@ module RedAmber
14
14
  include DataFrameVariableOperation
15
15
  include Helper
16
16
 
17
- # Creates a new RedAmber::DataFrame.
17
+ using RefineArrowTable
18
+ using RefineHash
19
+
20
+ class << self
21
+ # Quicker DataFrame constructor from a `Arrow::Table`.
22
+ #
23
+ # @param table [Arrow::Table]
24
+ # A table to have in the DataFrame.
25
+ # @return [DataFrame]
26
+ # Initialized DataFrame.
27
+ #
28
+ # @note This method will allocate table directly and may be used in the method.
29
+ # @note `table` must have unique keys.
30
+ #
31
+ def create(table)
32
+ instance = allocate
33
+ instance.instance_variable_set(:@table, table)
34
+ instance
35
+ end
36
+ end
37
+
38
+ # Creates a new DataFrame.
18
39
  #
19
40
  # @overload initialize(hash)
41
+ # Initialize a DataFrame by a Hash.
20
42
  #
21
- # @params hash [Hash]
43
+ # @param hash [Hash<key => <Array, Arrow::Array, #to_arrow_array>>]
44
+ # a Hash of `key` with array-like for column values.
45
+ # `key`s are Symbol or String.
46
+ # @example Initialize by a Hash
47
+ # hash = { x: [1, 2, 3], y: %w[A B C] }
48
+ # DataFrame.new(hash)
49
+ # @example Initialize by a Hash like arguments.
50
+ # DataFrame.new(x: [1, 2, 3], y: %w[A B C])
51
+ # @example Initialize from objects that respond to #to_arrow_array.
52
+ # # #to_arrow_array responsible `array-like` is also available.
53
+ # require 'arrow-numo-narray'
54
+ # DataFrame.new(numo: Numo::DFloat.new(3).rand)
22
55
  #
23
56
  # @overload initialize(table)
57
+ # Initialize a DataFrame by an `Arrow::Table`.
58
+ #
59
+ # @param table [Arrow::Table]
60
+ # a table to have in the DataFrame.
61
+ # @example Initialize by a Table
62
+ # table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
63
+ # DataFrame.new(table)
64
+ #
65
+ # @overload initialize(schema, row_oriented_array)
66
+ # Initialize a DataFrame by schema and row_oriented_array.
67
+ #
68
+ # @param schema [Hash<key => type>]
69
+ # a schema of key and data type.
70
+ # @param row_oriented_array [Array]
71
+ # an Array of rows.
72
+ # @example Initialize by a schema and a row_oriented_array.
73
+ # schema = { x: :uint8, y: :string }
74
+ # row_oriented_array = [[1, 'A'], [2, 'B'], [3, 'C']]
75
+ # DataFrame.new(schema, row_oriented_array)
76
+ #
77
+ # @overload initialize(arrowable)
78
+ # Initialize DataFrame by a `#to_arrow` responsible object.
79
+ #
80
+ # @param arrowable [#to_arrow]
81
+ # Any object which responds to `#to_arrow`.
82
+ # `#to_arrow` must return `Arrow::Table`.
24
83
  #
25
- # @params table [Arrow::Table]
84
+ # @note `RedAmber::DataFrame` itself is readable by this.
85
+ # @note Hash is refined to respond to `#to_arrow` in this class.
86
+ # @example Initialize by Red Dataset object.
87
+ # require 'datasets-arrow'
88
+ # dataset = Datasets::Penguins.new
89
+ # penguins = DataFrame.new(dataset)
90
+ # @since 0.2.2
26
91
  #
27
- # @overload initialize(dataframe)
92
+ # @overload initialize(rover_like)
93
+ # Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
28
94
  #
29
- # @params dataframe [RedAmber::DataFrame, Rover::DataFrame]
95
+ # @param rover_like [#to_h]
96
+ # Any object which responds to `#to_h`.
97
+ # `#to_h` must return a Hash which is convertible by `Arrow::Table.new`.
30
98
  #
31
- # @overload initialize(null)
99
+ # @note `Rover::DataFrame` is readable by this.
32
100
  #
33
- # @params null [NilClass] No arguments.
101
+ # @overload initialize()
102
+ # Create empty DataFrame
103
+ #
104
+ # @example
105
+ # DataFrame.new
106
+ #
107
+ # @overload initialize(empty)
108
+ # Create empty DataFrame
109
+ #
110
+ # @param empty [nil, [], {}]
111
+ #
112
+ # @example Return empty DataFrame.
113
+ # DataFrame.new([])
114
+ # DataFrame.new({})
115
+ # DataFrame.new(nil)
34
116
  #
35
117
  def initialize(*args)
36
- @variables = @keys = @vectors = @types = @data_types = nil
37
118
  case args
38
119
  in nil | [nil] | [] | {} | [[]] | [{}]
39
- # DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
40
- # returns empty DataFrame
41
120
  @table = Arrow::Table.new({}, [])
42
- in [->(x) { x.respond_to?(:to_arrow) } => arrowable]
121
+ in [Arrow::Table => table]
122
+ @table = table
123
+ in [arrowable] if arrowable.respond_to?(:to_arrow)
43
124
  table = arrowable.to_arrow
44
125
  unless table.is_a?(Arrow::Table)
45
126
  raise DataFrameTypeError,
46
127
  "to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
47
128
  end
48
129
  @table = table
49
- in [Arrow::Table => table]
50
- @table = table
51
- in [rover_or_hash]
130
+ in [rover_like] if rover_like.respond_to?(:to_h)
52
131
  begin
53
- # Accepts Rover::DataFrame or Hash
54
- @table = Arrow::Table.new(rover_or_hash.to_h)
132
+ # Accepts Rover::DataFrame
133
+ @table = Arrow::Table.new(rover_like.to_h)
55
134
  rescue StandardError
56
- raise DataFrameTypeError, "invalid argument: #{rover_or_hash}"
135
+ raise DataFrameTypeError, "to_h must return Arrowable object: #{rover_like}"
57
136
  end
58
137
  else
59
- @table = Arrow::Table.new(*args)
138
+ begin
139
+ @table = Arrow::Table.new(*args)
140
+ rescue StandardError
141
+ raise DataFrameTypeError, "invalid argument to create Arrow::Table: #{args}"
142
+ end
60
143
  end
61
- name_unnamed_keys
62
144
 
63
- duplicated_keys = keys.tally.select { |_k, v| v > 1 }.keys
64
- raise DataFrameArgumentError, "duplicate keys: #{duplicated_keys}" unless duplicated_keys.empty?
145
+ name_unnamed_keys
146
+ check_duplicate_keys(keys)
65
147
  end
66
148
 
149
+ # Returns the table having within.
150
+ #
151
+ # @return [Arrow::Table]
152
+ # the table within.
153
+ #
67
154
  attr_reader :table
155
+ alias_method :to_arrow, :table
68
156
 
69
- def to_arrow
70
- @table
71
- end
72
-
73
- # Returns the number of rows.
157
+ # Returns the number of records (rows).
158
+ #
159
+ # @return [Integer]
160
+ # number of records (rows).
74
161
  #
75
- # @return [Integer] Number of rows.
76
162
  def size
77
163
  @table.n_rows
78
164
  end
@@ -80,9 +166,11 @@ module RedAmber
80
166
  alias_method :n_obs, :size
81
167
  alias_method :n_rows, :size
82
168
 
83
- # Returns the number of columns.
169
+ # Returns the number of variables (columns).
170
+ #
171
+ # @return [Integer]
172
+ # number of variables (columns).
84
173
  #
85
- # @return [Integer] Number of columns.
86
174
  def n_keys
87
175
  @table.n_columns
88
176
  end
@@ -93,8 +181,9 @@ module RedAmber
93
181
  # Returns the numbers of rows and columns.
94
182
  #
95
183
  # @return [Array]
96
- # Number of rows and number of columns in an array.
184
+ # number of rows and number of columns in an array.
97
185
  # Same as [size, n_keys].
186
+ #
98
187
  def shape
99
188
  [size, n_keys]
100
189
  end
@@ -102,7 +191,8 @@ module RedAmber
102
191
  # Returns a Hash of key and Vector pairs in the columns.
103
192
  #
104
193
  # @return [Hash]
105
- # key => Vector pairs for each columns.
194
+ # `key => Vector` pairs for each columns.
195
+ #
106
196
  def variables
107
197
  @variables || @variables = init_instance_vars(:variables)
108
198
  end
@@ -111,7 +201,8 @@ module RedAmber
111
201
  # Returns an Array of keys.
112
202
  #
113
203
  # @return [Array]
114
- # Keys in an Array.
204
+ # keys in an Array.
205
+ #
115
206
  def keys
116
207
  @keys || @keys = init_instance_vars(:keys)
117
208
  end
@@ -120,9 +211,11 @@ module RedAmber
120
211
 
121
212
  # Returns true if self has a specified key in the argument.
122
213
  #
123
- # @param key [Symbol, String] Key to test.
214
+ # @param key [Symbol, String]
215
+ # key to test.
124
216
  # @return [Boolean]
125
- # Returns true if self has key in Symbol.
217
+ # returns true if self has key in Symbol.
218
+ #
126
219
  def key?(key)
127
220
  keys.include?(key.to_sym)
128
221
  end
@@ -130,9 +223,11 @@ module RedAmber
130
223
 
131
224
  # Returns index of specified key in the Array keys.
132
225
  #
133
- # @param key [Symbol, String] key to know.
226
+ # @param key [Symbol, String]
227
+ # key to know.
134
228
  # @return [Integer]
135
- # Index of key in the Array keys.
229
+ # index of key in the Array keys.
230
+ #
136
231
  def key_index(key)
137
232
  keys.find_index(key.to_sym)
138
233
  end
@@ -142,15 +237,19 @@ module RedAmber
142
237
  # Returns abbreviated type names in an Array.
143
238
  #
144
239
  # @return [Array]
145
- # Abbreviated Red Arrow data type names.
240
+ # abbreviated Red Arrow data type names.
241
+ #
146
242
  def types
147
- @types || @types = @table.columns.map { |column| column.data.value_type.nick.to_sym }
243
+ @types || @types = @table.columns.map do |column|
244
+ column.data.value_type.nick.to_sym
245
+ end
148
246
  end
149
247
 
150
248
  # Returns an Array of Classes of data type.
151
249
  #
152
250
  # @return [Array]
153
- # An Array of Red Arrow data type Classes.
251
+ # an Array of Red Arrow data type Classes.
252
+ #
154
253
  def type_classes
155
254
  @data_types || @data_types = @table.columns.map { |column| column.data_type.class }
156
255
  end
@@ -158,50 +257,83 @@ module RedAmber
158
257
  # Returns Vectors in an Array.
159
258
  #
160
259
  # @return [Array]
161
- # An Array of RedAmber::Vector s.
260
+ # an Array of Vector.
261
+ #
162
262
  def vectors
163
263
  @vectors || @vectors = init_instance_vars(:vectors)
164
264
  end
165
265
 
166
- # Returns row indices (start...(size+start)) in an Array.
266
+ # Returns column-oriented data in a Hash.
267
+ #
268
+ # @return [Hash]
269
+ # a Hash of 'key => column_in_an_array'.
167
270
  #
168
- # @param start [Object]
169
- # Object which have #succ method.
170
- # @return [Array]
171
- # An Array of indices of the row.
172
- # @example
173
- # (when self.size == 5)
174
- # - indices #=> [0, 1, 2, 3, 4]
175
- # - indices(1) #=> [1, 2, 3, 4, 5]
176
- # - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
177
- def indices(start = 0)
178
- Vector.new((start..).take(size))
179
- end
180
- alias_method :indexes, :indices
181
-
182
271
  def to_h
183
272
  variables.transform_values(&:to_a)
184
273
  end
185
274
 
275
+ # Returns a row-oriented array without header.
276
+ #
277
+ # @return [Array]
278
+ # row-oriented data without header.
279
+ #
280
+ # @note If you need column-oriented array, use `.to_h.to_a`.
281
+ #
186
282
  def to_a
187
- # output an array of row-oriented data without header
188
- # if you need column-oriented array, use `.to_h.to_a`
189
283
  @table.raw_records
190
284
  end
191
285
  alias_method :raw_records, :to_a
192
286
 
287
+ # Returns column name and data type in a Hash.
288
+ #
289
+ # @return [Hash]
290
+ # column name and data type.
291
+ #
292
+ # @example
293
+ # RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
294
+ # # => {:x=>:uint8, :y=>:string}
295
+ #
193
296
  def schema
194
297
  keys.zip(types).to_h
195
298
  end
196
299
 
300
+ # Compare DataFrames.
301
+ #
302
+ # @return [true, false]
303
+ # true if other is a DataFrame and table is same.
304
+ # Otherwise return false.
305
+ #
197
306
  def ==(other)
198
307
  other.is_a?(DataFrame) && @table == other.table
199
308
  end
200
309
 
310
+ # Check if it is an empty DataFrame.
311
+ #
312
+ # @return [true, false]
313
+ # true if it has no columns.
314
+ #
201
315
  def empty?
202
316
  variables.empty?
203
317
  end
204
318
 
319
+ # Enumerate for each row.
320
+ #
321
+ # @overload each_row
322
+ # Returns Enumerator when no block given.
323
+ #
324
+ # @return [Enumerator]
325
+ # enumerator of each rows.
326
+ #
327
+ # @overload each_row(&block)
328
+ # Yields with key and row pairs.
329
+ #
330
+ # @yieldparam key_row_pairs [Hash]
331
+ # key and row pairs.
332
+ # @yieldreturn [Integer]
333
+ # size of the DataFrame.
334
+ # @return [Integer]
335
+ # returns size.
336
+ #
205
337
  def each_row
206
338
  return enum_for(:each_row) unless block_given?
207
339
 
@@ -214,23 +346,348 @@ module RedAmber
214
346
  end
215
347
  end
216
348
 
349
+ # Returns self in a `Rover::DataFrame`.
350
+ #
351
+ # @return [Rover::DataFrame]
352
+ # a `Rover::DataFrame`.
353
+ #
217
354
  def to_rover
218
355
  require 'rover'
219
356
  Rover::DataFrame.new(to_h)
220
357
  end
221
358
 
359
+ # Create a Group object. Or create a Group and summarize it.
360
+ #
361
+ # @overload group(*group_keys)
362
+ # Create a Group object.
363
+ #
364
+ # @param group_keys [Array<Symbol, String>]
365
+ # keys for grouping.
366
+ # @return [Group]
367
+ # Group object.
368
+ # @example Create a Group
369
+ # penguins.group(:species)
370
+ #
371
+ # # =>
372
+ # #<RedAmber::Group : 0x000000000000c3c8>
373
+ # species group_count
374
+ # <string> <uint8>
375
+ # 0 Adelie 152
376
+ # 1 Chinstrap 68
377
+ # 2 Gentoo 124
378
+ #
379
+ # @overload group(*group_keys)
380
+ # Create a Group and summarize it by aggregation functions from the block.
381
+ #
382
+ # @yieldparam group [Group]
383
+ # passes Group object.
384
+ # @yieldreturn [DataFrame, Array<DataFrame>]
385
+ # an aggregated DataFrame or an array of aggregated DataFrames.
386
+ # @return [DataFrame]
387
+ # summarized DataFrame.
388
+ # @example Create a group and summarize it.
389
+ # penguins.group(:species) { mean(:bill_length_mm) }
390
+ #
391
+ # # =>
392
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f3fc>
393
+ # species mean(bill_length_mm)
394
+ # <string> <double>
395
+ # 0 Adelie 38.79
396
+ # 1 Chinstrap 48.83
397
+ # 2 Gentoo 47.5
398
+ #
222
399
  def group(*group_keys, &block)
223
400
  g = Group.new(self, group_keys)
224
401
  g = g.summarize(&block) if block
225
402
  g
226
403
  end
227
404
 
405
+ # Create SubFrames by value grouping.
406
+ #
407
+ # [Experimental feature] this method may be removed or be changed in the future.
408
+ # @param keys [Symbol, String, Array<Symbol, String>]
409
+ # grouping keys.
410
+ # @return [SubFrames]
411
+ # a created SubFrames grouped by column values on `keys`.
412
+ # @example
413
+ # df.sub_by_value(keys: :y)
414
+ #
415
+ # # =>
416
+ # #<RedAmber::SubFrames : 0x000000000000fc08>
417
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
418
+ # 3 SubFrames: [2, 3, 1] in sizes.
419
+ # ---
420
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>
421
+ # x y z
422
+ # <uint8> <string> <boolean>
423
+ # 0 1 A false
424
+ # 1 2 A true
425
+ # ---
426
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fc30>
427
+ # x y z
428
+ # <uint8> <string> <boolean>
429
+ # 0 3 B false
430
+ # 1 4 B (nil)
431
+ # 2 5 B true
432
+ # ---
433
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>
434
+ # x y z
435
+ # <uint8> <string> <boolean>
436
+ # 0 6 C false
437
+ #
438
+ # @since 0.4.0
439
+ #
440
+ def sub_by_value(keys: nil)
441
+ SubFrames.new(self, group(keys).filters)
442
+ end
443
+ alias_method :subframes_by_value, :sub_by_value
444
+
445
+ # Create SubFrames by Windowing with `from`, `size` and `step`.
446
+ #
447
+ # [Experimental feature] this method may be removed or be changed in the future.
448
+ # @param from [Integer]
449
+ # start position of window.
450
+ # @param size [Integer]
451
+ # window size.
452
+ # @param step [Integer]
453
+ # moving step of window.
454
+ # @return [SubFrames]
455
+ # a created SubFrames.
456
+ # @example
457
+ # df.sub_by_window(size: 4, step: 2)
458
+ #
459
+ # # =>
460
+ # #<RedAmber::SubFrames : 0x000000000000fc58>
461
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
462
+ # 2 SubFrames: [4, 4] in sizes.
463
+ # ---
464
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc6c>
465
+ # x y z
466
+ # <uint8> <string> <boolean>
467
+ # 0 1 A false
468
+ # 1 2 A true
469
+ # 2 3 B false
470
+ # 3 4 B (nil)
471
+ # ---
472
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc80>
473
+ # x y z
474
+ # <uint8> <string> <boolean>
475
+ # 0 3 B false
476
+ # 1 4 B (nil)
477
+ # 2 5 B true
478
+ # 3 6 C false
479
+ #
480
+ # @since 0.4.0
481
+ #
482
+ def sub_by_window(from: 0, size: nil, step: 1)
483
+ SubFrames.new(self) do
484
+ from.step(by: step, to: (size() - size)).map do |i| # rubocop:disable Style/MethodCallWithoutArgsParentheses
485
+ [*i...(i + size)]
486
+ end
487
+ end
488
+ end
489
+ alias_method :subframes_by_window, :sub_by_window
490
+
491
+ # Create SubFrames by Grouping/Windowing by position from an enumerator method.
492
+ #
493
+ # This method will process the indices of self by enumerator.
494
+ # [Experimental feature] this method may be removed or be changed in the future.
495
+ # @param enumerator_method [Symbol]
496
+ # Enumerator name.
497
+ # @param args [<Object>]
498
+ # arguments for the enumerator method.
499
+ # @return [SubFrames]
500
+ # a created SubFrames.
501
+ # @example Create a SubFrames object sliced by 3 rows.
502
+ # df.sub_by_enum(:each_slice, 3)
503
+ #
504
+ # # =>
505
+ # #<RedAmber::SubFrames : 0x000000000000fd20>
506
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
507
+ # 2 SubFrames: [3, 3] in sizes.
508
+ # ---
509
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd34>
510
+ # x y z
511
+ # <uint8> <string> <boolean>
512
+ # 0 1 A false
513
+ # 1 2 A true
514
+ # 2 3 B false
515
+ # ---
516
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd48>
517
+ # x y z
518
+ # <uint8> <string> <boolean>
519
+ # 0 4 B (nil)
520
+ # 1 5 B true
521
+ # 2 6 C false
522
+ #
523
+ # @example Create a SubFrames object for each consecutive 4 rows.
524
+ # df.sub_by_enum(:each_cons, 4)
525
+ #
526
+ # # =>
527
+ # #<RedAmber::SubFrames : 0x000000000000fd98>
528
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
529
+ # 3 SubFrames: [4, 4, 4] in sizes.
530
+ # ---
531
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdac>
532
+ # x y z
533
+ # <uint8> <string> <boolean>
534
+ # 0 1 A false
535
+ # 1 2 A true
536
+ # 2 3 B false
537
+ # 3 4 B (nil)
538
+ # ---
539
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdc0>
540
+ # x y z
541
+ # <uint8> <string> <boolean>
542
+ # 0 2 A true
543
+ # 1 3 B false
544
+ # 2 4 B (nil)
545
+ # 3 5 B true
546
+ # ---
547
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdd4>
548
+ # x y z
549
+ # <uint8> <string> <boolean>
550
+ # 0 3 B false
551
+ # 1 4 B (nil)
552
+ # 2 5 B true
553
+ # 3 6 C false
554
+ #
555
+ # @since 0.4.0
556
+ #
557
+ def sub_by_enum(enumerator_method, *args)
558
+ SubFrames.new(self, indices.send(enumerator_method, *args).to_a)
559
+ end
560
+ alias_method :subframes_by_enum, :sub_by_enum
561
+
562
+ # Create SubFrames by windowing with a kernel (i.e. masked window) and step.
563
+ #
564
+ # [Experimental feature] this method may be removed or be changed in the future.
565
+ # @param kernel [Array<true, false>, Vector]
566
+ # boolean array-like to pick records in the window.
567
+ # Kernel is a boolean Array and it behaves like a masked window.
568
+ # @param step [Integer]
569
+ # moving step of window.
570
+ # @return [SubFrames]
571
+ # a created SubFrames.
572
+ # @example
573
+ # kernel = [true, false, false, true]
574
+ # df.sub_by_kernel(kernel, step: 2)
575
+ #
576
+ # # =>
577
+ # #<RedAmber::SubFrames : 0x000000000000fde8>
578
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
579
+ # 2 SubFrames: [2, 2] in sizes.
580
+ # ---
581
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fdfc>
582
+ # x y z
583
+ # <uint8> <string> <boolean>
584
+ # 0 1 A false
585
+ # 1 4 B (nil)
586
+ # ---
587
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fe10>
588
+ # x y z
589
+ # <uint8> <string> <boolean>
590
+ # 0 3 B false
591
+ # 1 6 C false
592
+ #
593
+ # @since 0.4.0
594
+ #
595
+ def sub_by_kernel(kernel, step: 1)
596
+ limit_size = size - kernel.size
597
+ kernel_vector = Vector.new(kernel.concat([nil] * limit_size))
598
+ SubFrames.new(self) do
599
+ 0.step(by: step, to: limit_size).map do |i|
600
+ kernel_vector.shift(i)
601
+ end
602
+ end
603
+ end
604
+ alias_method :subframes_by_kernel, :sub_by_kernel
605
+
606
+ # Generic builder of sub-dataframes from self.
607
+ #
608
+ # [Experimental feature] this method may be removed or be changed in the future.
609
+ # @overload build_subframes(subset_specifier)
610
+ # Create a new SubFrames object.
611
+ #
612
+ # @param subset_specifier [Array<Vector>, Array<array-like>]
613
+ # an Array of numeric indices or boolean filters
614
+ # to create subsets of DataFrame.
615
+ # @return [SubFrames]
616
+ # new SubFrames.
617
+ # @example
618
+ # df.build_subframes([[0, 2, 4], [1, 3, 5]])
619
+ #
620
+ # # =>
621
+ # #<RedAmber::SubFrames : 0x000000000000fe9c>
622
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
623
+ # 2 SubFrames: [3, 3] in sizes.
624
+ # ---
625
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000feb0>
626
+ # x y z
627
+ # <uint8> <string> <boolean>
628
+ # 0 1 A false
629
+ # 1 3 B false
630
+ # 2 5 B true
631
+ # ---
632
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fec4>
633
+ # x y z
634
+ # <uint8> <string> <boolean>
635
+ # 0 2 A true
636
+ # 1 4 B (nil)
637
+ # 2 6 C false
638
+ #
639
+ # @overload build_subframes
640
+ # Create a new SubFrames object by block.
641
+ #
642
+ # @yield [self]
643
+ # the block is called within the context of self.
644
+ # (Block is called by instance_eval(&block). )
645
+ # @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
646
+ # an Array of index or boolean array-likes to create subsets of DataFrame.
647
+ # All array-likes are responsible to #numeric? or #boolean?.
648
+ # @example
649
+ # dataframe.build_subframes do
650
+ # even = indices.map(&:even?)
651
+ # [even, !even]
652
+ # end
653
+ #
654
+ # # =>
655
+ # #<RedAmber::SubFrames : 0x000000000000fe60>
656
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
657
+ # 2 SubFrames: [3, 3] in sizes.
658
+ # ---
659
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe74>
660
+ # x y z
661
+ # <uint8> <string> <boolean>
662
+ # 0 1 A false
663
+ # 1 3 B false
664
+ # 2 5 B true
665
+ # ---
666
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe88>
667
+ # x y z
668
+ # <uint8> <string> <boolean>
669
+ # 0 2 A true
670
+ # 1 4 B (nil)
671
+ # 2 6 C false
672
+ #
673
+ # @since 0.4.0
674
+ #
675
+ def build_subframes(subset_specifier = nil, &block)
676
+ if block
677
+ SubFrames.new(self, instance_eval(&block))
678
+ else
679
+ SubFrames.new(self, subset_specifier)
680
+ end
681
+ end
682
+
683
+ # Catch variable (column) key as method name.
228
684
  def method_missing(name, *args, &block)
229
- return v(name) if args.empty?
685
+ return v(name) if args.empty? && key?(name)
230
686
 
231
687
  super
232
688
  end
233
689
 
690
+ # Catch variable (column) key as method name.
234
691
  def respond_to_missing?(name, include_private)
235
692
  return true if key?(name)
236
693
 
@@ -241,20 +698,32 @@ module RedAmber
241
698
 
242
699
  # initialize @variables, @keys, @vectors and return one of them
243
700
  def init_instance_vars(var)
244
- ary = @table.columns.each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
245
- v = Vector.new(column.data)
246
- k = column.name.to_sym
247
- v.key = k
248
- variables[k] = v
249
- keys << k
250
- vectors << v
251
- end
701
+ ary =
702
+ @table
703
+ .columns
704
+ .each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
705
+ v = Vector.create(column.data)
706
+ k = column.name.to_sym
707
+ v.key = k
708
+ variables[k] = v
709
+ keys << k
710
+ vectors << v
711
+ end
712
+
252
713
  @variables, @keys, @vectors = ary
253
714
  ary[%i[variables keys vectors].index(var)]
254
715
  end
255
716
 
717
+ def check_duplicate_keys(array)
718
+ org = array.dup
719
+ return unless array.uniq!
720
+
721
+ raise DataFrameArgumentError,
722
+ "duplicate keys: #{org.tally.select { |_k, v| v > 1 }.keys}"
723
+ end
724
+
256
725
  def name_unnamed_keys
257
- return unless @table[:'']
726
+ return unless @table.key?('')
258
727
 
259
728
  # We can't use #keys because it causes mismatch of @table and @keys
260
729
  keys = @table.schema.fields.map { |f| f.name.to_sym }