red_amber 0.2.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +133 -51
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +203 -1
  5. data/Gemfile +2 -1
  6. data/LICENSE +1 -1
  7. data/README.md +61 -45
  8. data/benchmark/basic.yml +11 -4
  9. data/benchmark/combine.yml +3 -4
  10. data/benchmark/dataframe.yml +62 -0
  11. data/benchmark/group.yml +7 -1
  12. data/benchmark/reshape.yml +6 -2
  13. data/benchmark/vector.yml +63 -0
  14. data/doc/DataFrame.md +35 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +295 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +537 -68
  20. data/lib/red_amber/data_frame_combinable.rb +776 -123
  21. data/lib/red_amber/data_frame_displayable.rb +248 -18
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +81 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +216 -21
  25. data/lib/red_amber/data_frame_selectable.rb +781 -120
  26. data/lib/red_amber/data_frame_variable_operation.rb +561 -85
  27. data/lib/red_amber/group.rb +195 -21
  28. data/lib/red_amber/helper.rb +114 -32
  29. data/lib/red_amber/refinements.rb +206 -0
  30. data/lib/red_amber/subframes.rb +1066 -0
  31. data/lib/red_amber/vector.rb +435 -58
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +387 -0
  34. data/lib/red_amber/vector_selectable.rb +321 -69
  35. data/lib/red_amber/vector_unary_element_wise.rb +436 -0
  36. data/lib/red_amber/vector_updatable.rb +397 -24
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +15 -1
  39. data/red_amber.gemspec +4 -3
  40. metadata +19 -11
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -294
@@ -4,7 +4,7 @@ module RedAmber
4
4
  # Class to represent a data frame.
5
5
  # Variable @table holds an Arrow::Table object.
6
6
  class DataFrame
7
- # mix-in
7
+ # Mix-in
8
8
  include DataFrameCombinable
9
9
  include DataFrameDisplayable
10
10
  include DataFrameIndexable
@@ -14,65 +14,151 @@ module RedAmber
14
14
  include DataFrameVariableOperation
15
15
  include Helper
16
16
 
17
- # Creates a new RedAmber::DataFrame.
17
+ using RefineArrowTable
18
+ using RefineHash
19
+
20
+ class << self
21
+ # Quicker DataFrame constructor from an `Arrow::Table`.
22
+ #
23
+ # @param table [Arrow::Table]
24
+ # A table to have in the DataFrame.
25
+ # @return [DataFrame]
26
+ # Initialized DataFrame.
27
+ #
28
+ # @note This method allocates the DataFrame directly and sets `table` without calling `initialize`; it may be used inside other methods.
29
+ # @note `table` must have unique keys.
30
+ #
31
+ def create(table)
32
+ instance = allocate
33
+ instance.instance_variable_set(:@table, table)
34
+ instance
35
+ end
36
+ end
37
+
38
+ # Creates a new DataFrame.
18
39
  #
19
40
  # @overload initialize(hash)
41
+ # Initialize a DataFrame by a Hash.
20
42
  #
21
- # @params hash [Hash]
43
+ # @param hash [Hash<key => <Array, Arrow::Array, #to_arrow_array>>]
44
+ # a Hash of `key` with array-like for column values.
45
+ # `key`s are Symbol or String.
46
+ # @example Initialize by a Hash
47
+ # hash = { x: [1, 2, 3], y: %w[A B C] }
48
+ # DataFrame.new(hash)
49
+ # @example Initialize by a Hash like arguments.
50
+ # DataFrame.new(x: [1, 2, 3], y: %w[A B C])
51
+ # @example Initialize from objects that respond to #to_arrow_array.
52
+ # # Any `array-like` that responds to #to_arrow_array is also accepted.
53
+ # require 'arrow-numo-narray'
54
+ # DataFrame.new(numo: Numo::DFloat.new(3).rand)
22
55
  #
23
56
  # @overload initialize(table)
57
+ # Initialize a DataFrame by an `Arrow::Table`.
58
+ #
59
+ # @param table [Arrow::Table]
60
+ # a table to have in the DataFrame.
61
+ # @example Initialize by a Table
62
+ # table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
63
+ # DataFrame.new(table)
64
+ #
65
+ # @overload initialize(schema, row_oriented_array)
66
+ # Initialize a DataFrame by schema and row_oriented_array.
67
+ #
68
+ # @param schema [Hash<key => type>]
69
+ # a schema of key and data type.
70
+ # @param row_oriented_array [Array]
71
+ # an Array of rows.
72
+ # @example Initialize by a schema and a row_oriented_array.
73
+ # schema = { x: :uint8, y: :string }
74
+ # row_oriented_array = [[1, 'A'], [2, 'B'], [3, 'C']]
75
+ # DataFrame.new(schema, row_oriented_array)
76
+ #
77
+ # @overload initialize(arrowable)
78
+ # Initialize DataFrame by an object that responds to `#to_arrow`.
79
+ #
80
+ # @param arrowable [#to_arrow]
81
+ # Any object which responds to `#to_arrow`.
82
+ # `#to_arrow` must return `Arrow::Table`.
24
83
  #
25
- # @params table [Arrow::Table]
84
+ # @note `RedAmber::DataFrame` itself is readable by this.
85
+ # @note Hash is refined to respond to `#to_arrow` in this class.
86
+ # @example Initialize by Red Dataset object.
87
+ # require 'datasets-arrow'
88
+ # dataset = Datasets::Penguins.new
89
+ # penguins = DataFrame.new(dataset)
90
+ # @since 0.2.2
26
91
  #
27
- # @overload initialize(dataframe)
92
+ # @overload initialize(rover_like)
93
+ # Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
28
94
  #
29
- # @params dataframe [RedAmber::DataFrame, Rover::DataFrame]
95
+ # @param rover_like [#to_h]
96
+ # Any object which responds to `#to_h`.
97
+ # `#to_h` must return a Hash which is convertible by `Arrow::Table.new`.
30
98
  #
31
- # @overload initialize(null)
99
+ # @note `Rover::DataFrame` is readable by this.
32
100
  #
33
- # @params null [NilClass] No arguments.
101
+ # @overload initialize()
102
+ # Create empty DataFrame
103
+ #
104
+ # @example
105
+ # DataFrame.new
106
+ #
107
+ # @overload initialize(empty)
108
+ # Create empty DataFrame
109
+ #
110
+ # @param empty [nil, [], {}]
111
+ #
112
+ # @example Return empty DataFrame.
113
+ # DataFrame.new([])
114
+ # DataFrame.new({})
115
+ # DataFrame.new(nil)
34
116
  #
35
117
  def initialize(*args)
36
- @variables = @keys = @vectors = @types = @data_types = nil
37
118
  case args
38
119
  in nil | [nil] | [] | {} | [[]] | [{}]
39
- # DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
40
- # returns empty DataFrame
41
120
  @table = Arrow::Table.new({}, [])
42
- in [->(x) { x.respond_to?(:to_arrow) } => arrowable]
121
+ in [Arrow::Table => table]
122
+ @table = table
123
+ in [arrowable] if arrowable.respond_to?(:to_arrow)
43
124
  table = arrowable.to_arrow
44
125
  unless table.is_a?(Arrow::Table)
45
126
  raise DataFrameTypeError,
46
127
  "to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
47
128
  end
48
129
  @table = table
49
- in [Arrow::Table => table]
50
- @table = table
51
- in [rover_or_hash]
130
+ in [rover_like] if rover_like.respond_to?(:to_h)
52
131
  begin
53
- # Accepts Rover::DataFrame or Hash
54
- @table = Arrow::Table.new(rover_or_hash.to_h)
132
+ # Accepts Rover::DataFrame
133
+ @table = Arrow::Table.new(rover_like.to_h)
55
134
  rescue StandardError
56
- raise DataFrameTypeError, "invalid argument: #{rover_or_hash}"
135
+ raise DataFrameTypeError, "to_h must return Arrowable object: #{rover_like}"
57
136
  end
58
137
  else
59
- @table = Arrow::Table.new(*args)
138
+ begin
139
+ @table = Arrow::Table.new(*args)
140
+ rescue StandardError
141
+ raise DataFrameTypeError, "invalid argument to create Arrow::Table: #{args}"
142
+ end
60
143
  end
61
- name_unnamed_keys
62
144
 
63
- duplicated_keys = keys.tally.select { |_k, v| v > 1 }.keys
64
- raise DataFrameArgumentError, "duplicate keys: #{duplicated_keys}" unless duplicated_keys.empty?
145
+ name_unnamed_keys
146
+ check_duplicate_keys(keys)
65
147
  end
66
148
 
149
+ # Returns the table held within.
150
+ #
151
+ # @return [Arrow::Table]
152
+ # the table within.
153
+ #
67
154
  attr_reader :table
155
+ alias_method :to_arrow, :table
68
156
 
69
- def to_arrow
70
- @table
71
- end
72
-
73
- # Returns the number of rows.
157
+ # Returns the number of records (rows).
158
+ #
159
+ # @return [Integer]
160
+ # number of records (rows).
74
161
  #
75
- # @return [Integer] Number of rows.
76
162
  def size
77
163
  @table.n_rows
78
164
  end
@@ -80,9 +166,11 @@ module RedAmber
80
166
  alias_method :n_obs, :size
81
167
  alias_method :n_rows, :size
82
168
 
83
- # Returns the number of columns.
169
+ # Returns the number of variables (columns).
170
+ #
171
+ # @return [Integer]
172
+ # number of variables (columns).
84
173
  #
85
- # @return [Integer] Number of columns.
86
174
  def n_keys
87
175
  @table.n_columns
88
176
  end
@@ -93,8 +181,9 @@ module RedAmber
93
181
  # Returns the numbers of rows and columns.
94
182
  #
95
183
  # @return [Array]
96
- # Number of rows and number of columns in an array.
184
+ # number of rows and number of columns in an array.
97
185
  # Same as [size, n_keys].
186
+ #
98
187
  def shape
99
188
  [size, n_keys]
100
189
  end
@@ -102,7 +191,8 @@ module RedAmber
102
191
  # Returns a Hash of key and Vector pairs in the columns.
103
192
  #
104
193
  # @return [Hash]
105
- # key => Vector pairs for each columns.
194
+ # `key => Vector` pairs for each column.
195
+ #
106
196
  def variables
107
197
  @variables || @variables = init_instance_vars(:variables)
108
198
  end
@@ -111,7 +201,8 @@ module RedAmber
111
201
  # Returns an Array of keys.
112
202
  #
113
203
  # @return [Array]
114
- # Keys in an Array.
204
+ # keys in an Array.
205
+ #
115
206
  def keys
116
207
  @keys || @keys = init_instance_vars(:keys)
117
208
  end
@@ -120,9 +211,11 @@ module RedAmber
120
211
 
121
212
  # Returns true if self has a specified key in the argument.
122
213
  #
123
- # @param key [Symbol, String] Key to test.
214
+ # @param key [Symbol, String]
215
+ # key to test.
124
216
  # @return [Boolean]
125
- # Returns true if self has key in Symbol.
217
+ # returns true if self has key in Symbol.
218
+ #
126
219
  def key?(key)
127
220
  keys.include?(key.to_sym)
128
221
  end
@@ -130,9 +223,11 @@ module RedAmber
130
223
 
131
224
  # Returns index of specified key in the Array keys.
132
225
  #
133
- # @param key [Symbol, String] key to know.
226
+ # @param key [Symbol, String]
227
+ # key to know.
134
228
  # @return [Integer]
135
- # Index of key in the Array keys.
229
+ # index of key in the Array keys.
230
+ #
136
231
  def key_index(key)
137
232
  keys.find_index(key.to_sym)
138
233
  end
@@ -142,15 +237,19 @@ module RedAmber
142
237
  # Returns abbreviated type names in an Array.
143
238
  #
144
239
  # @return [Array]
145
- # Abbreviated Red Arrow data type names.
240
+ # abbreviated Red Arrow data type names.
241
+ #
146
242
  def types
147
- @types || @types = @table.columns.map { |column| column.data.value_type.nick.to_sym }
243
+ @types || @types = @table.columns.map do |column|
244
+ column.data.value_type.nick.to_sym
245
+ end
148
246
  end
149
247
 
150
248
  # Returns an Array of Classes of data type.
151
249
  #
152
250
  # @return [Array]
153
- # An Array of Red Arrow data type Classes.
251
+ # an Array of Red Arrow data type Classes.
252
+ #
154
253
  def type_classes
155
254
  @data_types || @data_types = @table.columns.map { |column| column.data_type.class }
156
255
  end
@@ -158,50 +257,83 @@ module RedAmber
158
257
  # Returns Vectors in an Array.
159
258
  #
160
259
  # @return [Array]
161
- # An Array of RedAmber::Vector s.
260
+ # an Array of Vector.
261
+ #
162
262
  def vectors
163
263
  @vectors || @vectors = init_instance_vars(:vectors)
164
264
  end
165
265
 
166
- # Returns row indices (start...(size+start)) in an Array.
266
+ # Returns column-oriented data in a Hash.
267
+ #
268
+ # @return [Hash]
269
+ # a Hash of 'key => column_in_an_array'.
167
270
  #
168
- # @param start [Object]
169
- # Object which have #succ method.
170
- # @return [Array]
171
- # An Array of indices of the row.
172
- # @example
173
- # (when self.size == 5)
174
- # - indices #=> [0, 1, 2, 3, 4]
175
- # - indices(1) #=> [1, 2, 3, 4, 5]
176
- # - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
177
- def indices(start = 0)
178
- Vector.new((start..).take(size))
179
- end
180
- alias_method :indexes, :indices
181
-
182
271
  def to_h
183
272
  variables.transform_values(&:to_a)
184
273
  end
185
274
 
275
+ # Returns a row-oriented array without header.
276
+ #
277
+ # @return [Array]
278
+ # row-oriented data without header.
279
+ #
280
+ # @note If you need column-oriented array, use `.to_h.to_a`.
281
+ #
186
282
  def to_a
187
- # output an array of row-oriented data without header
188
- # if you need column-oriented array, use `.to_h.to_a`
189
283
  @table.raw_records
190
284
  end
191
285
  alias_method :raw_records, :to_a
192
286
 
287
+ # Returns column name and data type in a Hash.
288
+ #
289
+ # @return [Hash]
290
+ # column name and data type.
291
+ #
292
+ # @example
293
+ # RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
294
+ # # => {:x=>:uint8, :y=>:string}
295
+ #
193
296
  def schema
194
297
  keys.zip(types).to_h
195
298
  end
196
299
 
300
+ # Compare DataFrames.
301
+ #
302
+ # @return [true, false]
303
+ # true if other is a DataFrame and table is same.
304
+ # Otherwise return false.
305
+ #
197
306
  def ==(other)
198
307
  other.is_a?(DataFrame) && @table == other.table
199
308
  end
200
309
 
310
+ # Check if it is an empty DataFrame.
311
+ #
312
+ # @return [true, false]
313
+ # true if it has no columns.
314
+ #
201
315
  def empty?
202
316
  variables.empty?
203
317
  end
204
318
 
319
+ # Enumerate for each row.
320
+ #
321
+ # @overload each_row
322
+ # Returns Enumerator when no block given.
323
+ #
324
+ # @return [Enumerator]
325
+ # enumerator of each row.
326
+ #
327
+ # @overload each_row(&block)
328
+ # Yields with key and row pairs.
329
+ #
330
+ # @yieldparam key_row_pairs [Hash]
331
+ # key and row pairs.
332
+ # @yieldreturn [Integer]
333
+ # size of the DataFrame.
334
+ # @return [Integer]
335
+ # returns size.
336
+ #
205
337
  def each_row
206
338
  return enum_for(:each_row) unless block_given?
207
339
 
@@ -214,23 +346,348 @@ module RedAmber
214
346
  end
215
347
  end
216
348
 
349
+ # Returns self in a `Rover::DataFrame`.
350
+ #
351
+ # @return [Rover::DataFrame]
352
+ # a `Rover::DataFrame`.
353
+ #
217
354
  def to_rover
218
355
  require 'rover'
219
356
  Rover::DataFrame.new(to_h)
220
357
  end
221
358
 
359
+ # Create a Group object. Or create a Group and summarize it.
360
+ #
361
+ # @overload group(*group_keys)
362
+ # Create a Group object.
363
+ #
364
+ # @param group_keys [Array<Symbol, String>]
365
+ # keys for grouping.
366
+ # @return [Group]
367
+ # Group object.
368
+ # @example Create a Group
369
+ # penguins.group(:species)
370
+ #
371
+ # # =>
372
+ # #<RedAmber::Group : 0x000000000000c3c8>
373
+ # species group_count
374
+ # <string> <uint8>
375
+ # 0 Adelie 152
376
+ # 1 Chinstrap 68
377
+ # 2 Gentoo 124
378
+ #
379
+ # @overload group(*group_keys)
380
+ # Create a Group and summarize it by aggregation functions from the block.
381
+ #
382
+ # @yieldparam group [Group]
383
+ # passes Group object.
384
+ # @yieldreturn [DataFrame, Array<DataFrame>]
385
+ # an aggregated DataFrame or an array of aggregated DataFrames.
386
+ # @return [DataFrame]
387
+ # summarized DataFrame.
388
+ # @example Create a group and summarize it.
389
+ # penguins.group(:species) { mean(:bill_length_mm) }
390
+ #
391
+ # # =>
392
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f3fc>
393
+ # species mean(bill_length_mm)
394
+ # <string> <double>
395
+ # 0 Adelie 38.79
396
+ # 1 Chinstrap 48.83
397
+ # 2 Gentoo 47.5
398
+ #
222
399
  def group(*group_keys, &block)
223
400
  g = Group.new(self, group_keys)
224
401
  g = g.summarize(&block) if block
225
402
  g
226
403
  end
227
404
 
405
+ # Create SubFrames by value grouping.
406
+ #
407
+ # [Experimental feature] this method may be removed or be changed in the future.
408
+ # @param keys [Symbol, String, Array<Symbol, String>]
409
+ # grouping keys.
410
+ # @return [SubFrames]
411
+ # a created SubFrames grouped by column values on `keys`.
412
+ # @example
413
+ # df.sub_by_value(keys: :y)
414
+ #
415
+ # # =>
416
+ # #<RedAmber::SubFrames : 0x000000000000fc08>
417
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
418
+ # 3 SubFrames: [2, 3, 1] in sizes.
419
+ # ---
420
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>
421
+ # x y z
422
+ # <uint8> <string> <boolean>
423
+ # 0 1 A false
424
+ # 1 2 A true
425
+ # ---
426
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fc30>
427
+ # x y z
428
+ # <uint8> <string> <boolean>
429
+ # 0 3 B false
430
+ # 1 4 B (nil)
431
+ # 2 5 B true
432
+ # ---
433
+ # #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>
434
+ # x y z
435
+ # <uint8> <string> <boolean>
436
+ # 0 6 C false
437
+ #
438
+ # @since 0.4.0
439
+ #
440
+ def sub_by_value(keys: nil)
441
+ SubFrames.new(self, group(keys).filters)
442
+ end
443
+ alias_method :subframes_by_value, :sub_by_value
444
+
445
+ # Create SubFrames by Windowing with `from`, `size` and `step`.
446
+ #
447
+ # [Experimental feature] this method may be removed or be changed in the future.
448
+ # @param from [Integer]
449
+ # start position of window.
450
+ # @param size [Integer]
451
+ # window size.
452
+ # @param step [Integer]
453
+ # moving step of window.
454
+ # @return [SubFrames]
455
+ # a created SubFrames.
456
+ # @example
457
+ # df.sub_by_window(size: 4, step: 2)
458
+ #
459
+ # # =>
460
+ # #<RedAmber::SubFrames : 0x000000000000fc58>
461
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
462
+ # 2 SubFrames: [4, 4] in sizes.
463
+ # ---
464
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc6c>
465
+ # x y z
466
+ # <uint8> <string> <boolean>
467
+ # 0 1 A false
468
+ # 1 2 A true
469
+ # 2 3 B false
470
+ # 3 4 B (nil)
471
+ # ---
472
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc80>
473
+ # x y z
474
+ # <uint8> <string> <boolean>
475
+ # 0 3 B false
476
+ # 1 4 B (nil)
477
+ # 2 5 B true
478
+ # 3 6 C false
479
+ #
480
+ # @since 0.4.0
481
+ #
482
+ def sub_by_window(from: 0, size: nil, step: 1)
483
+ SubFrames.new(self) do
484
+ from.step(by: step, to: (size() - size)).map do |i| # rubocop:disable Style/MethodCallWithoutArgsParentheses
485
+ [*i...(i + size)]
486
+ end
487
+ end
488
+ end
489
+ alias_method :subframes_by_window, :sub_by_window
490
+
491
+ # Create SubFrames by Grouping/Windowing by position from an enumerator method.
492
+ #
493
+ # This method will process the indices of self by enumerator.
494
+ # [Experimental feature] this method may be removed or be changed in the future.
495
+ # @param enumerator_method [Symbol]
496
+ # Enumerator name.
497
+ # @param args [<Object>]
498
+ # arguments for the enumerator method.
499
+ # @return [SubFrames]
500
+ # a created SubFrames.
501
+ # @example Create a SubFrames object sliced by 3 rows.
502
+ # df.sub_by_enum(:each_slice, 3)
503
+ #
504
+ # # =>
505
+ # #<RedAmber::SubFrames : 0x000000000000fd20>
506
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
507
+ # 2 SubFrames: [3, 3] in sizes.
508
+ # ---
509
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd34>
510
+ # x y z
511
+ # <uint8> <string> <boolean>
512
+ # 0 1 A false
513
+ # 1 2 A true
514
+ # 2 3 B false
515
+ # ---
516
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd48>
517
+ # x y z
518
+ # <uint8> <string> <boolean>
519
+ # 0 4 B (nil)
520
+ # 1 5 B true
521
+ # 2 6 C false
522
+ #
523
+ # @example Create a SubFrames object for each consecutive 4 rows.
524
+ # df.sub_by_enum(:each_cons, 4)
525
+ #
526
+ # # =>
527
+ # #<RedAmber::SubFrames : 0x000000000000fd98>
528
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
529
+ # 3 SubFrames: [4, 4, 4] in sizes.
530
+ # ---
531
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdac>
532
+ # x y z
533
+ # <uint8> <string> <boolean>
534
+ # 0 1 A false
535
+ # 1 2 A true
536
+ # 2 3 B false
537
+ # 3 4 B (nil)
538
+ # ---
539
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdc0>
540
+ # x y z
541
+ # <uint8> <string> <boolean>
542
+ # 0 2 A true
543
+ # 1 3 B false
544
+ # 2 4 B (nil)
545
+ # 3 5 B true
546
+ # ---
547
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdd4>
548
+ # x y z
549
+ # <uint8> <string> <boolean>
550
+ # 0 3 B false
551
+ # 1 4 B (nil)
552
+ # 2 5 B true
553
+ # 3 6 C false
554
+ #
555
+ # @since 0.4.0
556
+ #
557
+ def sub_by_enum(enumerator_method, *args)
558
+ SubFrames.new(self, indices.send(enumerator_method, *args).to_a)
559
+ end
560
+ alias_method :subframes_by_enum, :sub_by_enum
561
+
562
+ # Create SubFrames by windowing with a kernel (i.e. masked window) and step.
563
+ #
564
+ # [Experimental feature] this method may be removed or be changed in the future.
565
+ # @param kernel [Array<true, false>, Vector]
566
+ # boolean array-like to pick records in the window.
567
+ # Kernel is a boolean Array and it behaves like a masked window.
568
+ # @param step [Integer]
569
+ # moving step of window.
570
+ # @return [SubFrames]
571
+ # a created SubFrames.
572
+ # @example
573
+ # kernel = [true, false, false, true]
574
+ # df.sub_by_kernel(kernel, step: 2)
575
+ #
576
+ # # =>
577
+ # #<RedAmber::SubFrames : 0x000000000000fde8>
578
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
579
+ # 2 SubFrames: [2, 2] in sizes.
580
+ # ---
581
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fdfc>
582
+ # x y z
583
+ # <uint8> <string> <boolean>
584
+ # 0 1 A false
585
+ # 1 4 B (nil)
586
+ # ---
587
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fe10>
588
+ # x y z
589
+ # <uint8> <string> <boolean>
590
+ # 0 3 B false
591
+ # 1 6 C false
592
+ #
593
+ # @since 0.4.0
594
+ #
595
+ def sub_by_kernel(kernel, step: 1)
596
+ limit_size = size - kernel.size
597
+ kernel_vector = Vector.new(kernel.concat([nil] * limit_size))
598
+ SubFrames.new(self) do
599
+ 0.step(by: step, to: limit_size).map do |i|
600
+ kernel_vector.shift(i)
601
+ end
602
+ end
603
+ end
604
+ alias_method :subframes_by_kernel, :sub_by_kernel
605
+
606
+ # Generic builder of sub-dataframes from self.
607
+ #
608
+ # [Experimental feature] this method may be removed or be changed in the future.
609
+ # @overload build_subframes(subset_specifier)
610
+ # Create a new SubFrames object.
611
+ #
612
+ # @param subset_specifier [Array<Vector>, Array<array-like>]
613
+ # an Array of numeric indices or boolean filters
614
+ # to create subsets of DataFrame.
615
+ # @return [SubFrames]
616
+ # new SubFrames.
617
+ # @example
618
+ # df.build_subframes([[0, 2, 4], [1, 3, 5]])
619
+ #
620
+ # # =>
621
+ # #<RedAmber::SubFrames : 0x000000000000fe9c>
622
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
623
+ # 2 SubFrames: [3, 3] in sizes.
624
+ # ---
625
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000feb0>
626
+ # x y z
627
+ # <uint8> <string> <boolean>
628
+ # 0 1 A false
629
+ # 1 3 B false
630
+ # 2 5 B true
631
+ # ---
632
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fec4>
633
+ # x y z
634
+ # <uint8> <string> <boolean>
635
+ # 0 2 A true
636
+ # 1 4 B (nil)
637
+ # 2 6 C false
638
+ #
639
+ # @overload build_subframes
640
+ # Create a new SubFrames object by block.
641
+ #
642
+ # @yield [self]
643
+ # the block is called within the context of self.
644
+ # (Block is called by instance_eval(&block). )
645
+ # @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
646
+ # an Array of index or boolean array-likes to create subsets of DataFrame.
647
+ # All array-likes must respond to #numeric? or #boolean?.
648
+ # @example
649
+ # dataframe.build_subframes do
650
+ # even = indices.map(&:even?)
651
+ # [even, !even]
652
+ # end
653
+ #
654
+ # # =>
655
+ # #<RedAmber::SubFrames : 0x000000000000fe60>
656
+ # @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
657
+ # 2 SubFrames: [3, 3] in sizes.
658
+ # ---
659
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe74>
660
+ # x y z
661
+ # <uint8> <string> <boolean>
662
+ # 0 1 A false
663
+ # 1 3 B false
664
+ # 2 5 B true
665
+ # ---
666
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe88>
667
+ # x y z
668
+ # <uint8> <string> <boolean>
669
+ # 0 2 A true
670
+ # 1 4 B (nil)
671
+ # 2 6 C false
672
+ #
673
+ # @since 0.4.0
674
+ #
675
+ def build_subframes(subset_specifier = nil, &block)
676
+ if block
677
+ SubFrames.new(self, instance_eval(&block))
678
+ else
679
+ SubFrames.new(self, subset_specifier)
680
+ end
681
+ end
682
+
683
+ # Catch variable (column) key as method name.
228
684
  def method_missing(name, *args, &block)
229
- return v(name) if args.empty?
685
+ return v(name) if args.empty? && key?(name)
230
686
 
231
687
  super
232
688
  end
233
689
 
690
+ # Catch variable (column) key as method name.
234
691
  def respond_to_missing?(name, include_private)
235
692
  return true if key?(name)
236
693
 
@@ -241,20 +698,32 @@ module RedAmber
241
698
 
242
699
  # initialize @variables, @keys, @vectors and return one of them
243
700
  def init_instance_vars(var)
244
- ary = @table.columns.each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
245
- v = Vector.new(column.data)
246
- k = column.name.to_sym
247
- v.key = k
248
- variables[k] = v
249
- keys << k
250
- vectors << v
251
- end
701
+ ary =
702
+ @table
703
+ .columns
704
+ .each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
705
+ v = Vector.create(column.data)
706
+ k = column.name.to_sym
707
+ v.key = k
708
+ variables[k] = v
709
+ keys << k
710
+ vectors << v
711
+ end
712
+
252
713
  @variables, @keys, @vectors = ary
253
714
  ary[%i[variables keys vectors].index(var)]
254
715
  end
255
716
 
717
+ def check_duplicate_keys(array)
718
+ org = array.dup
719
+ return unless array.uniq!
720
+
721
+ raise DataFrameArgumentError,
722
+ "duplicate keys: #{org.tally.select { |_k, v| v > 1 }.keys}"
723
+ end
724
+
256
725
  def name_unnamed_keys
257
- return unless @table[:'']
726
+ return unless @table.key?('')
258
727
 
259
728
  # We can't use #keys because it causes mismatch of @table and @keys
260
729
  keys = @table.schema.fields.map { |f| f.name.to_sym }