red_amber 0.2.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +133 -51
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +203 -1
  5. data/Gemfile +2 -1
  6. data/LICENSE +1 -1
  7. data/README.md +61 -45
  8. data/benchmark/basic.yml +11 -4
  9. data/benchmark/combine.yml +3 -4
  10. data/benchmark/dataframe.yml +62 -0
  11. data/benchmark/group.yml +7 -1
  12. data/benchmark/reshape.yml +6 -2
  13. data/benchmark/vector.yml +63 -0
  14. data/doc/DataFrame.md +35 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +295 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +537 -68
  20. data/lib/red_amber/data_frame_combinable.rb +776 -123
  21. data/lib/red_amber/data_frame_displayable.rb +248 -18
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +81 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +216 -21
  25. data/lib/red_amber/data_frame_selectable.rb +781 -120
  26. data/lib/red_amber/data_frame_variable_operation.rb +561 -85
  27. data/lib/red_amber/group.rb +195 -21
  28. data/lib/red_amber/helper.rb +114 -32
  29. data/lib/red_amber/refinements.rb +206 -0
  30. data/lib/red_amber/subframes.rb +1066 -0
  31. data/lib/red_amber/vector.rb +435 -58
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +387 -0
  34. data/lib/red_amber/vector_selectable.rb +321 -69
  35. data/lib/red_amber/vector_unary_element_wise.rb +436 -0
  36. data/lib/red_amber/vector_updatable.rb +397 -24
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +15 -1
  39. data/red_amber.gemspec +4 -3
  40. metadata +19 -11
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -294
@@ -1,73 +1,373 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- # mix-ins for the class DataFrame
4
+ # Mix-in for the class DataFrame
5
5
  module DataFrameVariableOperation
6
- # pick up some variables to create sub DataFrame
6
+ # Array is refined
7
+ using RefineArray
8
+
9
+ # Select variables (columns) to create a new DataFrame.
10
+ #
11
+ # @note if a single key is specified, DataFrame#pick generates a DataFrame.
12
+ # On the other hand, DataFrame#[] generates a Vector.
13
+ #
14
+ # @overload pick(keys)
15
+ # Pick up variables by Symbol(s) or String(s).
16
+ #
17
+ # @param keys [Symbol, String, <Symbol, String>]
18
+ # key name(s) of variables to pick.
19
+ # @return [DataFrame]
20
+ # picked DataFrame.
21
+ # @example Pick up by a key
22
+ # languages
23
+ #
24
+ # # =>
25
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x00000000000cfd8c>
26
+ # Language Creator Released
27
+ # <string> <string> <uint16>
28
+ # 0 Ruby Yukihiro Matsumoto 1995
29
+ # 1 Python Guido van Rossum 1991
30
+ # 2 R Ross Ihaka and Robert Gentleman 1993
31
+ # 3 Rust Graydon Hoare 2001
32
+ #
33
+ # languages.pick(:Language)
34
+ #
35
+ # # =>
36
+ # #<RedAmber::DataFrame : 4 x 1 Vector, 0x0000000000113d20>
37
+ # Language
38
+ # <string>
39
+ # 0 Ruby
40
+ # 1 Python
41
+ # 2 R
42
+ # 3 Rust
43
+ #
44
+ # languages[:Language]
45
+ #
46
+ # # =>
47
+ # #<RedAmber::Vector(:string, size=4):0x000000000010359c>
48
+ # ["Ruby", "Python", "R", "Rust"]
49
+ #
50
+ # @overload pick(booleans)
51
+ # Pick up variables by booleans.
52
+ #
53
+ # @param booleans [<Booleans, nil>, Vector]
54
+ # boolean array or vector to pick up variables at true.
55
+ # @return [DataFrame]
56
+ # picked DataFrame.
57
+ # @example Pick up by booleans
58
+ # languages.pick(true, true, false)
59
+ #
60
+ # # =>
61
+ # #<RedAmber::DataFrame : 4 x 2 Vectors, 0x0000000000066a1c>
62
+ # Language Creator
63
+ # <string> <string>
64
+ # 0 Ruby Yukihiro Matsumoto
65
+ # 1 Python Guido van Rossum
66
+ # 2 R Ross Ihaka and Robert Gentleman
67
+ # 3 Rust Graydon Hoare
68
+ #
69
+ # is_string = languages.vectors.map(&:string?) # [true, true, false]
70
+ # languages.pick(is_string)
71
+ # # =>
72
+ # (same as above)
73
+ #
74
+ # @overload pick(indices)
75
+ # Pick up variables by column indices.
76
+ #
77
+ # @param indices [Integer, Float, Range<Integer>, Vector, Arrow::Array]
78
+ # numeric array to pick up variables by column index.
79
+ # @return [DataFrame]
80
+ # picked DataFrame.
81
+ # @example Pick up by indices
82
+ # languages.pick(0, 2, 1)
83
+ #
84
+ # # =>
85
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000011cfb0>
86
+ # Language Released Creator
87
+ # <string> <uint16> <string>
88
+ # 0 Ruby 1995 Yukihiro Matsumoto
89
+ # 1 Python 1991 Guido van Rossum
90
+ # 2 R 1993 Ross Ihaka and Robert Gentleman
91
+ # 3 Rust 2001 Graydon Hoare
92
+ #
93
+ # @overload pick
94
+ # Pick up variables by the yielded value from the block.
95
+ # @note Arguments and a block cannot be used simultaneously.
96
+ #
97
+ # @yield [self]
98
+ # the block is called within the context of self.
99
+ # (Block is called by instance_eval(&block). )
100
+ # @yieldreturn [keys, booleans, indices]
101
+ # returns keys, booleans or indices just same as arguments.
102
+ # @return [DataFrame]
103
+ # picked DataFrame.
104
+ # @example Pick up by a block.
105
+ # # same as languages.pick { |df| df.vectors.map(&:string?) }
106
+ # languages.pick { languages.vectors.map(&:string?) }
107
+ #
108
+ # # =>
109
+ # #<RedAmber::DataFrame : 4 x 2 Vectors, 0x0000000000154104>
110
+ # Language Creator
111
+ # <string> <string>
112
+ # 0 Ruby Yukihiro Matsumoto
113
+ # 1 Python Guido van Rossum
114
+ # 2 R Ross Ihaka and Robert Gentleman
115
+ # 3 Rust Graydon Hoare
116
+ #
7
117
  def pick(*args, &block)
8
- picker = args
9
118
  if block
10
- raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
119
+ unless args.empty?
120
+ raise DataFrameArgumentError, 'Must not specify both arguments and block.'
121
+ end
11
122
 
12
- picker = [instance_eval(&block)]
123
+ args = [instance_eval(&block)]
13
124
  end
14
- picker.flatten!
15
- return DataFrame.new if picker.empty? || picker == [nil]
16
-
17
- key_vector = Vector.new(keys)
18
- vec = parse_to_vector(picker, vsize: n_keys)
19
-
20
- ary =
21
- if vec.boolean?
22
- key_vector.filter(*vec).to_a
23
- elsif vec.numeric?
24
- key_vector.take(*vec).to_a
25
- elsif vec.string? || vec.dictionary?
26
- vec.to_a
27
- else
28
- raise DataFrameArgumentError, "Invalid argument #{args}"
29
- end
30
125
 
31
- # DataFrame#[] creates a Vector if single key is specified.
32
- # DataFrame#pick creates a DataFrame with single key.
33
- DataFrame.new(@table[ary])
126
+ case args
127
+ in [] | [nil]
128
+ return DataFrame.new
129
+ in [*] if args.symbol?
130
+ return DataFrame.create(@table.select_columns(*args))
131
+ in [*] if args.boolean?
132
+ picker = keys.select_by_booleans(args)
133
+ return DataFrame.create(@table.select_columns(*picker))
134
+ in [(Vector | Arrow::Array | Arrow::ChunkedArray) => a]
135
+ picker = a.to_a
136
+ else
137
+ picker = parse_args(args, n_keys)
138
+ end
139
+
140
+ return DataFrame.new if picker.compact.empty?
141
+
142
+ if picker.boolean?
143
+ picker = keys.select_by_booleans(picker)
144
+ return DataFrame.create(@table.select_columns(*picker))
145
+ end
146
+ picker.compact!
147
+ raise DataFrameArgumentError, "some keys are duplicated: #{args}" if picker.uniq!
148
+
149
+ DataFrame.create(@table.select_columns(*picker))
34
150
  end
35
151
 
36
- # drop some variables to create remainer sub DataFrame
152
+ # Drop off some variables (columns) to create a remainder DataFrame.
153
+ #
154
+ # @note DataFrame#drop creates a DataFrame even if it is a single column
155
+ # (not a Vector).
156
+ #
157
+ # @overload drop(keys)
158
+ # Drop off variables by Symbol(s) or String(s).
159
+ #
160
+ # @param keys [Symbol, String, <Symbol, String>]
161
+ # key name(s) of variables to drop.
162
+ # @return [DataFrame]
163
+ # remainder DataFrame.
164
+ # @example Drop off by a key
165
+ # languages
166
+ #
167
+ # # =>
168
+ # #<RedAmber::DataFrame : 4 x 3 Vectors, 0x00000000000cfd8c>
169
+ # Language Creator Released
170
+ # <string> <string> <uint16>
171
+ # 0 Ruby Yukihiro Matsumoto 1995
172
+ # 1 Python Guido van Rossum 1991
173
+ # 2 R Ross Ihaka and Robert Gentleman 1993
174
+ # 3 Rust Graydon Hoare 2001
175
+ #
176
+ # languages.drop(:Language)
177
+ #
178
+ # # =>
179
+ # #<RedAmber::DataFrame : 4 x 2 Vectors, 0x000000000005805c>
180
+ # Creator Released
181
+ # <string> <uint16>
182
+ # 0 Yukihiro Matsumoto 1995
183
+ # 1 Guido van Rossum 1991
184
+ # 2 Ross Ihaka and Robert Gentleman 1993
185
+ # 3 Graydon Hoare 2001
186
+ #
187
+ # @overload drop(booleans)
188
+ # Drop off variables by booleans.
189
+ #
190
+ # @param booleans [<Booleans, nil>, Vector]
191
+ # boolean array or vector of variables to drop at true.
192
+ # @return [DataFrame]
193
+ # remainder DataFrame.
194
+ # @example Drop off by booleans
195
+ # is_numeric = languages.vectors.map(&:numeric?) # [nil, nil, true]
196
+ # languages.drop(is_numeric)
197
+ #
198
+ # # =>
199
+ # #<RedAmber::DataFrame : 4 x 2 Vectors, 0x0000000000066a1c>
200
+ # Language Creator
201
+ # <string> <string>
202
+ # 0 Ruby Yukihiro Matsumoto
203
+ # 1 Python Guido van Rossum
204
+ # 2 R Ross Ihaka and Robert Gentleman
205
+ # 3 Rust Graydon Hoare
206
+ #
207
+ # @overload drop(indices)
208
+ # Drop off variables by column indices.
209
+ #
210
+ # @param indices [Integer, Float, Range<Integer>, Vector, Arrow::Array]
211
+ # numeric array of variables to drop by column index.
212
+ # @return [DataFrame]
213
+ # remainder DataFrame.
214
+ # @example Drop off by indices
215
+ # languages.drop(2)
216
+ #
217
+ # # =>
218
+ # #<RedAmber::DataFrame : 4 x 2 Vectors, 0x0000000000066a1c>
219
+ # Language Creator
220
+ # <string> <string>
221
+ # 0 Ruby Yukihiro Matsumoto
222
+ # 1 Python Guido van Rossum
223
+ # 2 R Ross Ihaka and Robert Gentleman
224
+ # 3 Rust Graydon Hoare
225
+ #
226
+ # @overload drop
227
+ # Drop off variables by the yielded value from the block.
228
+ # @note Arguments and a block cannot be used simultaneously.
229
+ #
230
+ # @yield [self] the block is called within the context of self.
231
+ # (Block is called by instance_eval(&block). )
232
+ # @yieldreturn [keys, booleans, indices]
233
+ # returns keys, booleans or indices just same as arguments.
234
+ # @return [DataFrame]
235
+ # remainder DataFrame.
236
+ # @example Drop off by a block.
237
+ # # same as languages.drop { |df| df.vectors.map(&:numeric?) }
238
+ # languages.drop { vectors.map(&:numeric?) }
239
+ #
240
+ # # =>
241
+ # #<RedAmber::DataFrame : 4 x 2 Vectors, 0x0000000000154104>
242
+ # Language Creator
243
+ # <string> <string>
244
+ # 0 Ruby Yukihiro Matsumoto
245
+ # 1 Python Guido van Rossum
246
+ # 2 R Ross Ihaka and Robert Gentleman
247
+ # 3 Rust Graydon Hoare
248
+ #
37
249
  def drop(*args, &block)
38
- dropper = args
39
250
  if block
40
- raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
251
+ unless args.empty?
252
+ raise DataFrameArgumentError, 'Must not specify both arguments and block.'
253
+ end
41
254
 
42
- dropper = [instance_eval(&block)]
255
+ args = [instance_eval(&block)]
43
256
  end
44
- dropper.flatten!
45
-
46
- key_vector = Vector.new(keys)
47
- vec = parse_to_vector(dropper, vsize: n_keys)
48
-
49
- ary =
50
- if vec.boolean?
51
- key_vector.filter(*vec.primitive_invert).each.map(&:to_sym) # Array
52
- elsif vec.numeric?
53
- keys - key_vector.take(*vec).each.map(&:to_sym) # Array
54
- elsif vec.string? || vec.dictionary?
55
- keys - vec.to_a.map { _1&.to_sym } # Array
257
+ return self if args.empty? || empty?
258
+
259
+ picker =
260
+ if args.symbol?
261
+ keys - args
262
+ elsif args.boolean?
263
+ keys.reject_by_booleans(args)
264
+ elsif args.integer?
265
+ keys.reject_by_indices(args)
56
266
  else
57
- raise DataFrameArgumentError, "Invalid argument #{args}"
267
+ dropper = parse_args(args, n_keys)
268
+ if dropper.boolean?
269
+ keys.reject_by_booleans(dropper)
270
+ elsif dropper.symbol?
271
+ keys - dropper
272
+ else
273
+ dropper.compact!
274
+ unless dropper.integer?
275
+ raise DataFrameArgumentError, "Invalid argument #{args}"
276
+ end
277
+
278
+ keys.reject_by_indices(dropper)
279
+ end
58
280
  end
59
281
 
60
- return DataFrame.new if ary.empty?
282
+ return DataFrame.new if picker.empty?
61
283
 
62
- # DataFrame#[] creates a Vector if single key is specified.
63
- # DataFrame#drop creates a DataFrame with single key.
64
- DataFrame.new(@table[ary])
284
+ DataFrame.create(@table.select_columns(*picker))
65
285
  end
66
286
 
67
- # rename variables to create a new DataFrame
287
+ # Rename keys (variable/column names) to create an updated DataFrame.
288
+ #
289
+ # @overload rename(key_pairs)
290
+ # Rename by key pairs as a Hash.
291
+ #
292
+ # @param key_pairs [Hash{existing_key => new_key}]
293
+ # key pair(s) of existing name and new name.
294
+ # @return [DataFrame]
295
+ # renamed DataFrame.
296
+ # @example Rename by a Hash
297
+ # comecome
298
+ #
299
+ # # =>
300
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000000037b4>
301
+ # name age
302
+ # <string> <uint8>
303
+ # 0 Yasuko 68
304
+ # 1 Rui 49
305
+ # 2 Hinata 28
306
+ #
307
+ # comecome.rename(:age => :age_in_1993)
308
+ #
309
+ # # =>
310
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000000037c8>
311
+ # name age_in_1993
312
+ # <string> <uint8>
313
+ # 0 Yasuko 68
314
+ # 1 Rui 49
315
+ # 2 Hinata 28
316
+ #
317
+ # @overload rename(key_pairs)
318
+ # Rename by key pairs as an Array of Array.
319
+ #
320
+ # @param key_pairs [<Array[existing_key, new_key]>]
321
+ # key pair(s) of existing name and new name.
322
+ # @return [DataFrame]
323
+ # renamed DataFrame.
324
+ # @example Rename by an Array
325
+ # renamer = [[:name, :heroine], [:age, :age_in_1993]]
326
+ # comecome.rename(renamer)
327
+ #
328
+ # # =>
329
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000000037dc>
330
+ # heroine age_in_1993
331
+ # <string> <uint8>
332
+ # 0 Yasuko 68
333
+ # 1 Rui 49
334
+ # 2 Hinata 28
335
+ #
336
+ # @overload rename
337
+ # Rename by key pairs yielding from block.
338
+ #
339
+ # @yield [self] the block is called within the context of self.
340
+ # (Block is called by instance_eval(&block). )
341
+ # @yieldreturn [<[existing_key, new_key]>, Hash]
342
+ # returns an Array or a Hash just same as arguments.
343
+ # @return [DataFrame]
344
+ # renamed DataFrame.
345
+ # @example Rename by block.
346
+ # df
347
+ #
348
+ # # =>
349
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000c29c>
350
+ # X Y Z
351
+ # <uint8> <uint8> <uint8>
352
+ # 0 1 3 5
353
+ # 1 2 4 6
354
+ #
355
+ # df.rename { keys.zip(keys.map(&:downcase)) }
356
+ # # or
357
+ # df.rename { [keys, keys.map(&:downcase)].transpose }
358
+ #
359
+ # # =>
360
+ # #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000c364>
361
+ # x y z
362
+ # <uint8> <uint8> <uint8>
363
+ # 0 1 3 5
364
+ # 1 2 4 6
365
+ #
68
366
  def rename(*renamer, &block)
69
367
  if block
70
- raise DataFrameArgumentError, 'Must not specify both arguments and a block' unless renamer.empty?
368
+ unless renamer.empty?
369
+ raise DataFrameArgumentError, 'Must not specify both arguments and a block'
370
+ end
71
371
 
72
372
  renamer = [instance_eval(&block)]
73
373
  end
@@ -88,37 +388,211 @@ module RedAmber
88
388
  rename_by_hash(key_pairs)
89
389
  end
90
390
 
91
- # assign variables to create a new DataFrame
391
+ # Assign new or updated variables (columns) and create an updated DataFrame.
392
+ # - Array-like variables with new keys will append new columns from right.
393
+ # - Array-like variables with existing keys will update corresponding vectors.
394
+ # - Symbol key and String key are considered as the same key.
395
+ # - If assigner is empty or nil, returns self.
396
+ #
397
+ # @overload assign(key_value_pairs)
398
+ # accepts pairs of key and values by an Array or a Hash.
399
+ #
400
+ # @param key_value_pairs [Array<key, array_like>, Hash{key => array_like}]
401
+ # `key` must be a Symbol or a String.
402
+ # `array_like` is column data to be assigned.
403
+ # It must be one of `Vector` or `Arrow::Array` or `Array`.
404
+ # @return [DataFrame]
405
+ # assigned DataFrame.
406
+ # @example Assign a new column
407
+ # comecome
408
+ #
409
+ # # =>
410
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000000280dc>
411
+ # name age
412
+ # <string> <uint8>
413
+ # 0 Yasuko 68
414
+ # 1 Rui 49
415
+ # 2 Hinata 28
416
+ #
417
+ # brothers = ['Santa', nil, 'Momotaro']
418
+ # comecome.assign(brother: brothers)
419
+ # # or
420
+ # comecome.assign({ brother: brothers })
421
+ # # or
422
+ # comecome.assign(:brother, brothers)
423
+ # # or
424
+ # comecome.assign([:brother, brothers])
425
+ #
426
+ # # =>
427
+ # #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000004077c>
428
+ # name age brother
429
+ # <string> <uint8> <string>
430
+ # 0 Yasuko 68 Santa
431
+ # 1 Rui 49 (nil)
432
+ # 2 Hinata 28 Momotaro
433
+ #
434
+ # @example Assign new data for an existing column
435
+ # comecome.assign(age: comecome[:age] + 29)
436
+ #
437
+ # # =>
438
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000065860>
439
+ # name age
440
+ # <string> <uint8>
441
+ # 0 Yasuko 97
442
+ # 1 Rui 78
443
+ # 2 Hinata 57
444
+ #
445
+ # @overload assign
446
+ # accepts block yielding pairs of key and values.
447
+ #
448
+ # @yield [self]
449
+ # the block is called within the context of self.
450
+ # (Block is called by instance_eval(&block). )
451
+ # @yieldreturn [Array<key, array_like>, Hash{key => array_like}]
452
+ # `key` must be a Symbol or a String.
453
+ # `array_like` is column data to be assigned.
454
+ # It must be one of `Vector` or `Arrow::Array` or `Array`.
455
+ # @return [DataFrame]
456
+ # assigned DataFrame.
457
+ # @example Assign new data for an existing column by block
458
+ # comecome.assign { { age: age + 29 } }
459
+ # # or
460
+ # comecome.assign { [:age, age + 29] }
461
+ # # or
462
+ # comecome.assign { [[:age, age + 29]] }
463
+ #
464
+ # # =>
465
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000007d640>
466
+ # name age
467
+ # <string> <uint8>
468
+ # 0 Yasuko 97
469
+ # 1 Rui 78
470
+ # 2 Hinata 57
471
+ #
472
+ # @overload assign(keys)
473
+ # accepts keys from argument and pairs of key and values from block.
474
+ #
475
+ # @param keys [Symbol, String] keys of columns to create or update.
476
+ # @yield [self]
477
+ # the block is called within the context of self.
478
+ # (Block is called by instance_eval(&block).)
479
+ # @yieldreturn [Array<array_like>]
480
+ # column data to be assigned.
481
+ # `array_like` must be one of `Vector` or `Arrow::Array` or `Array`.
482
+ # @return [DataFrame]
483
+ # assigned DataFrame.
484
+ # @example Assign new data for an existing column by block
485
+ # comecome.assign(:age) { age + 29 }
486
+ #
487
+ # # =>
488
+ # #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000007af94>
489
+ # name age
490
+ # <string> <uint8>
491
+ # 0 Yasuko 97
492
+ # 1 Rui 78
493
+ # 2 Hinata 57
494
+ #
495
+ # @example Assign multiple data
496
+ # comecome.assign(:age_in_1993, :brother) do
497
+ # [
498
+ # age + 29,
499
+ # ['Santa', nil, 'Momotaro'],
500
+ # ]
501
+ # end
502
+ #
503
+ # # =>
504
+ # #<RedAmber::DataFrame : 3 x 4 Vectors, 0x00000000000b363c>
505
+ # name age age_in_1993 brother
506
+ # <string> <uint8> <uint8> <string>
507
+ # 0 Yasuko 68 97 Santa
508
+ # 1 Rui 49 78 (nil)
509
+ # 2 Hinata 28 57 Momotaro
510
+ #
92
511
  def assign(*assigner, &block)
93
- appender, fields, arrays = assign_update(*assigner, &block)
94
- return self if appender.is_a?(DataFrame)
95
-
96
- append_to_fields_and_arrays(appender, fields, arrays, append_to_left: false) unless appender.empty?
97
-
98
- DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
512
+ assign_update(*assigner, append_to_left: false, &block)
99
513
  end
100
514
 
515
+ # Assign new or updated variables (columns) and create an updated DataFrame.
516
+ # - Array-like variables with new keys will append new columns from left.
517
+ # - Array-like variables with existing keys will update corresponding vectors.
518
+ # - Symbol key and String key are considered as the same key.
519
+ # - If assigner is empty or nil, returns self.
520
+ #
521
+ # @overload assign_left(key_value_pairs)
522
+ # accepts pairs of key and values by an Array or a Hash.
523
+ #
524
+ # @param key_value_pairs [Array<key, array_like>, Hash{key => array_like}]
525
+ # `key` must be a Symbol or a String.
526
+ # `array_like` is column data to be assigned.
527
+ # It must be one of `Vector` or `Arrow::Array` or `Array`.
528
+ # @return [DataFrame]
529
+ # assigned DataFrame.
530
+ # @example Assign a new column from left
531
+ # df
532
+ #
533
+ # # =>
534
+ # #<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000c10c>
535
+ # index float string
536
+ # <uint8> <double> <string>
537
+ # 0 0 0.0 A
538
+ # 1 1 1.1 B
539
+ # 2 2 2.2 C
540
+ # 3 3 NaN D
541
+ # 4 (nil) (nil) (nil)
542
+ #
543
+ # df.assign_left(new_index: df.indices(1))
544
+ #
545
+ # # =>
546
+ # #<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000001787c>
547
+ # new_index index float string
548
+ # <uint8> <uint8> <double> <string>
549
+ # 0 1 0 0.0 A
550
+ # 1 2 1 1.1 B
551
+ # 2 3 2 2.2 C
552
+ # 3 4 3 NaN D
553
+ # 4 5 (nil) (nil) (nil)
554
+ #
555
+ # @overload assign_left
556
+ # accepts block yielding pairs of key and values.
557
+ #
558
+ # @yield [self]
559
+ # the block is called within the context of self.
560
+ # (Block is called by instance_eval(&block). )
561
+ # @yieldreturn [Array<key, array_like>, Hash{key => array_like}]
562
+ # `key` must be a Symbol or a String.
563
+ # `array_like` is column data to be assigned.
564
+ # It must be one of `Vector` or `Arrow::Array` or `Array`.
565
+ # @return [DataFrame]
566
+ # assigned DataFrame.
567
+ #
568
+ # @overload assign_left(keys)
569
+ # accepts keys from argument and pairs of key and values from block.
570
+ #
571
+ # @param keys [Symbol, String]
572
+ # keys of columns to create or update.
573
+ # @yield [self]
574
+ # the block is called within the context of self.
575
+ # (Block is called by instance_eval(&block).)
576
+ # @yieldreturn [Array<array_like>]
577
+ # column data to be assigned.
578
+ # `array_like` must be one of `Vector` or `Arrow::Array` or `Array`.
579
+ # @return [DataFrame]
580
+ # assigned DataFrame.
581
+ #
101
582
  def assign_left(*assigner, &block)
102
- appender, fields, arrays = assign_update(*assigner, &block)
103
- return self if appender.is_a?(DataFrame)
104
-
105
- append_to_fields_and_arrays(appender, fields, arrays, append_to_left: true) unless appender.empty?
106
-
107
- DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
583
+ assign_update(*assigner, append_to_left: true, &block)
108
584
  end
109
585
 
110
586
  private
111
587
 
112
- def assign_update(*assigner, &block)
588
+ def assign_update(*assigner, append_to_left: false, &block)
113
589
  if block
114
590
  assigner_from_block = instance_eval(&block)
115
591
  assigner =
116
- if assigner.empty?
117
- # block only
592
+ case assigner_from_block
593
+ in _ if assigner.empty? # block only
118
594
  [assigner_from_block]
119
- # If Ruby >= 3.0, one line pattern match can be used
120
- # assigner_from_block in [Array, *]
121
- elsif multiple_assigner?(assigner_from_block)
595
+ in [Vector, *] | [Array, *] | [Arrow::Array, *]
122
596
  assigner.zip(assigner_from_block)
123
597
  else
124
598
  assigner.zip([assigner_from_block])
@@ -128,10 +602,10 @@ module RedAmber
128
602
  case assigner
129
603
  in [] | [nil] | [{}] | [[]]
130
604
  return self
131
- in [Hash => key_array_pairs]
132
- # noop
133
605
  in [(Symbol | String) => key, (Vector | Array | Arrow::Array) => array]
134
606
  key_array_pairs = { key => array }
607
+ in [Hash => key_array_pairs]
608
+ # noop
135
609
  in [Array => array_in_array]
136
610
  key_array_pairs = try_convert_to_hash(array_in_array)
137
611
  in [Array, *] => array_in_array1
@@ -151,20 +625,27 @@ module RedAmber
151
625
  appender[key] = array
152
626
  end
153
627
  end
154
- [appender, *update_fields_and_arrays(updater)]
628
+ fields, arrays = *update_fields_and_arrays(updater)
629
+ return self if appender.is_a?(DataFrame)
630
+
631
+ unless appender.empty?
632
+ append_to_fields_and_arrays(appender, fields, arrays, append_to_left)
633
+ end
634
+
635
+ DataFrame.create(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
155
636
  end
156
637
 
157
638
  def try_convert_to_hash(array)
158
639
  array.to_h
159
640
  rescue TypeError
160
641
  [array].to_h
161
- rescue TypeError # rubocop:disable Lint/DuplicateRescueException
162
- raise DataFrameArgumentError, "Invalid argument in Array #{array}"
163
642
  end
164
643
 
165
644
  def rename_by_hash(key_pairs)
166
645
  not_existing_keys = key_pairs.keys - keys
167
- raise DataFrameArgumentError, "Not existing: #{not_existing_keys}" unless not_existing_keys.empty?
646
+ unless not_existing_keys.empty?
647
+ raise DataFrameArgumentError, "Not existing: #{not_existing_keys}"
648
+ end
168
649
 
169
650
  fields =
170
651
  keys.map do |key|
@@ -175,7 +656,7 @@ module RedAmber
175
656
  @table.schema[key]
176
657
  end
177
658
  end
178
- DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), @table.columns))
659
+ DataFrame.create(Arrow::Table.new(Arrow::Schema.new(fields), @table.columns))
179
660
  end
180
661
 
181
662
  def update_fields_and_arrays(updater)
@@ -185,7 +666,9 @@ module RedAmber
185
666
  data = updater[key]
186
667
  next unless data
187
668
 
188
- raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.nil? || data.size != size
669
+ if data.size != size
670
+ raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})"
671
+ end
189
672
 
190
673
  a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
191
674
  fields[i] = Arrow::Field.new(key, a.value_data_type)
@@ -194,10 +677,12 @@ module RedAmber
194
677
  [fields, arrays]
195
678
  end
196
679
 
197
- def append_to_fields_and_arrays(appender, fields, arrays, append_to_left: false)
680
+ def append_to_fields_and_arrays(appender, fields, arrays, append_to_left)
198
681
  enum = append_to_left ? appender.reverse_each : appender.each
199
682
  enum.each do |key, data|
200
- raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.size != size
683
+ if data.size != size
684
+ raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})"
685
+ end
201
686
 
202
687
  a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
203
688
 
@@ -210,14 +695,5 @@ module RedAmber
210
695
  end
211
696
  end
212
697
  end
213
-
214
- def multiple_assigner?(assigner)
215
- case assigner
216
- in [Vector, *] | [Array, *] | [Arrow::Array, *]
217
- true
218
- else
219
- false
220
- end
221
- end
222
698
  end
223
699
  end