red_amber 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +56 -22
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +29 -30
  8. data/benchmark/basic.yml +7 -7
  9. data/benchmark/combine.yml +3 -3
  10. data/benchmark/dataframe.yml +15 -9
  11. data/benchmark/group.yml +6 -6
  12. data/benchmark/reshape.yml +6 -6
  13. data/benchmark/vector.yml +6 -3
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +454 -85
  20. data/lib/red_amber/data_frame_combinable.rb +609 -115
  21. data/lib/red_amber/data_frame_displayable.rb +313 -34
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +623 -70
  26. data/lib/red_amber/data_frame_variable_operation.rb +452 -35
  27. data/lib/red_amber/group.rb +186 -22
  28. data/lib/red_amber/helper.rb +74 -14
  29. data/lib/red_amber/refinements.rb +26 -6
  30. data/lib/red_amber/subframes.rb +1101 -0
  31. data/lib/red_amber/vector.rb +362 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +506 -0
  34. data/lib/red_amber/vector_selectable.rb +265 -23
  35. data/lib/red_amber/vector_unary_element_wise.rb +529 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -0,0 +1,529 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
4
+ # reference: https://arrow.apache.org/docs/cpp/compute.html
5
+
6
+ module RedAmber
7
+ # Representing a series of data.
8
+ class Vector
9
+ class << self
10
+ private
11
+
12
+ # @!macro [attach] define_unary_element_wise
13
+ # [Unary element-wise function] Returns a Vector.
14
+ #
15
def define_unary_element_wise(function)
  # The generated instance method accepts keyword options only, hands them
  # to exec_func_unary together with the function name, and wraps the value
  # of the returned datum in a Vector.
  define_method(function) do |**opts|
    Vector.create(exec_func_unary(function, opts).value)
  end
end
21
+ end
22
+
23
+ # @!macro array_sort_options
24
+ # @param order [:ascending, :descending]
25
+ # ascending: Arrange values in increasing order.
26
+ # descending: Arrange values in decreasing order.
27
+
28
+ # rubocop:disable Layout/LineLength
29
+
30
+ # @!macro round_mode
31
+ # @param round_mode [:down, :up, :towards_zero, :towards_infinity, :half_down, :half_up, :half_towards_zero, :half_towards_infinity, :half_to_even, :half_to_odd]
32
+ # Rounding and tie-breaking mode.
33
+ # - down: Round to nearest integer less than or equal in magnitude (aka “floor”).
34
+ # - up: Round to nearest integer greater than or equal in magnitude (aka “ceil”).
35
+ # - towards_zero: Get the integral part without fractional digits (aka “trunc”).
36
+ # - towards_infinity: Round negative values with :down rule and positive values
37
+ # with :up rule (aka “away from zero”).
38
+ # - half_down: Round ties with :down rule (also called
39
+ # “round half towards negative infinity”).
40
+ # - half_up: Round ties with :up rule (also called
41
+ # “round half towards positive infinity”).
42
+ # - half_towards_zero: Round ties with :towards_zero rule (also called
43
+ # “round half away from infinity”).
44
+ # - half_towards_infinity: Round ties with :towards_infinity rule (also called
45
+ # “round half away from zero”).
46
+ # - half_to_even: Round ties to nearest even integer.
47
+ # - half_to_odd: Round ties to nearest odd integer.
48
+
49
+ # rubocop:enable Layout/LineLength
50
+
51
+ # Calculate the absolute value of self element-wise.
52
+ #
53
+ # Results will wrap around on integer overflow.
54
+ # @return [Vector]
55
+ # abs of each element of self.
56
+ #
57
+ define_unary_element_wise :abs
58
+
59
+ # Calculate the absolute value of self element-wise.
60
+ #
61
+ # This function is an overflow-checking variant of #abs.
62
+ # @return (see #abs)
63
+ #
64
+ define_unary_element_wise :abs_checked
65
+
66
+ # Compute the inverse cosine of self element-wise.
67
+ #
68
+ # NaN is returned for invalid input values.
69
+ # @return [Vector]
70
+ # acos of each element of self.
71
+ #
72
+ define_unary_element_wise :acos
73
+
74
+ # Compute the inverse cosine of self element-wise.
75
+ #
76
+ # This function is an overflow-checking variant of #acos.
77
+ # @return (see #acos)
78
+ #
79
+ define_unary_element_wise :acos_checked
80
+
81
+ # Compute the inverse sine of self element-wise.
82
+ #
83
+ # NaN is returned for invalid input values.
84
+ # @return [Vector]
85
+ # asin of each element of self.
86
+ #
87
+ define_unary_element_wise :asin
88
+
89
+ # Compute the inverse sine of self element-wise.
90
+ #
91
+ # This function is an overflow-checking variant of #asin.
92
+ # @return (see #asin)
93
+ #
94
+ define_unary_element_wise :asin_checked
95
+
96
+ # Return the indices that would sort self.
97
+ #
98
+ # Computes indices Vector that define a stable sort of self.
99
+ # By default, nils are considered greater than any other value
100
+ # and are therefore sorted at the end of the Vector.
101
+ # For floating-point types, NaNs are considered greater than any
102
+ # other non-nil value, but smaller than nil.
103
+ # @!method array_sort_indices(order: :ascending)
104
+ # @macro array_sort_options
105
+ # @return [Vector]
106
+ # sort indices of self.
107
+ #
108
+ define_unary_element_wise :array_sort_indices
109
+ alias_method :sort_indexes, :array_sort_indices
110
+ alias_method :sort_indices, :array_sort_indices
111
+ alias_method :sort_index, :array_sort_indices
112
+
113
+ # Compute the inverse tangent of self element-wise.
114
+ #
115
+ # The return value is in the range [-pi/2, pi/2].
116
+ # For a full return range [-pi, pi], see {.atan2} .
117
+ # @return [Vector]
118
+ # atan of each element of self.
119
+ #
120
+ define_unary_element_wise :atan
121
+
122
+ # Bit-wise negate element-wise.
123
+ #
124
+ # nil values return nil.
125
+ # @return [Vector]
126
+ # bit wise not of each element of self.
127
+ #
128
+ define_unary_element_wise :bit_wise_not
129
+
130
+ # Round up to the nearest integer.
131
+ #
132
+ # Compute the smallest integer value not less in magnitude than each element.
133
+ # @return [Vector]
134
+ # ceil of each element of self.
135
+ # @example
136
+ # double = Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
137
+ # double.ceil
138
+ #
139
+ # # =>
140
+ # #<RedAmber::Vector(:double, size=5):0x000000000000cd00>
141
+ # [16.0, 3.0, 4.0, -4.0, -5.0]
142
+ #
143
+ define_unary_element_wise :ceil
144
+
145
+ # Compute the cosine of self element-wise.
146
+ #
147
+ # NaN is returned for invalid input values.
148
+ # @return [Vector]
149
+ # cos of each element of self.
150
+ #
151
+ define_unary_element_wise :cos
152
+
153
+ # Compute the cosine of self element-wise.
154
+ #
155
+ # This function is an overflow-checking variant of #cos.
156
+ # @return (see #cos)
157
+ #
158
+ define_unary_element_wise :cos_checked
159
+
160
+ # Compute cumulative sum over the numeric Vector.
161
+ #
162
+ # This function is an overflow-checking variant of #cumsum.
163
+ # @note Self must be numeric.
164
+ # @note Return error for integer overflow.
165
+ # @return [Vector]
166
+ # cumulative sum of self.
167
+ #
168
+ define_unary_element_wise :cumulative_sum_checked
169
+
170
+ # Compute cumulative sum over the numeric Vector.
171
+ #
172
+ # @note Self must be numeric.
173
+ # @note Try to cast to Int64 if integer overflow occurred.
174
+ # @return [Vector]
175
+ # cumulative sum of self.
176
+ #
177
def cumsum
  cumulative_sum_checked
rescue Arrow::Error::Invalid
  # The checked kernel rejected the input (presumably integer overflow in
  # the current type), so rebuild the data as an Int64 array and retry once.
  int64_data = Arrow::Int64Array.new(data)
  Vector.create(int64_data).cumulative_sum_checked
end
182
+
183
+ # Carry non-nil values backward to fill nil slots.
184
+ #
185
+ # Propagate next valid value backward to previous nil values.
186
+ # Or nothing if all next values are nil.
187
+ # @return [Vector]
188
+ # a Vector which filled nil backward.
189
+ # @example
190
+ # integer = Vector.new([0, 1, nil, 3, nil])
191
+ # integer.fill_nil_backward
192
+ #
193
+ # # =>
194
+ # #<RedAmber::Vector(:uint8, size=5):0x000000000000f974>
195
+ # [0, 1, 3, 3, nil]
196
+ #
197
+ define_unary_element_wise :fill_null_backward
198
+ alias_method :fill_nil_backward, :fill_null_backward
199
+
200
+ # Carry non-nil values forward to fill nil slots.
201
+ #
202
+ # Propagate last valid value forward to next nil values.
203
+ # Or nothing if all previous values are nil.
204
+ # @return [Vector]
205
+ # a Vector which filled nil forward.
206
+ # @example
207
+ # integer = Vector.new([0, 1, nil, 3, nil])
208
+ # integer.fill_nil_forward
209
+ #
210
+ # # =>
211
+ # #<RedAmber::Vector(:uint8, size=5):0x000000000000f960>
212
+ # [0, 1, 1, 3, 3]
213
+ #
214
+ define_unary_element_wise :fill_null_forward
215
+ alias_method :fill_nil_forward, :fill_null_forward
216
+
217
+ # Round down to the nearest integer.
218
+ #
219
+ # Compute the largest integer value not greater in magnitude than each element.
220
+ # @return [Vector]
221
+ # floor of each element of self.
222
+ # @example
223
+ # double = Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
224
+ # double.floor
225
+ #
226
+ # # =>
227
+ # #<RedAmber::Vector(:double, size=5):0x000000000000cd14>
228
+ # [15.0, 2.0, 3.0, -5.0, -6.0]
229
+ #
230
+ define_unary_element_wise :floor
231
+
232
+ # Return true if value is finite.
233
+ #
234
+ # For each input value, emit true if the value is finite.
235
+ # (i.e. neither NaN, inf, nor -inf).
236
+ # @return [Vector]
237
+ # boolean Vector whether each element is finite.
238
+ #
239
+ define_unary_element_wise :is_finite
240
+
241
+ # Return true if value is infinity.
242
+ #
243
+ # For each input value, emit true if the value is infinite (inf or -inf).
244
+ # @return [Vector]
245
+ # boolean Vector whether each element is inf.
246
+ #
247
+ define_unary_element_wise :is_inf
248
+
249
+ # Return true if value is nil or NaN.
250
+ #
251
+ # For each input value, emit true if the value is nil or NaN.
252
+ # @return [Vector]
253
+ # boolean Vector whether each element is na.
254
+ #
255
def is_na # rubocop:disable Naming/PredicateName
  # NaN is only checked on numeric Vectors; any other type is tested for
  # nil alone.
  return is_nil unless numeric?

  is_nil | is_nan
end
258
+
259
+ # Return true if NaN.
260
+ #
261
+ # For each input value, emit true if the value is NaN.
262
+ # @return [Vector]
263
+ # boolean Vector whether each element is nan.
264
+ #
265
+ define_unary_element_wise :is_nan
266
+
267
+ # Return true if nil.
268
+ #
269
+ # @note Arrow::NullOptions is not supported yet.
270
+ # For each input value, emit true if the value is nil.
271
+ # @return [Vector]
272
+ # boolean Vector whether each element is null.
273
+ #
274
+ define_unary_element_wise :is_null
275
+ alias_method :is_nil, :is_null
276
+
277
+ # Return true if non-nil.
278
+ #
279
+ # For each input value, emit true if the value is valid (i.e. non-nil).
280
+ # @return [Vector]
281
+ # boolean Vector whether each element is valid.
282
+ #
283
+ define_unary_element_wise :is_valid
284
+
285
+ # Compute natural logarithm.
286
+ #
287
+ # Non-positive values return -inf or NaN. Nil values return nil.
288
+ # @return [Vector]
289
+ # natural logarithm of each element of self.
290
+ #
291
+ define_unary_element_wise :ln
292
+
293
+ # Compute natural logarithm.
294
+ #
295
+ # This function is an overflow-checking variant of #ln.
296
+ # @return (see #ln)
297
+ #
298
+ define_unary_element_wise :ln_checked
299
+
300
+ # Compute base 10 logarithm.
301
+ #
302
+ # Non-positive values return -inf or NaN. Nil values return nil.
303
+ # @return [Vector]
304
+ # base 10 logarithm of each element of self.
305
+ #
306
+ define_unary_element_wise :log10
307
+
308
+ # Compute base 10 logarithm.
309
+ #
310
+ # This function is an overflow-checking variant of #log10.
311
+ # @return (see #log10)
312
+ #
313
+ define_unary_element_wise :log10_checked
314
+
315
+ # Compute natural log of (1+x).
316
+ #
317
+ # Non-positive values return -inf or NaN. Nil values return nil.
318
+ # This function may be more precise than log(1 + x) for x close to zero.
319
+ # @return [Vector]
320
+ # natural log of (each element + 1) of self.
321
+ #
322
+ define_unary_element_wise :log1p
323
+
324
+ # Compute natural log of (1+x).
325
+ #
326
+ # This function is an overflow-checking variant of #log1p.
327
+ # @return (see #log1p)
328
+ #
329
+ define_unary_element_wise :log1p_checked
330
+
331
+ # Compute base 2 logarithm.
332
+ #
333
+ # Non-positive values return -inf or NaN. Nil values return nil.
334
+ # @return [Vector]
335
+ # base 2 logarithm of each element of self.
336
+ #
337
+ define_unary_element_wise :log2
338
+
339
+ # Compute base 2 logarithm.
340
+ #
341
+ # This function is an overflow-checking variant of #log2.
342
+ # @return (see #log2)
343
+ #
344
+ define_unary_element_wise :log2_checked
345
+
346
+ # Round to a given precision.
347
+ #
348
+ # Options are used to control the number of digits and rounding mode.
349
+ # Default behavior is to round to the nearest integer and
350
+ # use half-to-even rule to break ties.
351
+ # @!method round(n_digits: 0, round_mode: :half_to_even)
352
+ # @param n_digits [Integer]
353
+ # Rounding precision (number of digits to round to).
354
+ # @macro round_mode
355
+ # @return [Vector]
356
+ # round of each element of self.
357
+ # @example
358
+ # double = Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
359
+ # double.round
360
+ # # or double.round(n_digits: 0, mode: :half_to_even)
361
+ #
362
+ # # =>
363
+ # #<RedAmber::Vector(:double, size=5):0x000000000000cd28>
364
+ # [15.0, 2.0, 4.0, -4.0, -6.0]
365
+ #
366
+ # double.round(mode: :towards_infinity)
367
+ #
368
+ # # =>
369
+ # #<RedAmber::Vector(:double, size=5):0x000000000000cd3c>
370
+ # [16.0, 3.0, 4.0, -5.0, -6.0]
371
+ #
372
+ # double.round(mode: :half_up)
373
+ #
374
+ # # =>
375
+ # #<RedAmber::Vector(:double, size=5):0x000000000000cd50>
376
+ # [15.0, 3.0, 4.0, -4.0, -5.0]
377
+ #
378
+ # double.round(mode: :half_towards_zero)
379
+ #
380
+ # # =>
381
+ # #<RedAmber::Vector(:double, size=5):0x000000000000cd64>
382
+ # [15.0, 2.0, 3.0, -4.0, -5.0]
383
+ #
384
+ # double.round(mode: :half_towards_infinity)
385
+ #
386
+ # # =>
387
+ # #<RedAmber::Vector(:double, size=5):0x000000000000cd78>
388
+ # [15.0, 3.0, 4.0, -5.0, -6.0]
389
+ #
390
+ # double.round(mode: :half_to_odd)
391
+ #
392
+ # # =>
393
+ # #<RedAmber::Vector(:double, size=5):0x000000000000cd8c>
394
+ # [15.0, 3.0, 3.0, -5.0, -5.0]
395
+ #
396
+ # double.round(n_digits: 1)
397
+ #
398
+ # # =>
399
+ # #<RedAmber::Vector(:double, size=5):0x000000000000cda0>
400
+ # [15.2, 2.5, 3.5, -4.5, -5.5]
401
+ #
402
+ # double.round(n_digits: -1)
403
+ #
404
+ # # =>
405
+ # #<RedAmber::Vector(:double, size=5):0x000000000000cdb4>
406
+ # [20.0, 0.0, 0.0, -0.0, -10.0]
407
+ #
408
+ define_unary_element_wise :round
409
+
410
+ # Round to a given multiple.
411
+ #
412
+ # Options are used to control the rounding multiple and rounding mode.
413
+ # Default behavior is to round to the nearest integer and
414
+ # use half-to-even rule to break ties.
415
+ # @!method round_to_multiple(multiple: 1.0, round_mode: :half_to_even)
416
+ # @param multiple [Float, Integer]
417
+ # Rounding scale (multiple to round to).
418
+ # Should be a positive numeric scalar of a type compatible with the argument
419
+ # to be rounded. The cast kernel is used to convert the rounding multiple
420
+ # to match the result type.
421
+ # @macro round_mode
422
+ # @return [Vector]
423
+ # round to multiple of each element of self.
424
+ #
425
def round_to_multiple(multiple: 1.0, round_mode: :half_to_even)
  # Rounds each element to a multiple of +multiple+ and returns the result
  # as a Vector.
  #
  # multiple   - Float or Integer; wrapped in an Arrow::DoubleScalar before
  #              being handed to the compute function.
  # round_mode - Symbol naming the rounding / tie-breaking rule.
  #
  # The rounding rule is forwarded under the option key `mode`, the same key
  # the #round examples use (`round(mode: :half_up)`). Forwarding it as
  # `round_mode` does not match that option name, so the caller's choice may
  # never reach Arrow.
  # NOTE(review): confirm the option is named `mode` in the installed
  # red-arrow / Arrow GLib version.
  options = {
    multiple: Arrow::DoubleScalar.new(multiple),
    mode: round_mode
  }
  datum = exec_func_unary(:round_to_multiple, options)
  Vector.create(datum.value)
end
431
+
432
+ # Get the signedness of the arguments element-wise.
433
+ #
434
+ # Output is any of (-1,1) for nonzero inputs and 0 for zero input.
435
+ # NaN values return NaN. Integral values return signedness as Int8 and
436
+ # floating-point values return it with the same type as the input values.
437
+ # @return [Vector]
438
+ # sign of each element of self.
439
+ #
440
+ define_unary_element_wise :sign
441
+
442
+ # Compute the sine of self element-wise.
443
+ #
444
+ # NaN is returned for invalid input values.
445
+ # @return [Vector]
446
+ # sine of each element of self.
447
+ #
448
+ define_unary_element_wise :sin
449
+
450
+ # Compute the sine of self element-wise.
451
+ #
452
+ # This function is an overflow-checking variant of #sin.
453
+ # @return (see #sin)
454
+ #
455
+ define_unary_element_wise :sin_checked
456
+
457
+ # Compute square root of self.
458
+ #
459
+ # NaN is returned for invalid input values.
460
+ # @return [Vector]
461
+ # sqrt of each element of self.
462
+ #
463
+ define_unary_element_wise :sqrt
464
+
465
+ # Compute square root of self.
466
+ #
467
+ # This function is an overflow-checking variant of #sqrt.
468
+ # @return (see #sqrt)
469
+ #
470
+ define_unary_element_wise :sqrt_checked
471
+
472
+ # Compute the tangent of self element-wise.
473
+ #
474
+ # NaN is returned for invalid input values.
475
+ # @return [Vector]
476
+ # tangent of each element of self.
477
+ #
478
+ define_unary_element_wise :tan
479
+
480
+ # Compute the tangent of self element-wise.
481
+ #
482
+ # This function is an overflow-checking variant of #tan.
483
+ # @return (see #tan)
484
+ #
485
+ define_unary_element_wise :tan_checked
486
+
487
+ # Compute the integral part
488
+ #
489
+ # Compute the nearest integer not greater in magnitude than each element.
490
+ # @return [Vector]
491
+ # trunc of each element of self.
492
+ #
493
+ define_unary_element_wise :trunc
494
+
495
+ # Compute unique elements
496
+ #
497
+ # Return an array with distinct values. Nils in the input are ignored.
498
+ # @return [Vector]
499
+ # uniq element of self.
500
+ #
501
+ define_unary_element_wise :unique
502
+ alias_method :uniq, :unique
503
+
504
+ # Invert boolean values
505
+ #
506
+ # @return [Vector]
507
+ # not of each element of self.
508
+ #
509
+ define_unary_element_wise :invert
510
+ alias_method :'!', :invert # rubocop:disable Lint/SymbolConversion
511
+ alias_method :not, :invert
512
+
513
+ # Negate the argument element-wise
514
+ #
515
+ # Results will wrap around on integer overflow.
516
+ # @return [Vector]
517
+ # negate of each element of self.
518
+ #
519
+ define_unary_element_wise :negate
520
+ alias_method :'-@', :negate # rubocop:disable Lint/SymbolConversion
521
+
522
+ # Negate the argument element-wise
523
+ #
524
+ # This function is an overflow-checking variant of #negate.
525
+ # @return (see #negate)
526
+ #
527
+ define_unary_element_wise :negate_checked
528
+ end
529
+ end