red_amber 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +39 -20
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +113 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +25 -26
  8. data/benchmark/basic.yml +2 -2
  9. data/benchmark/combine.yml +2 -2
  10. data/benchmark/dataframe.yml +2 -2
  11. data/benchmark/group.yml +2 -2
  12. data/benchmark/reshape.yml +2 -2
  13. data/benchmark/vector.yml +3 -0
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +429 -75
  20. data/lib/red_amber/data_frame_combinable.rb +516 -66
  21. data/lib/red_amber/data_frame_displayable.rb +244 -14
  22. data/lib/red_amber/data_frame_indexable.rb +121 -18
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +622 -66
  26. data/lib/red_amber/data_frame_variable_operation.rb +446 -34
  27. data/lib/red_amber/group.rb +187 -22
  28. data/lib/red_amber/helper.rb +70 -10
  29. data/lib/red_amber/refinements.rb +12 -5
  30. data/lib/red_amber/subframes.rb +1066 -0
  31. data/lib/red_amber/vector.rb +385 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +387 -0
  34. data/lib/red_amber/vector_selectable.rb +217 -12
  35. data/lib/red_amber/vector_unary_element_wise.rb +436 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -5,23 +5,57 @@ module RedAmber
5
5
  # @data : holds Arrow::ChunkedArray
6
6
  class Vector
7
7
  # mix-in
8
- include VectorFunctions
8
+ include Enumerable
9
+ include Helper
10
+ include ArrowFunction
9
11
  include VectorUpdatable
10
12
  include VectorSelectable
11
- include Helper
12
13
 
13
14
  using RefineArrayLike
14
15
 
15
16
  # Quicker constructor of Vector.
16
17
  #
18
+ # @param arrow_array [Arrow::Array]
19
+ # Arrow::Array object to have in the Vector.
20
+ # @return [Vector]
21
+ # created Vector.
22
+ # @note This method doesn't check argument type.
23
+ #
17
24
  def self.create(arrow_array)
18
25
  instance = allocate
19
26
  instance.instance_variable_set(:@data, arrow_array)
20
27
  instance
21
28
  end
22
29
 
30
+ # Return true if it is an aggregation function.
31
+ #
32
+ # @param function [Symbol]
33
+ # function name to test.
34
+ # @return [Boolean]
35
+ # true if function is an aggregation function, otherwise false.
36
+ #
37
+ # @example
38
+ # Vector.aggregate?(:mean) # => true
39
+ #
40
+ # Vector.aggregate?(:round) # => false
41
+ #
42
+ # @since 0.4.0
43
+ #
44
+ def self.aggregate?(function)
45
+ %i[
46
+ all all? any any? approximate_median count count_distinct count_uniq
47
+ max mean median min min_max product quantile sd std stddev sum
48
+ unbiased_variance var variance
49
+ ].include?(function.to_sym)
50
+ end
51
+
23
52
  # Create a Vector.
24
53
  #
54
+ # @param array [Array, Vector, Range, Arrow::Array, #to_arrow_array]
55
+ # array-like.
56
+ # @return [Vector]
57
+ # created Vector.
58
+ #
25
59
  # @note default is headless Vector and '@key == nil'
26
60
  def initialize(*array)
27
61
  @data =
@@ -39,15 +73,99 @@ module RedAmber
39
73
  end
40
74
  end
41
75
 
76
+ # Entity of Vector.
77
+ #
78
+ # @return [Arrow::Array]
79
+ #
42
80
  attr_reader :data
43
81
  alias_method :to_arrow_array, :data
44
82
 
83
+ # Associated key name when self is in a DataFrame.
84
+ #
85
+ # Default Vector is 'head-less' (key-less).
86
+ # @return [Symbol]
87
+ #
45
88
  attr_accessor :key
46
89
 
90
+ # Return other as a Vector which is same data type as self.
91
+ #
92
+ # @param other [Vector, Array, Arrow::Array, Arrow::ChunkedArray]
93
+ # a source array-like which will be converted.
94
+ # @return [Vector]
95
+ # resolved Vector.
96
+ # @example Integer to String
97
+ # Vector.new('A').resolve([1, 2])
98
+ #
99
+ # # =>
100
+ # #<RedAmber::Vector(:string, size=2):0x00000000000037b4>
101
+ # ["1", "2"]
102
+ #
103
+ # @example String to Integer
104
+ # Vector.new(1).resolve(['A'])
105
+ #
106
+ # # =>
107
+ # #<RedAmber::Vector(:uint8, size=1):0x00000000000037dc>
108
+ # [65]
109
+ #
110
+ # @example Upcast to uint16
111
+ # vector = Vector.new(256)
112
+ #
113
+ # # =>
114
+ # #<RedAmber::Vector(:uint16, size=1):0x000000000000c1fc>
115
+ # [256]
116
+ #
117
+ # vector.resolve([1, 2])
118
+ #
119
+ # # =>
120
+ # # Not a uint8 Vector
121
+ # #<RedAmber::Vector(:uint16, size=2):0x000000000000c328>
122
+ # [1, 2]
123
+ #
124
+ # @since 0.4.0
125
+ #
126
+ def resolve(other)
127
+ case other
128
+ when Vector
129
+ Vector.create(data.resolve(other.data))
130
+ when Array, Arrow::Array, Arrow::ChunkedArray
131
+ Vector.create(data.resolve(other))
132
+ else
133
+ raise VectorArgumentError, "invalid argument: #{other}"
134
+ end
135
+ end
136
+
137
+ # String representation of self like an Array.
138
+ #
139
+ # @return [String]
140
+ # return self as same as Array's inspect.
141
+ #
47
142
  def to_s
48
143
  @data.to_a.inspect
49
144
  end
50
145
 
146
+ # String representation of self.
147
+ #
148
+ # According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
149
+ # - If it is 'MINIMUM', returns class and size.
150
+ # - If it is otherwise, returns class, size and preview.
151
+ # Default value of the ENV is 'Table'.
152
+ # @param limit [Integer]
153
+ # max width of the result.
154
+ # @return [String]
155
+ # show information of self as a String.
156
+ # @example Default (ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table')
157
+ # puts vector.inspect
158
+ #
159
+ # # =>
160
+ # #<RedAmber::Vector(:uint8, size=3):0x00000000000037f0>
161
+ # [1, 2, 3]
162
+ #
163
+ # @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'Minimum'
164
+ # puts vector.inspect
165
+ #
166
+ # # =>
167
+ # RedAmber::Vector(:uint8, size=3)
168
+ #
51
169
  def inspect(limit: 80)
52
170
  if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table').casecmp('MINIMUM').zero?
53
171
  # Better performance than `.upcase == 'MINIMUM'`
@@ -70,82 +188,187 @@ module RedAmber
70
188
  end
71
189
  end
72
190
 
191
+ # Convert to an Array.
192
+ #
193
+ # @return [Array]
194
+ # array representation.
195
+ #
73
196
  def to_ary
74
197
  @data.values
75
198
  end
76
-
77
199
  alias_method :to_a, :to_ary
78
200
  alias_method :values, :to_ary
79
201
  alias_method :entries, :to_ary
80
202
 
203
+ # Indices from 0 to size-1 as an Array.
204
+ #
205
+ # @return [Array]
206
+ # indices.
207
+ #
81
208
  def indices
82
209
  (0...size).to_a
83
210
  end
84
-
85
211
  alias_method :indexes, :indices
86
212
  alias_method :indeces, :indices
87
213
 
214
+ # Vector size.
215
+ #
216
+ # @return [Integer]
217
+ # size of self.
218
+ #
88
219
  def size
89
220
  # only defined :length in Arrow?
90
221
  @data.length
91
222
  end
92
-
93
223
  alias_method :length, :size
94
224
  alias_method :n_rows, :size
95
225
  alias_method :nrow, :size
96
226
 
227
+ # Test whether self is empty.
228
+ #
229
+ # @return [true, false]
230
+ # true if self is empty.
231
+ #
97
232
  def empty?
98
233
  size.zero?
99
234
  end
100
235
 
236
+ # Type nickname of self.
237
+ #
238
+ # @return [Symbol]
239
+ # type nickname of values.
240
+ #
101
241
  def type
102
242
  list? ? :list : @data.value_type.nick.to_sym
103
243
  end
104
244
 
245
+ # Type Class of self.
246
+ #
247
+ # @return [type_Class]
248
+ # type class.
249
+ #
250
+ def type_class
251
+ @data.type_class
252
+ end
253
+
254
+ # Test if self is a boolean Vector.
255
+ #
256
+ # @return [true, false]
257
+ # test result.
258
+ #
105
259
  def boolean?
106
260
  @data.boolean?
107
261
  end
108
262
 
263
+ # Test if self is a numeric Vector.
264
+ #
265
+ # @return [true, false]
266
+ # test result.
267
+ #
109
268
  def numeric?
110
269
  @data.numeric?
111
270
  end
112
271
 
272
+ # Test if self is a float Vector.
273
+ #
274
+ # @return [true, false]
275
+ # test result.
276
+ #
113
277
  def float?
114
278
  @data.float?
115
279
  end
116
280
 
281
+ # Test if self is an integer Vector.
282
+ #
283
+ # @return [true, false]
284
+ # test result.
285
+ #
117
286
  def integer?
118
287
  @data.integer?
119
288
  end
120
289
 
290
+ # Test if self is a string Vector.
291
+ #
292
+ # @return [true, false]
293
+ # test result.
294
+ #
121
295
  def string?
122
296
  @data.string?
123
297
  end
124
298
 
299
+ # Test if self is a dictionary Vector.
300
+ #
301
+ # @return [true, false]
302
+ # test result.
303
+ #
125
304
  def dictionary?
126
305
  @data.dictionary?
127
306
  end
128
307
 
308
+ # Test if self is a temporal Vector.
309
+ #
310
+ # @return [true, false]
311
+ # test result.
312
+ #
129
313
  def temporal?
130
314
  @data.temporal?
131
315
  end
132
316
 
317
+ # Test if self is a list Vector.
318
+ #
319
+ # @return [true, false]
320
+ # test result.
321
+ #
133
322
  def list?
134
323
  @data.list?
135
324
  end
136
325
 
137
- def type_class
138
- @data.type_class
139
- end
140
-
326
+ # Iterates over Vector elements or returns an Enumerator.
327
+ #
328
+ # @overload each
329
+ # Returns a new Enumerator if no block given.
330
+ #
331
+ # @return [Enumerator]
332
+ # Enumerator over each element.
333
+ #
334
+ # @overload each
335
+ # When a block given, passes each element in self to the block.
336
+ #
337
+ # @yieldparam element [Object]
338
+ # passes element by a block parameter.
339
+ # @yieldreturn [Object]
340
+ # evaluated result value from the block.
341
+ # @return [self]
342
+ # returns self.
343
+ #
141
344
  def each
142
345
  return enum_for(:each) unless block_given?
143
346
 
144
347
  size.times do |i|
145
348
  yield data[i]
146
349
  end
350
+ self
147
351
  end
148
352
 
353
+ # Returns a Vector from collected objects from the block.
354
+ #
355
+ # @overload map
356
+ # Returns a new Enumerator if no block given.
357
+ #
358
+ # @return [Enumerator]
359
+ # a new Enumerator.
360
+ #
361
+ # @overload map
362
+ # When a block given, calls the block with successive elements.
363
+ # Returns a Vector of the objects returned by the block.
364
+ #
365
+ # @yieldparam element [Object]
366
+ # passes element by a block parameter.
367
+ # @yieldreturn [Object]
368
+ # evaluated result value from the block.
369
+ # @return [Vector]
370
+ # returns the collected values from the block as a Vector.
371
+ #
149
372
  def map(&block)
150
373
  return enum_for(:map) unless block
151
374
 
@@ -153,18 +376,35 @@ module RedAmber
153
376
  end
154
377
  alias_method :collect, :map
155
378
 
156
- # undocumented
379
+ # Tests whether self is chunked or not.
380
+ #
381
+ # @api private
382
+ # @return [true, false]
383
+ # returns true if #data is chunked.
384
+ #
157
385
  def chunked?
158
386
  @data.is_a? Arrow::ChunkedArray
159
387
  end
160
388
 
161
- # undocumented
389
+ # Returns the number of chunks.
390
+ #
391
+ # @api private
392
+ # @return [Integer]
393
+ # the number of chunks. If self is not chunked, returns zero.
394
+ #
162
395
  def n_chunks
163
396
  chunked? ? @data.n_chunks : 0
164
397
  end
165
398
 
166
399
  # def each_chunk() end
167
400
 
401
+ # Returns a hash containing the counts of equal elements.
402
+ #
403
+ # - Each key is an element of self.
404
+ # - Each value is the number of elements equal to the key.
405
+ # @return [Hash]
406
+ # result in a Hash.
407
+ #
168
408
  def tally
169
409
  hash = values.tally
170
410
  if (type_class < Arrow::FloatingPointDataType) && is_nan.any
@@ -180,22 +420,156 @@ module RedAmber
180
420
  hash
181
421
  end
182
422
 
423
+ # @api private
424
+ # Arrow implementation of #tally
183
425
  def value_counts
184
426
  values, counts = Arrow::Function.find(:value_counts).execute([data]).value.fields
185
427
  values.zip(counts).to_h
186
428
  end
187
429
 
430
+ # Count nils in self.
431
+ #
432
+ # @return [Integer]
433
+ # the number of nils.
434
+ #
188
435
  def n_nulls
189
436
  @data.n_nulls
190
437
  end
191
438
  alias_method :n_nils, :n_nulls
192
439
 
440
+ # Count NaNs in self if self is a numeric Vector
441
+ #
442
+ # @return [Integer]
443
+ # the number of Float::NANs. If self is not a numeric Vector,
444
+ # returns 0.
445
+ #
193
446
  def n_nans
194
447
  numeric? ? is_nan.to_a.count(true) : 0
195
448
  end
196
449
 
450
+ # Return true if self has any nil.
451
+ #
452
+ # @return [true, false]
453
+ # true or false.
454
+ #
197
455
  def has_nil?
198
456
  is_nil.any
199
457
  end
458
+
459
+ # Enable to compute with coercion mechanism.
460
+ #
461
+ # @example
462
+ # vector = Vector.new(1,2,3)
463
+ #
464
+ # # =>
465
+ # #<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
466
+ # [1, 2, 3]
467
+ #
468
+ # # Vector's `#*` method
469
+ # vector * -1
470
+ #
471
+ # # =>
472
+ # #<RedAmber::Vector(:int16, size=3):0x00000000000e3698>
473
+ # [-1, -2, -3]
474
+ #
475
+ # # coerced calculation
476
+ # -1 * vector
477
+ #
478
+ # # =>
479
+ # #<RedAmber::Vector(:int16, size=3):0x00000000000ea4ac>
480
+ # [-1, -2, -3]
481
+ #
482
+ # # `-@` operator
483
+ # -vector
484
+ #
485
+ # # =>
486
+ # #<RedAmber::Vector(:uint8, size=3):0x00000000000ee7b4>
487
+ # [255, 254, 253]
488
+ #
489
+ def coerce(other)
490
+ [Vector.new(Array(other) * size), self]
491
+ end
492
+
493
+ # Spread the return value of an aggregate function as if
494
+ # it is a element-wise function.
495
+ #
496
+ # @overload propagate(function)
497
+ # Returns a Vector of same size as self spreading the value from function.
498
+ #
499
+ # @param function [Symbol] a name of aggregation function for self.
500
+ # Return value of the function must be a scalar.
501
+ # @return [Vector] Returns a Vector that is the same size as self
502
+ # and such that all elements are the same as the result of aggregation `function`.
503
+ # @example propagate by an aggregation function name
504
+ # vec = Vector.new(1, 2, 3, 4)
505
+ # vec.propagate(:mean)
506
+ # # =>
507
+ # #<RedAmber::Vector(:double, size=4):0x000000000001985c>
508
+ # [2.5, 2.5, 2.5, 2.5]
509
+ #
510
+ # @overload propagate
511
+ # Returns a Vector of same size as self spreading the value from block.
512
+ #
513
+ # @yieldparam self [Vector]
514
+ # gives self to the block.
515
+ # @yieldreturn [scalar]
516
+ # a scalar value.
517
+ # @return [Vector]
518
+ # returns a Vector that is the same size as self
519
+ # and such that all elements are the same as the yielded value from the block.
520
+ # @example propagate by a block
521
+ # vec.propagate { |v| v.mean.round }
522
+ # # =>
523
+ # #<RedAmber::Vector(:uint8, size=4):0x000000000000cb98>
524
+ # [3, 3, 3, 3]
525
+ #
526
+ # @since 0.4.0
527
+ #
528
+ def propagate(function = nil, &block)
529
+ value =
530
+ if block
531
+ raise VectorArgumentError, "can't specify both function and block" if function
532
+
533
+ yield self
534
+ else
535
+ function = function&.to_sym
536
+ unless function && respond_to?(function) && Vector.aggregate?(function)
537
+ raise VectorArgumentError, "illegal function: #{function.inspect}"
538
+ end
539
+
540
+ send(function)
541
+ end
542
+ Vector.new([value] * size)
543
+ end
544
+ alias_method :expand, :propagate
545
+
546
+ private # =======
547
+
548
+ def exec_func_unary(function, options)
549
+ options = nil if options.empty?
550
+ find(function).execute([data], options)
551
+ end
552
+
553
+ def exec_func_binary(function, other, options)
554
+ options = nil if options.empty?
555
+ case other
556
+ when Vector
557
+ find(function).execute([data, other.data], options)
558
+ when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar,
559
+ Array, Numeric, String, TrueClass, FalseClass
560
+ find(function).execute([data, other], options)
561
+ end
562
+ end
563
+
564
+ def get_scalar(datum)
565
+ output = datum.value
566
+ case output
567
+ when Arrow::StringScalar then output.to_s
568
+ when Arrow::StructScalar
569
+ output.value.map { |s| s.is_a?(Arrow::StringScalar) ? s.to_s : s.value }
570
+ else
571
+ output.value
572
+ end
573
+ end
200
574
  end
201
575
  end