red_amber 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +56 -22
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +29 -30
  8. data/benchmark/basic.yml +7 -7
  9. data/benchmark/combine.yml +3 -3
  10. data/benchmark/dataframe.yml +15 -9
  11. data/benchmark/group.yml +6 -6
  12. data/benchmark/reshape.yml +6 -6
  13. data/benchmark/vector.yml +6 -3
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +454 -85
  20. data/lib/red_amber/data_frame_combinable.rb +609 -115
  21. data/lib/red_amber/data_frame_displayable.rb +313 -34
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +623 -70
  26. data/lib/red_amber/data_frame_variable_operation.rb +452 -35
  27. data/lib/red_amber/group.rb +186 -22
  28. data/lib/red_amber/helper.rb +74 -14
  29. data/lib/red_amber/refinements.rb +26 -6
  30. data/lib/red_amber/subframes.rb +1101 -0
  31. data/lib/red_amber/vector.rb +362 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +506 -0
  34. data/lib/red_amber/vector_selectable.rb +265 -23
  35. data/lib/red_amber/vector_unary_element_wise.rb +529 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -5,15 +5,22 @@ module RedAmber
5
5
  # @data : holds Arrow::ChunkedArray
6
6
  class Vector
7
7
  # mix-in
8
- include VectorFunctions
8
+ include Enumerable
9
+ include Helper
10
+ include ArrowFunction
9
11
  include VectorUpdatable
10
12
  include VectorSelectable
11
- include Helper
12
13
 
13
14
  using RefineArrayLike
14
15
 
15
16
  # Quicker constructor of Vector.
16
17
  #
18
+ # @param arrow_array [Arrow::Array]
19
+ # Arrow::Array object to have in the Vector.
20
+ # @return [Vector]
21
+ # created Vector.
22
+ # @note This method doesn't check argument type.
23
+ #
17
24
  def self.create(arrow_array)
18
25
  instance = allocate
19
26
  instance.instance_variable_set(:@data, arrow_array)
@@ -22,6 +29,11 @@ module RedAmber
22
29
 
23
30
  # Create a Vector.
24
31
  #
32
+ # @param array [Array, Vector, Range, Arrow::Array, #to_arrow_array]
33
+ # array-like.
34
+ # @return [Vector]
35
+ # created Vector.
36
+ #
25
37
  # @note default is headless Vector and '@key == nil'
26
38
  def initialize(*array)
27
39
  @data =
@@ -39,15 +51,99 @@ module RedAmber
39
51
  end
40
52
  end
41
53
 
54
+ # Entity of Vector.
55
+ #
56
+ # @return [Arrow::Array]
57
+ #
42
58
  attr_reader :data
43
59
  alias_method :to_arrow_array, :data
44
60
 
61
+ # Associated key name when self is in a DataFrame.
62
+ #
63
+ # Default Vector is 'head-less' (key-less).
64
+ # @return [Symbol]
65
+ #
45
66
  attr_accessor :key
46
67
 
68
+ # Return other as a Vector which is same data type as self.
69
+ #
70
+ # @param other [Vector, Array, Arrow::Array, Arrow::ChunkedArray]
71
+ # a source array-like which will be converted.
72
+ # @return [Vector]
73
+ # resolved Vector.
74
+ # @example Integer to String
75
+ # Vector.new('A').resolve([1, 2])
76
+ #
77
+ # # =>
78
+ # #<RedAmber::Vector(:string, size=2):0x00000000000037b4>
79
+ # ["1", "2"]
80
+ #
81
+ # @example String to Integer
82
+ # Vector.new(1).resolve(['A'])
83
+ #
84
+ # # =>
85
+ # #<RedAmber::Vector(:uint8, size=1):0x00000000000037dc>
86
+ # [65]
87
+ #
88
+ # @example Upcast to uint16
89
+ # vector = Vector.new(256)
90
+ #
91
+ # # =>
92
+ # #<RedAmber::Vector(:uint16, size=1):0x000000000000c1fc>
93
+ # [256]
94
+ #
95
+ # vector.resolve([1, 2])
96
+ #
97
+ # # =>
98
+ # # Not a uint8 Vector
99
+ # #<RedAmber::Vector(:uint16, size=2):0x000000000000c328>
100
+ # [1, 2]
101
+ #
102
+ # @since 0.4.0
103
+ #
104
+ def resolve(other)
105
+ case other
106
+ when Vector
107
+ Vector.create(data.resolve(other.data))
108
+ when Array, Arrow::Array, Arrow::ChunkedArray
109
+ Vector.create(data.resolve(other))
110
+ else
111
+ raise VectorArgumentError, "invalid argument: #{other}"
112
+ end
113
+ end
114
+
115
+ # String representation of self like an Array.
116
+ #
117
+ # @return [String]
118
+ # returns the same String as Array#inspect on the values.
119
+ #
47
120
  def to_s
48
121
  @data.to_a.inspect
49
122
  end
50
123
 
124
+ # String representation of self.
125
+ #
126
+ # According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
127
+ # - If it is 'MINIMUM', returns class and size.
128
+ # - If it is otherwise, returns class, size and preview.
129
+ # Default value of the ENV is 'Table'.
130
+ # @param limit [Integer]
131
+ # max width of the result.
132
+ # @return [String]
133
+ # show information of self as a String.
134
+ # @example Default (ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table')
135
+ # puts vector.inspect
136
+ #
137
+ # # =>
138
+ # #<RedAmber::Vector(:uint8, size=3):0x00000000000037f0>
139
+ # [1, 2, 3]
140
+ #
141
+ # @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'Minimum'
142
+ # puts vector.inspect
143
+ #
144
+ # # =>
145
+ # RedAmber::Vector(:uint8, size=3)
146
+ #
51
147
  def inspect(limit: 80)
52
148
  if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table').casecmp('MINIMUM').zero?
53
149
  # Better performance than `.upcase == 'MINIMUM'`
@@ -70,82 +166,187 @@ module RedAmber
70
166
  end
71
167
  end
72
168
 
169
+ # Convert to an Array.
170
+ #
171
+ # @return [Array]
172
+ # array representation.
173
+ #
73
174
  def to_ary
74
175
  @data.values
75
176
  end
76
-
77
177
  alias_method :to_a, :to_ary
78
178
  alias_method :values, :to_ary
79
179
  alias_method :entries, :to_ary
80
180
 
181
+ # Indices from 0 to size-1 as an Array.
182
+ #
183
+ # @return [Array]
184
+ # indices.
185
+ #
81
186
  def indices
82
187
  (0...size).to_a
83
188
  end
84
-
85
189
  alias_method :indexes, :indices
86
190
  alias_method :indeces, :indices
87
191
 
192
+ # Vector size.
193
+ #
194
+ # @return [Integer]
195
+ # size of self.
196
+ #
88
197
  def size
89
198
  # only defined :length in Arrow?
90
199
  @data.length
91
200
  end
92
-
93
201
  alias_method :length, :size
94
202
  alias_method :n_rows, :size
95
203
  alias_method :nrow, :size
96
204
 
205
+ # Test whether self is empty.
206
+ #
207
+ # @return [true, false]
208
+ # true if self is empty.
209
+ #
97
210
  def empty?
98
211
  size.zero?
99
212
  end
100
213
 
214
+ # Type nickname of self.
215
+ #
216
+ # @return [Symbol]
217
+ # type nickname of values.
218
+ #
101
219
  def type
102
220
  list? ? :list : @data.value_type.nick.to_sym
103
221
  end
104
222
 
223
+ # Type Class of self.
224
+ #
225
+ # @return [type_Class]
226
+ # type class.
227
+ #
228
+ def type_class
229
+ @data.type_class
230
+ end
231
+
232
+ # Test if self is a boolean Vector.
233
+ #
234
+ # @return [true, false]
235
+ # test result.
236
+ #
105
237
  def boolean?
106
238
  @data.boolean?
107
239
  end
108
240
 
241
+ # Test if self is a numeric Vector.
242
+ #
243
+ # @return [true, false]
244
+ # test result.
245
+ #
109
246
  def numeric?
110
247
  @data.numeric?
111
248
  end
112
249
 
250
+ # Test if self is a float Vector.
251
+ #
252
+ # @return [true, false]
253
+ # test result.
254
+ #
113
255
  def float?
114
256
  @data.float?
115
257
  end
116
258
 
259
+ # Test if self is an integer Vector.
260
+ #
261
+ # @return [true, false]
262
+ # test result.
263
+ #
117
264
  def integer?
118
265
  @data.integer?
119
266
  end
120
267
 
268
+ # Test if self is a string Vector.
269
+ #
270
+ # @return [true, false]
271
+ # test result.
272
+ #
121
273
  def string?
122
274
  @data.string?
123
275
  end
124
276
 
277
+ # Test if self is a dictionary Vector.
278
+ #
279
+ # @return [true, false]
280
+ # test result.
281
+ #
125
282
  def dictionary?
126
283
  @data.dictionary?
127
284
  end
128
285
 
286
+ # Test if self is a temporal Vector.
287
+ #
288
+ # @return [true, false]
289
+ # test result.
290
+ #
129
291
  def temporal?
130
292
  @data.temporal?
131
293
  end
132
294
 
295
+ # Test if self is a list Vector.
296
+ #
297
+ # @return [true, false]
298
+ # test result.
299
+ #
133
300
  def list?
134
301
  @data.list?
135
302
  end
136
303
 
137
- def type_class
138
- @data.type_class
139
- end
140
-
304
+ # Iterates over Vector elements or returns an Enumerator.
305
+ #
306
+ # @overload each
307
+ # Returns a new Enumerator if no block given.
308
+ #
309
+ # @return [Enumerator]
310
+ # Enumerator over each element.
311
+ #
312
+ # @overload each
313
+ # When a block given, passes each element in self to the block.
314
+ #
315
+ # @yieldparam element [Object]
316
+ # passes element by a block parameter.
317
+ # @yieldreturn [Object]
318
+ # evaluated result value from the block.
319
+ # @return [self]
320
+ # returns self.
321
+ #
141
322
  def each
142
323
  return enum_for(:each) unless block_given?
143
324
 
144
325
  size.times do |i|
145
326
  yield data[i]
146
327
  end
328
+ self
147
329
  end
148
330
 
331
+ # Returns a Vector from collected objects from the block.
332
+ #
333
+ # @overload map
334
+ # Returns a new Enumerator if no block given.
335
+ #
336
+ # @return [Enumerator]
337
+ # a new Enumerator.
338
+ #
339
+ # @overload map
340
+ # When a block given, calls the block with successive elements.
341
+ # Returns a Vector of the objects returned by the block.
342
+ #
343
+ # @yieldparam element [Object]
344
+ # passes element by a block parameter.
345
+ # @yieldreturn [Object]
346
+ # evaluated result value from the block.
347
+ # @return [Vector]
348
+ # returns the collected values from the block as a Vector.
349
+ #
149
350
  def map(&block)
150
351
  return enum_for(:map) unless block
151
352
 
@@ -153,18 +354,35 @@ module RedAmber
153
354
  end
154
355
  alias_method :collect, :map
155
356
 
156
- # undocumented
357
+ # Tests whether self is chunked or not.
358
+ #
359
+ # @api private
360
+ # @return [true, false]
361
+ # returns true if #data is chunked.
362
+ #
157
363
  def chunked?
158
364
  @data.is_a? Arrow::ChunkedArray
159
365
  end
160
366
 
161
- # undocumented
367
+ # Returns the number of chunks.
368
+ #
369
+ # @api private
370
+ # @return [Integer]
371
+ # the number of chunks. If self is not chunked, returns zero.
372
+ #
162
373
  def n_chunks
163
374
  chunked? ? @data.n_chunks : 0
164
375
  end
165
376
 
166
377
  # def each_chunk() end
167
378
 
379
+ # Returns a hash containing the counts of equal elements.
380
+ #
381
+ # - Each key is an element of self.
382
+ # - Each value is the number of elements equal to the key.
383
+ # @return [Hash]
384
+ # result in a Hash.
385
+ #
168
386
  def tally
169
387
  hash = values.tally
170
388
  if (type_class < Arrow::FloatingPointDataType) && is_nan.any
@@ -180,22 +398,155 @@ module RedAmber
180
398
  hash
181
399
  end
182
400
 
401
+ # @api private
402
+ # Arrow implementation of #tally
183
403
  def value_counts
184
404
  values, counts = Arrow::Function.find(:value_counts).execute([data]).value.fields
185
405
  values.zip(counts).to_h
186
406
  end
187
407
 
408
+ # Count nils in self.
409
+ #
410
+ # @return [Integer]
411
+ # the number of nils.
412
+ #
188
413
  def n_nulls
189
414
  @data.n_nulls
190
415
  end
191
416
  alias_method :n_nils, :n_nulls
192
417
 
418
+ # Count NaNs in self if self is a numeric Vector
419
+ #
420
+ # @return [Integer]
421
+ # the number of Float::NANs. If self is not a numeric Vector,
422
+ # returns 0.
423
+ #
193
424
  def n_nans
194
425
  numeric? ? is_nan.to_a.count(true) : 0
195
426
  end
196
427
 
428
+ # Return true if self has any nil.
429
+ #
430
+ # @return [true, false]
431
+ # true or false.
432
+ #
197
433
  def has_nil?
198
434
  is_nil.any
199
435
  end
436
+
437
+ # Enable to compute with coercion mechanism.
438
+ #
439
+ # @example
440
+ # vector = Vector.new(1,2,3)
441
+ #
442
+ # # =>
443
+ # #<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
444
+ # [1, 2, 3]
445
+ #
446
+ # # Vector's `#*` method
447
+ # vector * -1
448
+ #
449
+ # # =>
450
+ # #<RedAmber::Vector(:int16, size=3):0x00000000000e3698>
451
+ # [-1, -2, -3]
452
+ #
453
+ # # coerced calculation
454
+ # -1 * vector
455
+ #
456
+ # # =>
457
+ # #<RedAmber::Vector(:int16, size=3):0x00000000000ea4ac>
458
+ # [-1, -2, -3]
459
+ #
460
+ # # `-@` operator (unary minus)
461
+ # -vector
462
+ #
463
+ # # =>
464
+ # #<RedAmber::Vector(:uint8, size=3):0x00000000000ee7b4>
465
+ # [255, 254, 253]
466
+ #
467
+ def coerce(other)
468
+ [Vector.new(Array(other) * size), self]
469
+ end
470
+
471
+ # Spread the return value of an aggregate function as if
472
+ # it were an element-wise function.
473
+ #
474
+ # @overload propagate(function)
475
+ # Returns a Vector of same size as self spreading the value from function.
476
+ #
477
+ # @param function [Symbol] a name of aggregation function for self.
478
+ # Return value of the function must be a scalar.
479
+ # @return [Vector] Returns a Vector that is the same size as self
480
+ # and such that all elements are the same as the result of aggregation `function`.
481
+ # @example propagate by an aggregation function name
482
+ # vec = Vector.new(1, 2, 3, 4)
483
+ # vec.propagate(:mean)
484
+ # # =>
485
+ # #<RedAmber::Vector(:double, size=4):0x000000000001985c>
486
+ # [2.5, 2.5, 2.5, 2.5]
487
+ #
488
+ # @overload propagate
489
+ # Returns a Vector of same size as self spreading the value from block.
490
+ #
491
+ # @yieldparam self [Vector]
492
+ # gives self to the block.
493
+ # @yieldreturn [scalar]
494
+ # a scalar value.
495
+ # @return [Vector]
496
+ # returns a Vector that is the same size as self
497
+ # and such that all elements are the same as the yielded value from the block.
498
+ # @example propagate by a block
499
+ # vec.propagate { |v| v.mean.round }
500
+ # # =>
501
+ # #<RedAmber::Vector(:uint8, size=4):0x000000000000cb98>
502
+ # [3, 3, 3, 3]
503
+ #
504
+ # @since 0.4.0
505
+ #
506
+ def propagate(function = nil, &block)
507
+ value =
508
+ if block
509
+ raise VectorArgumentError, "can't specify both function and block" if function
510
+
511
+ yield self
512
+ else
513
+ send(function&.to_sym)
514
+ end
515
+ raise VectorArgumentError, 'not an aggregation function' if value.is_a?(Vector)
516
+
517
+ Vector.new([value] * size)
518
+ end
519
+ alias_method :expand, :propagate
520
+
521
+ private # =======
522
+
523
+ def exec_func_unary(function, options)
524
+ options = nil if options.empty?
525
+ find(function).execute([data], options)
526
+ end
527
+
528
+ def exec_func_binary(function, other, options)
529
+ options = nil if options.empty?
530
+ case other
531
+ when Vector
532
+ find(function).execute([data, other.data], options)
533
+ when NilClass
534
+ nils = data.class.new([nil] * size)
535
+ find(function).execute([data, nils], options)
536
+ else
537
+ find(function).execute([data, other], options)
538
+ end
539
+ end
540
+
541
+ def get_scalar(datum)
542
+ output = datum.value
543
+ case output
544
+ when Arrow::StringScalar then output.to_s
545
+ when Arrow::StructScalar
546
+ output.value.map { |s| s.is_a?(Arrow::StringScalar) ? s.to_s : s.value }
547
+ else
548
+ output.value
549
+ end
550
+ end
200
551
  end
201
552
  end