red_amber 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +39 -20
- data/.yardopts +2 -0
- data/CHANGELOG.md +113 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +25 -26
- data/benchmark/basic.yml +2 -2
- data/benchmark/combine.yml +2 -2
- data/benchmark/dataframe.yml +2 -2
- data/benchmark/group.yml +2 -2
- data/benchmark/reshape.yml +2 -2
- data/benchmark/vector.yml +3 -0
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +429 -75
- data/lib/red_amber/data_frame_combinable.rb +516 -66
- data/lib/red_amber/data_frame_displayable.rb +244 -14
- data/lib/red_amber/data_frame_indexable.rb +121 -18
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +622 -66
- data/lib/red_amber/data_frame_variable_operation.rb +446 -34
- data/lib/red_amber/group.rb +187 -22
- data/lib/red_amber/helper.rb +70 -10
- data/lib/red_amber/refinements.rb +12 -5
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +385 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +217 -12
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
data/lib/red_amber/vector.rb
CHANGED
@@ -5,23 +5,57 @@ module RedAmber
|
|
5
5
|
# @data : holds Arrow::ChunkedArray
|
6
6
|
class Vector
|
7
7
|
# mix-in
|
8
|
-
include
|
8
|
+
include Enumerable
|
9
|
+
include Helper
|
10
|
+
include ArrowFunction
|
9
11
|
include VectorUpdatable
|
10
12
|
include VectorSelectable
|
11
|
-
include Helper
|
12
13
|
|
13
14
|
using RefineArrayLike
|
14
15
|
|
15
16
|
# Quicker constructor of Vector.
|
16
17
|
#
|
18
|
+
# @param arrow_array [Arrow::Array]
|
19
|
+
# Arrow::Array object to have in the Vector.
|
20
|
+
# @return [Vector]
|
21
|
+
# created Vector.
|
22
|
+
# @note This method doesn't check argument type.
|
23
|
+
#
|
17
24
|
def self.create(arrow_array)
|
18
25
|
instance = allocate
|
19
26
|
instance.instance_variable_set(:@data, arrow_array)
|
20
27
|
instance
|
21
28
|
end
|
22
29
|
|
30
|
+
# Return true if it is an aggregation function.
|
31
|
+
#
|
32
|
+
# @param function [Symbol]
|
33
|
+
# function name to test.
|
34
|
+
# @return [Boolean]
|
35
|
+
# true if function is an aggregation function, otherwise false.
|
36
|
+
#
|
37
|
+
# @example
|
38
|
+
# Vector.aggregate?(:mean) # => true
|
39
|
+
#
|
40
|
+
# Vector.aggregate?(:round) # => false
|
41
|
+
#
|
42
|
+
# @since 0.4.0
|
43
|
+
#
|
44
|
+
def self.aggregate?(function)
|
45
|
+
%i[
|
46
|
+
all all? any any? approximate_median count count_distinct count_uniq
|
47
|
+
max mean median min min_max product quantile sd std stddev sum
|
48
|
+
unbiased_variance var variance
|
49
|
+
].include?(function.to_sym)
|
50
|
+
end
|
51
|
+
|
23
52
|
# Create a Vector.
|
24
53
|
#
|
54
|
+
# @param array [Array, Vector, Range, Arrow::Array, #to_arrow_array]
|
55
|
+
# array-like.
|
56
|
+
# @return [Vector]
|
57
|
+
# created Vector.
|
58
|
+
#
|
25
59
|
# @note default is headless Vector and '@key == nil'
|
26
60
|
def initialize(*array)
|
27
61
|
@data =
|
@@ -39,15 +73,99 @@ module RedAmber
|
|
39
73
|
end
|
40
74
|
end
|
41
75
|
|
76
|
+
# Entity of Vector.
|
77
|
+
#
|
78
|
+
# @return [Arrow::Array]
|
79
|
+
#
|
42
80
|
attr_reader :data
|
43
81
|
alias_method :to_arrow_array, :data
|
44
82
|
|
83
|
+
# Associated key name when self is in a DataFrame.
|
84
|
+
#
|
85
|
+
# Default Vector is 'head-less' (key-less).
|
86
|
+
# @return [Symbol]
|
87
|
+
#
|
45
88
|
attr_accessor :key
|
46
89
|
|
90
|
+
# Return other as a Vector which is same data type as self.
|
91
|
+
#
|
92
|
+
# @param other [Vector, Array, Arrow::Array, Arrow::ChunkedArray]
|
93
|
+
# a source array-like which will be converted.
|
94
|
+
# @return [Vector]
|
95
|
+
# resolved Vector.
|
96
|
+
# @example Integer to String
|
97
|
+
# Vector.new('A').resolve([1, 2])
|
98
|
+
#
|
99
|
+
# # =>
|
100
|
+
# #<RedAmber::Vector(:string, size=2):0x00000000000037b4>
|
101
|
+
# ["1", "2"]
|
102
|
+
#
|
103
|
+
# @example String to Integer
|
104
|
+
# Vector.new(1).resolve(['A'])
|
105
|
+
#
|
106
|
+
# # =>
|
107
|
+
# #<RedAmber::Vector(:uint8, size=1):0x00000000000037dc>
|
108
|
+
# [65]
|
109
|
+
#
|
110
|
+
# @example Upcast to uint16
|
111
|
+
# vector = Vector.new(256)
|
112
|
+
#
|
113
|
+
# # =>
|
114
|
+
# #<RedAmber::Vector(:uint16, size=1):0x000000000000c1fc>
|
115
|
+
# [256]
|
116
|
+
#
|
117
|
+
# vector.resolve([1, 2])
|
118
|
+
#
|
119
|
+
# # =>
|
120
|
+
# # Not a uint8 Vector
|
121
|
+
# #<RedAmber::Vector(:uint16, size=2):0x000000000000c328>
|
122
|
+
# [1, 2]
|
123
|
+
#
|
124
|
+
# @since 0.4.0
|
125
|
+
#
|
126
|
+
def resolve(other)
|
127
|
+
case other
|
128
|
+
when Vector
|
129
|
+
Vector.create(data.resolve(other.data))
|
130
|
+
when Array, Arrow::Array, Arrow::ChunkedArray
|
131
|
+
Vector.create(data.resolve(other))
|
132
|
+
else
|
133
|
+
raise VectorArgumentError, "invalid argument: #{other}"
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
# String representation of self like an Array.
|
138
|
+
#
|
139
|
+
# @return [String]
|
140
|
+
# returns the same String as Array#inspect would for the values.
|
141
|
+
#
|
47
142
|
def to_s
|
48
143
|
@data.to_a.inspect
|
49
144
|
end
|
50
145
|
|
146
|
+
# String representation of self.
|
147
|
+
#
|
148
|
+
# According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
|
149
|
+
# - If it is 'MINIMUM', returns class and size.
|
150
|
+
# - If it is otherwise, returns class, size and preview.
|
151
|
+
# Default value of the ENV is 'Table'.
|
152
|
+
# @param limit [Integer]
|
153
|
+
# max width of the result.
|
154
|
+
# @return [String]
|
155
|
+
# show information of self as a String.
|
156
|
+
# @example Default (ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table')
|
157
|
+
# puts vector.inspect
|
158
|
+
#
|
159
|
+
# # =>
|
160
|
+
# #<RedAmber::Vector(:uint8, size=3):0x00000000000037f0>
|
161
|
+
# [1, 2, 3]
|
162
|
+
#
|
163
|
+
# @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'Minimum'
|
164
|
+
# puts vector.inspect
|
165
|
+
#
|
166
|
+
# # =>
|
167
|
+
# RedAmber::Vector(:uint8, size=3)
|
168
|
+
#
|
51
169
|
def inspect(limit: 80)
|
52
170
|
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table').casecmp('MINIMUM').zero?
|
53
171
|
# Better performance than `.upcase == 'MINIMUM'`
|
@@ -70,82 +188,187 @@ module RedAmber
|
|
70
188
|
end
|
71
189
|
end
|
72
190
|
|
191
|
+
# Convert to an Array.
|
192
|
+
#
|
193
|
+
# @return [Array]
|
194
|
+
# array representation.
|
195
|
+
#
|
73
196
|
def to_ary
|
74
197
|
@data.values
|
75
198
|
end
|
76
|
-
|
77
199
|
alias_method :to_a, :to_ary
|
78
200
|
alias_method :values, :to_ary
|
79
201
|
alias_method :entries, :to_ary
|
80
202
|
|
203
|
+
# Indices from 0 to size-1 as an Array.
|
204
|
+
#
|
205
|
+
# @return [Array]
|
206
|
+
# indices.
|
207
|
+
#
|
81
208
|
def indices
|
82
209
|
(0...size).to_a
|
83
210
|
end
|
84
|
-
|
85
211
|
alias_method :indexes, :indices
|
86
212
|
alias_method :indeces, :indices
|
87
213
|
|
214
|
+
# Vector size.
|
215
|
+
#
|
216
|
+
# @return [Integer]
|
217
|
+
# size of self.
|
218
|
+
#
|
88
219
|
def size
|
89
220
|
# only defined :length in Arrow?
|
90
221
|
@data.length
|
91
222
|
end
|
92
|
-
|
93
223
|
alias_method :length, :size
|
94
224
|
alias_method :n_rows, :size
|
95
225
|
alias_method :nrow, :size
|
96
226
|
|
227
|
+
# Test whether self is empty.
|
228
|
+
#
|
229
|
+
# @return [true, false]
|
230
|
+
# true if self is empty.
|
231
|
+
#
|
97
232
|
def empty?
|
98
233
|
size.zero?
|
99
234
|
end
|
100
235
|
|
236
|
+
# Type nickname of self.
|
237
|
+
#
|
238
|
+
# @return [Symbol]
|
239
|
+
# type nickname of values.
|
240
|
+
#
|
101
241
|
def type
|
102
242
|
list? ? :list : @data.value_type.nick.to_sym
|
103
243
|
end
|
104
244
|
|
245
|
+
# Type Class of self.
|
246
|
+
#
|
247
|
+
# @return [type_Class]
|
248
|
+
# type class.
|
249
|
+
#
|
250
|
+
def type_class
|
251
|
+
@data.type_class
|
252
|
+
end
|
253
|
+
|
254
|
+
# Test if self is a boolean Vector.
|
255
|
+
#
|
256
|
+
# @return [true, false]
|
257
|
+
# test result.
|
258
|
+
#
|
105
259
|
def boolean?
|
106
260
|
@data.boolean?
|
107
261
|
end
|
108
262
|
|
263
|
+
# Test if self is a numeric Vector.
|
264
|
+
#
|
265
|
+
# @return [true, false]
|
266
|
+
# test result.
|
267
|
+
#
|
109
268
|
def numeric?
|
110
269
|
@data.numeric?
|
111
270
|
end
|
112
271
|
|
272
|
+
# Test if self is a float Vector.
|
273
|
+
#
|
274
|
+
# @return [true, false]
|
275
|
+
# test result.
|
276
|
+
#
|
113
277
|
def float?
|
114
278
|
@data.float?
|
115
279
|
end
|
116
280
|
|
281
|
+
# Test if self is an integer Vector.
|
282
|
+
#
|
283
|
+
# @return [true, false]
|
284
|
+
# test result.
|
285
|
+
#
|
117
286
|
def integer?
|
118
287
|
@data.integer?
|
119
288
|
end
|
120
289
|
|
290
|
+
# Test if self is a string Vector.
|
291
|
+
#
|
292
|
+
# @return [true, false]
|
293
|
+
# test result.
|
294
|
+
#
|
121
295
|
def string?
|
122
296
|
@data.string?
|
123
297
|
end
|
124
298
|
|
299
|
+
# Test if self is a dictionary Vector.
|
300
|
+
#
|
301
|
+
# @return [true, false]
|
302
|
+
# test result.
|
303
|
+
#
|
125
304
|
def dictionary?
|
126
305
|
@data.dictionary?
|
127
306
|
end
|
128
307
|
|
308
|
+
# Test if self is a temporal Vector.
|
309
|
+
#
|
310
|
+
# @return [true, false]
|
311
|
+
# test result.
|
312
|
+
#
|
129
313
|
def temporal?
|
130
314
|
@data.temporal?
|
131
315
|
end
|
132
316
|
|
317
|
+
# Test if self is a list Vector.
|
318
|
+
#
|
319
|
+
# @return [true, false]
|
320
|
+
# test result.
|
321
|
+
#
|
133
322
|
def list?
|
134
323
|
@data.list?
|
135
324
|
end
|
136
325
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
326
|
+
# Iterates over Vector elements or returns an Enumerator.
|
327
|
+
#
|
328
|
+
# @overload each
|
329
|
+
# Returns a new Enumerator if no block given.
|
330
|
+
#
|
331
|
+
# @return [Enumerator]
|
332
|
+
# Enumerator over each element.
|
333
|
+
#
|
334
|
+
# @overload each
|
335
|
+
# When a block given, passes each element in self to the block.
|
336
|
+
#
|
337
|
+
# @yieldparam element [Object]
|
338
|
+
# passes element by a block parameter.
|
339
|
+
# @yieldreturn [Object]
|
340
|
+
# evaluated result value from the block.
|
341
|
+
# @return [self]
|
342
|
+
# returns self.
|
343
|
+
#
|
141
344
|
def each
|
142
345
|
return enum_for(:each) unless block_given?
|
143
346
|
|
144
347
|
size.times do |i|
|
145
348
|
yield data[i]
|
146
349
|
end
|
350
|
+
self
|
147
351
|
end
|
148
352
|
|
353
|
+
# Returns a Vector from collected objects from the block.
|
354
|
+
#
|
355
|
+
# @overload map
|
356
|
+
# Returns a new Enumerator if no block given.
|
357
|
+
#
|
358
|
+
# @return [Enumerator]
|
359
|
+
# a new Enumerator.
|
360
|
+
#
|
361
|
+
# @overload map
|
362
|
+
# When a block given, calls the block with successive elements.
|
363
|
+
# Returns a Vector of the objects returned by the block.
|
364
|
+
#
|
365
|
+
# @yieldparam element [Object]
|
366
|
+
# passes element by a block parameter.
|
367
|
+
# @yieldreturn [Object]
|
368
|
+
# evaluated result value from the block.
|
369
|
+
# @return [Vector]
|
370
|
+
# returns the collected values from the block as a Vector.
|
371
|
+
#
|
149
372
|
def map(&block)
|
150
373
|
return enum_for(:map) unless block
|
151
374
|
|
@@ -153,18 +376,35 @@ module RedAmber
|
|
153
376
|
end
|
154
377
|
alias_method :collect, :map
|
155
378
|
|
156
|
-
#
|
379
|
+
# Tests whether self is chunked or not.
|
380
|
+
#
|
381
|
+
# @api private
|
382
|
+
# @return [true, false]
|
383
|
+
# returns true if #data is chunked.
|
384
|
+
#
|
157
385
|
def chunked?
|
158
386
|
@data.is_a? Arrow::ChunkedArray
|
159
387
|
end
|
160
388
|
|
161
|
-
#
|
389
|
+
# Returns the number of chunks.
|
390
|
+
#
|
391
|
+
# @api private
|
392
|
+
# @return [Integer]
|
393
|
+
# the number of chunks. If self is not chunked, returns zero.
|
394
|
+
#
|
162
395
|
def n_chunks
|
163
396
|
chunked? ? @data.n_chunks : 0
|
164
397
|
end
|
165
398
|
|
166
399
|
# def each_chunk() end
|
167
400
|
|
401
|
+
# Returns a hash containing the counts of equal elements.
|
402
|
+
#
|
403
|
+
# - Each key is an element of self.
|
404
|
+
# - Each value is the number of elements equal to the key.
|
405
|
+
# @return [Hash]
|
406
|
+
# result in a Hash.
|
407
|
+
#
|
168
408
|
def tally
|
169
409
|
hash = values.tally
|
170
410
|
if (type_class < Arrow::FloatingPointDataType) && is_nan.any
|
@@ -180,22 +420,156 @@ module RedAmber
|
|
180
420
|
hash
|
181
421
|
end
|
182
422
|
|
423
|
+
# @api private
|
424
|
+
# Arrow implementation of #tally
|
183
425
|
def value_counts
|
184
426
|
values, counts = Arrow::Function.find(:value_counts).execute([data]).value.fields
|
185
427
|
values.zip(counts).to_h
|
186
428
|
end
|
187
429
|
|
430
|
+
# Count nils in self.
|
431
|
+
#
|
432
|
+
# @return [Integer]
|
433
|
+
# the number of nils.
|
434
|
+
#
|
188
435
|
def n_nulls
|
189
436
|
@data.n_nulls
|
190
437
|
end
|
191
438
|
alias_method :n_nils, :n_nulls
|
192
439
|
|
440
|
+
# Count NaNs in self if self is a numeric Vector
|
441
|
+
#
|
442
|
+
# @return [Integer]
|
443
|
+
# the number of Float::NANs. If self is not a numeric Vector,
|
444
|
+
# returns 0.
|
445
|
+
#
|
193
446
|
def n_nans
|
194
447
|
numeric? ? is_nan.to_a.count(true) : 0
|
195
448
|
end
|
196
449
|
|
450
|
+
# Return true if self has any nil.
|
451
|
+
#
|
452
|
+
# @return [true, false]
|
453
|
+
# true or false.
|
454
|
+
#
|
197
455
|
def has_nil?
|
198
456
|
is_nil.any
|
199
457
|
end
|
458
|
+
|
459
|
+
# Enable to compute with coercion mechanism.
|
460
|
+
#
|
461
|
+
# @example
|
462
|
+
# vector = Vector.new(1,2,3)
|
463
|
+
#
|
464
|
+
# # =>
|
465
|
+
# #<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
|
466
|
+
# [1, 2, 3]
|
467
|
+
#
|
468
|
+
# # Vector's `#*` method
|
469
|
+
# vector * -1
|
470
|
+
#
|
471
|
+
# # =>
|
472
|
+
# #<RedAmber::Vector(:int16, size=3):0x00000000000e3698>
|
473
|
+
# [-1, -2, -3]
|
474
|
+
#
|
475
|
+
# # coerced calculation
|
476
|
+
# -1 * vector
|
477
|
+
#
|
478
|
+
# # =>
|
479
|
+
# #<RedAmber::Vector(:int16, size=3):0x00000000000ea4ac>
|
480
|
+
# [-1, -2, -3]
|
481
|
+
#
|
482
|
+
# # `@-` operator
|
483
|
+
# -vector
|
484
|
+
#
|
485
|
+
# # =>
|
486
|
+
# #<RedAmber::Vector(:uint8, size=3):0x00000000000ee7b4>
|
487
|
+
# [255, 254, 253]
|
488
|
+
#
|
489
|
+
def coerce(other)
|
490
|
+
[Vector.new(Array(other) * size), self]
|
491
|
+
end
|
492
|
+
|
493
|
+
# Spread the return value of an aggregate function as if
|
494
|
+
# it is an element-wise function.
|
495
|
+
#
|
496
|
+
# @overload propagate(function)
|
497
|
+
# Returns a Vector of same size as self spreading the value from function.
|
498
|
+
#
|
499
|
+
# @param function [Symbol] a name of aggregation function for self.
|
500
|
+
# Return value of the function must be a scalar.
|
501
|
+
# @return [Vector] Returns a Vector that is the same size as self
|
502
|
+
# and such that all elements are the same as the result of aggregation `function`.
|
503
|
+
# @example propagate by an aggregation function name
|
504
|
+
# vec = Vector.new(1, 2, 3, 4)
|
505
|
+
# vec.propagate(:mean)
|
506
|
+
# # =>
|
507
|
+
# #<RedAmber::Vector(:double, size=4):0x000000000001985c>
|
508
|
+
# [2.5, 2.5, 2.5, 2.5]
|
509
|
+
#
|
510
|
+
# @overload propagate
|
511
|
+
# Returns a Vector of same size as self spreading the value from block.
|
512
|
+
#
|
513
|
+
# @yieldparam self [Vector]
|
514
|
+
# gives self to the block.
|
515
|
+
# @yieldreturn [scalar]
|
516
|
+
# a scalar value.
|
517
|
+
# @return [Vector]
|
518
|
+
# returns a Vector that is the same size as self
|
519
|
+
# and such that all elements are the same as the yielded value from the block.
|
520
|
+
# @example propagate by a block
|
521
|
+
# vec.propagate { |v| v.mean.round }
|
522
|
+
# # =>
|
523
|
+
# #<RedAmber::Vector(:uint8, size=4):0x000000000000cb98>
|
524
|
+
# [3, 3, 3, 3]
|
525
|
+
#
|
526
|
+
# @since 0.4.0
|
527
|
+
#
|
528
|
+
def propagate(function = nil, &block)
|
529
|
+
value =
|
530
|
+
if block
|
531
|
+
raise VectorArgumentError, "can't specify both function and block" if function
|
532
|
+
|
533
|
+
yield self
|
534
|
+
else
|
535
|
+
function = function&.to_sym
|
536
|
+
unless function && respond_to?(function) && Vector.aggregate?(function)
|
537
|
+
raise VectorArgumentError, "illegal function: #{function.inspect}"
|
538
|
+
end
|
539
|
+
|
540
|
+
send(function)
|
541
|
+
end
|
542
|
+
Vector.new([value] * size)
|
543
|
+
end
|
544
|
+
alias_method :expand, :propagate
|
545
|
+
|
546
|
+
private # =======
|
547
|
+
|
548
|
+
def exec_func_unary(function, options)
|
549
|
+
options = nil if options.empty?
|
550
|
+
find(function).execute([data], options)
|
551
|
+
end
|
552
|
+
|
553
|
+
def exec_func_binary(function, other, options)
|
554
|
+
options = nil if options.empty?
|
555
|
+
case other
|
556
|
+
when Vector
|
557
|
+
find(function).execute([data, other.data], options)
|
558
|
+
when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar,
|
559
|
+
Array, Numeric, String, TrueClass, FalseClass
|
560
|
+
find(function).execute([data, other], options)
|
561
|
+
end
|
562
|
+
end
|
563
|
+
|
564
|
+
def get_scalar(datum)
|
565
|
+
output = datum.value
|
566
|
+
case output
|
567
|
+
when Arrow::StringScalar then output.to_s
|
568
|
+
when Arrow::StructScalar
|
569
|
+
output.value.map { |s| s.is_a?(Arrow::StringScalar) ? s.to_s : s.value }
|
570
|
+
else
|
571
|
+
output.value
|
572
|
+
end
|
573
|
+
end
|
200
574
|
end
|
201
575
|
end
|