red_amber 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +39 -20
- data/.yardopts +2 -0
- data/CHANGELOG.md +113 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +25 -26
- data/benchmark/basic.yml +2 -2
- data/benchmark/combine.yml +2 -2
- data/benchmark/dataframe.yml +2 -2
- data/benchmark/group.yml +2 -2
- data/benchmark/reshape.yml +2 -2
- data/benchmark/vector.yml +3 -0
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +429 -75
- data/lib/red_amber/data_frame_combinable.rb +516 -66
- data/lib/red_amber/data_frame_displayable.rb +244 -14
- data/lib/red_amber/data_frame_indexable.rb +121 -18
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +622 -66
- data/lib/red_amber/data_frame_variable_operation.rb +446 -34
- data/lib/red_amber/group.rb +187 -22
- data/lib/red_amber/helper.rb +70 -10
- data/lib/red_amber/refinements.rb +12 -5
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +385 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +217 -12
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
data/lib/red_amber/vector.rb
CHANGED
@@ -5,23 +5,57 @@ module RedAmber
|
|
5
5
|
# @data : holds Arrow::ChunkedArray
|
6
6
|
class Vector
|
7
7
|
# mix-in
|
8
|
-
include
|
8
|
+
include Enumerable
|
9
|
+
include Helper
|
10
|
+
include ArrowFunction
|
9
11
|
include VectorUpdatable
|
10
12
|
include VectorSelectable
|
11
|
-
include Helper
|
12
13
|
|
13
14
|
using RefineArrayLike
|
14
15
|
|
15
16
|
# Quicker constructor of Vector.
|
16
17
|
#
|
18
|
+
# @param arrow_array [Arrow::Array]
|
19
|
+
# Arrow::Array object to have in the Vector.
|
20
|
+
# @return [Vector]
|
21
|
+
# created Vector.
|
22
|
+
# @note This method doesn't check argument type.
|
23
|
+
#
|
17
24
|
def self.create(arrow_array)
|
18
25
|
instance = allocate
|
19
26
|
instance.instance_variable_set(:@data, arrow_array)
|
20
27
|
instance
|
21
28
|
end
|
22
29
|
|
30
|
+
# Return true if it is an aggregation function.
|
31
|
+
#
|
32
|
+
# @param function [Symbol]
|
33
|
+
# function name to test.
|
34
|
+
# @return [true, false]
|
35
|
+
# true if function is an aggregation function, otherwise false.
|
36
|
+
#
|
37
|
+
# @example
|
38
|
+
# Vector.aggregate?(:mean) # => true
|
39
|
+
#
|
40
|
+
# Vector.aggregate?(:round) # => false
|
41
|
+
#
|
42
|
+
# @since 0.4.0
|
43
|
+
#
|
44
|
+
def self.aggregate?(function)
|
45
|
+
%i[
|
46
|
+
all all? any any? approximate_median count count_distinct count_uniq
|
47
|
+
max mean median min min_max product quantile sd std stddev sum
|
48
|
+
unbiased_variance var variance
|
49
|
+
].include?(function.to_sym)
|
50
|
+
end
|
51
|
+
|
23
52
|
# Create a Vector.
|
24
53
|
#
|
54
|
+
# @param array [Array, Vector, Range, Arrow::Array, #to_arrow_array]
|
55
|
+
# array-like.
|
56
|
+
# @return [Vector]
|
57
|
+
# created Vector.
|
58
|
+
#
|
25
59
|
# @note default is headless Vector and '@key == nil'
|
26
60
|
def initialize(*array)
|
27
61
|
@data =
|
@@ -39,15 +73,99 @@ module RedAmber
|
|
39
73
|
end
|
40
74
|
end
|
41
75
|
|
76
|
+
# Entity of Vector.
|
77
|
+
#
|
78
|
+
# @return [Arrow::Array]
|
79
|
+
#
|
42
80
|
attr_reader :data
|
43
81
|
alias_method :to_arrow_array, :data
|
44
82
|
|
83
|
+
# Associated key name when self is in a DataFrame.
|
84
|
+
#
|
85
|
+
# Default Vector is 'head-less' (key-less).
|
86
|
+
# @return [Symbol]
|
87
|
+
#
|
45
88
|
attr_accessor :key
|
46
89
|
|
90
|
+
# Return other as a Vector which is same data type as self.
|
91
|
+
#
|
92
|
+
# @param other [Vector, Array, Arrow::Array, Arrow::ChunkedArray]
|
93
|
+
# a source array-like which will be converted.
|
94
|
+
# @return [Vector]
|
95
|
+
# resolved Vector.
|
96
|
+
# @example Integer to String
|
97
|
+
# Vector.new('A').resolve([1, 2])
|
98
|
+
#
|
99
|
+
# # =>
|
100
|
+
# #<RedAmber::Vector(:string, size=2):0x00000000000037b4>
|
101
|
+
# ["1", "2"]
|
102
|
+
#
|
103
|
+
# @example String to Integer
|
104
|
+
# Vector.new(1).resolve(['A'])
|
105
|
+
#
|
106
|
+
# # =>
|
107
|
+
# #<RedAmber::Vector(:uint8, size=1):0x00000000000037dc>
|
108
|
+
# [65]
|
109
|
+
#
|
110
|
+
# @example Upcast to uint16
|
111
|
+
# vector = Vector.new(256)
|
112
|
+
#
|
113
|
+
# # =>
|
114
|
+
# #<RedAmber::Vector(:uint16, size=1):0x000000000000c1fc>
|
115
|
+
# [256]
|
116
|
+
#
|
117
|
+
# vector.resolve([1, 2])
|
118
|
+
#
|
119
|
+
# # =>
|
120
|
+
# # Not a uint8 Vector
|
121
|
+
# #<RedAmber::Vector(:uint16, size=2):0x000000000000c328>
|
122
|
+
# [1, 2]
|
123
|
+
#
|
124
|
+
# @since 0.4.0
|
125
|
+
#
|
126
|
+
def resolve(other)
|
127
|
+
case other
|
128
|
+
when Vector
|
129
|
+
Vector.create(data.resolve(other.data))
|
130
|
+
when Array, Arrow::Array, Arrow::ChunkedArray
|
131
|
+
Vector.create(data.resolve(other))
|
132
|
+
else
|
133
|
+
raise VectorArgumentError, "invalid argument: #{other}"
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
# String representation of self like an Array.
|
138
|
+
#
|
139
|
+
# @return [String]
|
140
|
+
# return self as same as Array's inspect.
|
141
|
+
#
|
47
142
|
def to_s
|
48
143
|
@data.to_a.inspect
|
49
144
|
end
|
50
145
|
|
146
|
+
# String representation of self.
|
147
|
+
#
|
148
|
+
# According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
|
149
|
+
# - If it is 'MINIMUM', returns class and size.
|
150
|
+
# - If it is otherwise, returns class, size and preview.
|
151
|
+
# Default value of the ENV is 'Table'.
|
152
|
+
# @param limit [Integer]
|
153
|
+
# max width of the result.
|
154
|
+
# @return [String]
|
155
|
+
# show information of self as a String.
|
156
|
+
# @example Default (ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table')
|
157
|
+
# puts vector.inspect
|
158
|
+
#
|
159
|
+
# # =>
|
160
|
+
# #<RedAmber::Vector(:uint8, size=3):0x00000000000037f0>
|
161
|
+
# [1, 2, 3]
|
162
|
+
#
|
163
|
+
# @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'Minimum'
|
164
|
+
# puts vector.inspect
|
165
|
+
#
|
166
|
+
# # =>
|
167
|
+
# RedAmber::Vector(:uint8, size=3)
|
168
|
+
#
|
51
169
|
def inspect(limit: 80)
|
52
170
|
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table').casecmp('MINIMUM').zero?
|
53
171
|
# Better performance than `.upcase == 'MINIMUM'`
|
@@ -70,82 +188,187 @@ module RedAmber
|
|
70
188
|
end
|
71
189
|
end
|
72
190
|
|
191
|
+
# Convert to an Array.
|
192
|
+
#
|
193
|
+
# @return [Array]
|
194
|
+
# array representation.
|
195
|
+
#
|
73
196
|
def to_ary
|
74
197
|
@data.values
|
75
198
|
end
|
76
|
-
|
77
199
|
alias_method :to_a, :to_ary
|
78
200
|
alias_method :values, :to_ary
|
79
201
|
alias_method :entries, :to_ary
|
80
202
|
|
203
|
+
# Indices from 0 to size-1 as an Array.
|
204
|
+
#
|
205
|
+
# @return [Array]
|
206
|
+
# indices.
|
207
|
+
#
|
81
208
|
def indices
|
82
209
|
(0...size).to_a
|
83
210
|
end
|
84
|
-
|
85
211
|
alias_method :indexes, :indices
|
86
212
|
alias_method :indeces, :indices
|
87
213
|
|
214
|
+
# Vector size.
|
215
|
+
#
|
216
|
+
# @return [Integer]
|
217
|
+
# size of self.
|
218
|
+
#
|
88
219
|
def size
|
89
220
|
# only defined :length in Arrow?
|
90
221
|
@data.length
|
91
222
|
end
|
92
|
-
|
93
223
|
alias_method :length, :size
|
94
224
|
alias_method :n_rows, :size
|
95
225
|
alias_method :nrow, :size
|
96
226
|
|
227
|
+
# Test whether self is empty.
|
228
|
+
#
|
229
|
+
# @return [true, false]
|
230
|
+
# true if self is empty.
|
231
|
+
#
|
97
232
|
def empty?
|
98
233
|
size.zero?
|
99
234
|
end
|
100
235
|
|
236
|
+
# Type nickname of self.
|
237
|
+
#
|
238
|
+
# @return [Symbol]
|
239
|
+
# type nickname of values.
|
240
|
+
#
|
101
241
|
def type
|
102
242
|
list? ? :list : @data.value_type.nick.to_sym
|
103
243
|
end
|
104
244
|
|
245
|
+
# Type Class of self.
|
246
|
+
#
|
247
|
+
# @return [type_Class]
|
248
|
+
# type class.
|
249
|
+
#
|
250
|
+
def type_class
|
251
|
+
@data.type_class
|
252
|
+
end
|
253
|
+
|
254
|
+
# Test if self is a boolean Vector.
|
255
|
+
#
|
256
|
+
# @return [true, false]
|
257
|
+
# test result.
|
258
|
+
#
|
105
259
|
def boolean?
|
106
260
|
@data.boolean?
|
107
261
|
end
|
108
262
|
|
263
|
+
# Test if self is a numeric Vector.
|
264
|
+
#
|
265
|
+
# @return [true, false]
|
266
|
+
# test result.
|
267
|
+
#
|
109
268
|
def numeric?
|
110
269
|
@data.numeric?
|
111
270
|
end
|
112
271
|
|
272
|
+
# Test if self is a float Vector.
|
273
|
+
#
|
274
|
+
# @return [true, false]
|
275
|
+
# test result.
|
276
|
+
#
|
113
277
|
def float?
|
114
278
|
@data.float?
|
115
279
|
end
|
116
280
|
|
281
|
+
# Test if self is an integer Vector.
|
282
|
+
#
|
283
|
+
# @return [true, false]
|
284
|
+
# test result.
|
285
|
+
#
|
117
286
|
def integer?
|
118
287
|
@data.integer?
|
119
288
|
end
|
120
289
|
|
290
|
+
# Test if self is a string Vector.
|
291
|
+
#
|
292
|
+
# @return [true, false]
|
293
|
+
# test result.
|
294
|
+
#
|
121
295
|
def string?
|
122
296
|
@data.string?
|
123
297
|
end
|
124
298
|
|
299
|
+
# Test if self is a dictionary Vector.
|
300
|
+
#
|
301
|
+
# @return [true, false]
|
302
|
+
# test result.
|
303
|
+
#
|
125
304
|
def dictionary?
|
126
305
|
@data.dictionary?
|
127
306
|
end
|
128
307
|
|
308
|
+
# Test if self is a temporal Vector.
|
309
|
+
#
|
310
|
+
# @return [true, false]
|
311
|
+
# test result.
|
312
|
+
#
|
129
313
|
def temporal?
|
130
314
|
@data.temporal?
|
131
315
|
end
|
132
316
|
|
317
|
+
# Test if self is a list Vector.
|
318
|
+
#
|
319
|
+
# @return [true, false]
|
320
|
+
# test result.
|
321
|
+
#
|
133
322
|
def list?
|
134
323
|
@data.list?
|
135
324
|
end
|
136
325
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
326
|
+
# Iterates over Vector elements or returns an Enumerator.
|
327
|
+
#
|
328
|
+
# @overload each
|
329
|
+
# Returns a new Enumerator if no block given.
|
330
|
+
#
|
331
|
+
# @return [Enumerator]
|
332
|
+
# Enumerator of each element.
|
333
|
+
#
|
334
|
+
# @overload each
|
335
|
+
# When a block is given, passes each element in self to the block.
|
336
|
+
#
|
337
|
+
# @yieldparam element [Object]
|
338
|
+
# passes element by a block parameter.
|
339
|
+
# @yieldreturn [Object]
|
340
|
+
# evaluated result value from the block.
|
341
|
+
# @return [self]
|
342
|
+
# returns self.
|
343
|
+
#
|
141
344
|
def each
|
142
345
|
return enum_for(:each) unless block_given?
|
143
346
|
|
144
347
|
size.times do |i|
|
145
348
|
yield data[i]
|
146
349
|
end
|
350
|
+
self
|
147
351
|
end
|
148
352
|
|
353
|
+
# Returns a Vector from collected objects from the block.
|
354
|
+
#
|
355
|
+
# @overload map
|
356
|
+
# Returns a new Enumerator if no block given.
|
357
|
+
#
|
358
|
+
# @return [Enumerator]
|
359
|
+
# a new Enumerator.
|
360
|
+
#
|
361
|
+
# @overload map
|
362
|
+
# When a block is given, calls the block with successive elements.
|
363
|
+
# Returns a Vector of the objects returned by the block.
|
364
|
+
#
|
365
|
+
# @yieldparam element [Object]
|
366
|
+
# passes element by a block parameter.
|
367
|
+
# @yieldreturn [Object]
|
368
|
+
# evaluated result value from the block.
|
369
|
+
# @return [Vector]
|
370
|
+
# returns the collected values from the block as a Vector.
|
371
|
+
#
|
149
372
|
def map(&block)
|
150
373
|
return enum_for(:map) unless block
|
151
374
|
|
@@ -153,18 +376,35 @@ module RedAmber
|
|
153
376
|
end
|
154
377
|
alias_method :collect, :map
|
155
378
|
|
156
|
-
#
|
379
|
+
# Tests whether self is chunked or not.
|
380
|
+
#
|
381
|
+
# @api private
|
382
|
+
# @return [true, false]
|
383
|
+
# returns true if #data is chunked.
|
384
|
+
#
|
157
385
|
def chunked?
|
158
386
|
@data.is_a? Arrow::ChunkedArray
|
159
387
|
end
|
160
388
|
|
161
|
-
#
|
389
|
+
# Returns the number of chunks.
|
390
|
+
#
|
391
|
+
# @api private
|
392
|
+
# @return [Integer]
|
393
|
+
# the number of chunks. If self is not chunked, returns zero.
|
394
|
+
#
|
162
395
|
def n_chunks
|
163
396
|
chunked? ? @data.n_chunks : 0
|
164
397
|
end
|
165
398
|
|
166
399
|
# def each_chunk() end
|
167
400
|
|
401
|
+
# Returns a hash containing the counts of equal elements.
|
402
|
+
#
|
403
|
+
# - Each key is an element of self.
|
404
|
+
# - Each value is the number of elements equal to the key.
|
405
|
+
# @return [Hash]
|
406
|
+
# result in a Hash.
|
407
|
+
#
|
168
408
|
def tally
|
169
409
|
hash = values.tally
|
170
410
|
if (type_class < Arrow::FloatingPointDataType) && is_nan.any
|
@@ -180,22 +420,156 @@ module RedAmber
|
|
180
420
|
hash
|
181
421
|
end
|
182
422
|
|
423
|
+
# @api private
|
424
|
+
# Arrow implementation of #tally
|
183
425
|
def value_counts
|
184
426
|
values, counts = Arrow::Function.find(:value_counts).execute([data]).value.fields
|
185
427
|
values.zip(counts).to_h
|
186
428
|
end
|
187
429
|
|
430
|
+
# Count nils in self.
|
431
|
+
#
|
432
|
+
# @return [Integer]
|
433
|
+
# the number of nils.
|
434
|
+
#
|
188
435
|
def n_nulls
|
189
436
|
@data.n_nulls
|
190
437
|
end
|
191
438
|
alias_method :n_nils, :n_nulls
|
192
439
|
|
440
|
+
# Count NaNs in self if self is a numeric Vector
|
441
|
+
#
|
442
|
+
# @return [Integer]
|
443
|
+
# the number of Float::NANs. If self is not a numeric Vector,
|
444
|
+
# returns 0.
|
445
|
+
#
|
193
446
|
def n_nans
|
194
447
|
numeric? ? is_nan.to_a.count(true) : 0
|
195
448
|
end
|
196
449
|
|
450
|
+
# Return true if self has any nil.
|
451
|
+
#
|
452
|
+
# @return [true, false]
|
453
|
+
# true or false.
|
454
|
+
#
|
197
455
|
def has_nil?
|
198
456
|
is_nil.any
|
199
457
|
end
|
458
|
+
|
459
|
+
# Enable to compute with coercion mechanism.
|
460
|
+
#
|
461
|
+
# @example
|
462
|
+
# vector = Vector.new(1,2,3)
|
463
|
+
#
|
464
|
+
# # =>
|
465
|
+
# #<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
|
466
|
+
# [1, 2, 3]
|
467
|
+
#
|
468
|
+
# # Vector's `#*` method
|
469
|
+
# vector * -1
|
470
|
+
#
|
471
|
+
# # =>
|
472
|
+
# #<RedAmber::Vector(:int16, size=3):0x00000000000e3698>
|
473
|
+
# [-1, -2, -3]
|
474
|
+
#
|
475
|
+
# # coerced calculation
|
476
|
+
# -1 * vector
|
477
|
+
#
|
478
|
+
# # =>
|
479
|
+
# #<RedAmber::Vector(:int16, size=3):0x00000000000ea4ac>
|
480
|
+
# [-1, -2, -3]
|
481
|
+
#
|
482
|
+
# # `@-` operator
|
483
|
+
# -vector
|
484
|
+
#
|
485
|
+
# # =>
|
486
|
+
# #<RedAmber::Vector(:uint8, size=3):0x00000000000ee7b4>
|
487
|
+
# [255, 254, 253]
|
488
|
+
#
|
489
|
+
def coerce(other)
|
490
|
+
[Vector.new(Array(other) * size), self]
|
491
|
+
end
|
492
|
+
|
493
|
+
# Spread the return value of an aggregate function as if
|
494
|
+
# it is an element-wise function.
|
495
|
+
#
|
496
|
+
# @overload propagate(function)
|
497
|
+
# Returns a Vector of same size as self spreading the value from function.
|
498
|
+
#
|
499
|
+
# @param function [Symbol] a name of aggregation function for self.
|
500
|
+
# Return value of the function must be a scalar.
|
501
|
+
# @return [Vector] Returns a Vector that is the same size as self
|
502
|
+
# and such that all elements are the same as the result of aggregation `function`.
|
503
|
+
# @example propagate by an aggregation function name
|
504
|
+
# vec = Vector.new(1, 2, 3, 4)
|
505
|
+
# vec.propagate(:mean)
|
506
|
+
# # =>
|
507
|
+
# #<RedAmber::Vector(:double, size=4):0x000000000001985c>
|
508
|
+
# [2.5, 2.5, 2.5, 2.5]
|
509
|
+
#
|
510
|
+
# @overload propagate
|
511
|
+
# Returns a Vector of same size as self spreading the value from block.
|
512
|
+
#
|
513
|
+
# @yieldparam self [Vector]
|
514
|
+
# gives self to the block.
|
515
|
+
# @yieldreturn [scalar]
|
516
|
+
# a scalar value.
|
517
|
+
# @return [Vector]
|
518
|
+
# returns a Vector that is the same size as self
|
519
|
+
# and such that all elements are the same as the yielded value from the block.
|
520
|
+
# @example propagate by a block
|
521
|
+
# vec.propagate { |v| v.mean.round }
|
522
|
+
# # =>
|
523
|
+
# #<RedAmber::Vector(:uint8, size=4):0x000000000000cb98>
|
524
|
+
# [3, 3, 3, 3]
|
525
|
+
#
|
526
|
+
# @since 0.4.0
|
527
|
+
#
|
528
|
+
def propagate(function = nil, &block)
|
529
|
+
value =
|
530
|
+
if block
|
531
|
+
raise VectorArgumentError, "can't specify both function and block" if function
|
532
|
+
|
533
|
+
yield self
|
534
|
+
else
|
535
|
+
function = function&.to_sym
|
536
|
+
unless function && respond_to?(function) && Vector.aggregate?(function)
|
537
|
+
raise VectorArgumentError, "illegal function: #{function.inspect}"
|
538
|
+
end
|
539
|
+
|
540
|
+
send(function)
|
541
|
+
end
|
542
|
+
Vector.new([value] * size)
|
543
|
+
end
|
544
|
+
alias_method :expand, :propagate
|
545
|
+
|
546
|
+
private # =======
|
547
|
+
|
548
|
+
def exec_func_unary(function, options)
|
549
|
+
options = nil if options.empty?
|
550
|
+
find(function).execute([data], options)
|
551
|
+
end
|
552
|
+
|
553
|
+
def exec_func_binary(function, other, options)
|
554
|
+
options = nil if options.empty?
|
555
|
+
case other
|
556
|
+
when Vector
|
557
|
+
find(function).execute([data, other.data], options)
|
558
|
+
when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar,
|
559
|
+
Array, Numeric, String, TrueClass, FalseClass
|
560
|
+
find(function).execute([data, other], options)
|
561
|
+
end
|
562
|
+
end
|
563
|
+
|
564
|
+
def get_scalar(datum)
|
565
|
+
output = datum.value
|
566
|
+
case output
|
567
|
+
when Arrow::StringScalar then output.to_s
|
568
|
+
when Arrow::StructScalar
|
569
|
+
output.value.map { |s| s.is_a?(Arrow::StringScalar) ? s.to_s : s.value }
|
570
|
+
else
|
571
|
+
output.value
|
572
|
+
end
|
573
|
+
end
|
200
574
|
end
|
201
575
|
end
|