red_amber 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +56 -22
- data/.yardopts +2 -0
- data/CHANGELOG.md +178 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +29 -30
- data/benchmark/basic.yml +7 -7
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -3
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +454 -85
- data/lib/red_amber/data_frame_combinable.rb +609 -115
- data/lib/red_amber/data_frame_displayable.rb +313 -34
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +623 -70
- data/lib/red_amber/data_frame_variable_operation.rb +452 -35
- data/lib/red_amber/group.rb +186 -22
- data/lib/red_amber/helper.rb +74 -14
- data/lib/red_amber/refinements.rb +26 -6
- data/lib/red_amber/subframes.rb +1101 -0
- data/lib/red_amber/vector.rb +362 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +506 -0
- data/lib/red_amber/vector_selectable.rb +265 -23
- data/lib/red_amber/vector_unary_element_wise.rb +529 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
data/lib/red_amber/vector.rb
CHANGED
@@ -5,15 +5,22 @@ module RedAmber
|
|
5
5
|
# @data : holds Arrow::ChunkedArray
|
6
6
|
class Vector
|
7
7
|
# mix-in
|
8
|
-
include
|
8
|
+
include Enumerable
|
9
|
+
include Helper
|
10
|
+
include ArrowFunction
|
9
11
|
include VectorUpdatable
|
10
12
|
include VectorSelectable
|
11
|
-
include Helper
|
12
13
|
|
13
14
|
using RefineArrayLike
|
14
15
|
|
15
16
|
# Quicker constructor of Vector.
|
16
17
|
#
|
18
|
+
# @param arrow_array [Arrow::Array]
|
19
|
+
# Arrow::Array object to have in the Vector.
|
20
|
+
# @return [Vector]
|
21
|
+
# created Vector.
|
22
|
+
# @note This method doesn't check argument type.
|
23
|
+
#
|
17
24
|
def self.create(arrow_array)
|
18
25
|
instance = allocate
|
19
26
|
instance.instance_variable_set(:@data, arrow_array)
|
@@ -22,6 +29,11 @@ module RedAmber
|
|
22
29
|
|
23
30
|
# Create a Vector.
|
24
31
|
#
|
32
|
+
# @param array [Array, Vector, Range, Arrow::Array, #to_arrow_array]
|
33
|
+
# array-like.
|
34
|
+
# @return [Vector]
|
35
|
+
# created Vector.
|
36
|
+
#
|
25
37
|
# @note default is headless Vector and '@key == nil'
|
26
38
|
def initialize(*array)
|
27
39
|
@data =
|
@@ -39,15 +51,99 @@ module RedAmber
|
|
39
51
|
end
|
40
52
|
end
|
41
53
|
|
54
|
+
# Entity of Vector.
|
55
|
+
#
|
56
|
+
# @return [Arrow::Array]
|
57
|
+
#
|
42
58
|
attr_reader :data
|
43
59
|
alias_method :to_arrow_array, :data
|
44
60
|
|
61
|
+
# Associated key name when self is in a DataFrame.
|
62
|
+
#
|
63
|
+
# Default Vector is 'head-less' (key-less).
|
64
|
+
# @return [Symbol]
|
65
|
+
#
|
45
66
|
attr_accessor :key
|
46
67
|
|
68
|
+
# Return other as a Vector which is same data type as self.
|
69
|
+
#
|
70
|
+
# @param other [Vector, Array, Arrow::Array, Arrow::ChunkedArray]
|
71
|
+
# a source array-like which will be converted.
|
72
|
+
# @return [Vector]
|
73
|
+
# resolved Vector.
|
74
|
+
# @example Integer to String
|
75
|
+
# Vector.new('A').resolve([1, 2])
|
76
|
+
#
|
77
|
+
# # =>
|
78
|
+
# #<RedAmber::Vector(:string, size=2):0x00000000000037b4>
|
79
|
+
# ["1", "2"]
|
80
|
+
#
|
81
|
+
# @example String to Integer
|
82
|
+
# Vector.new(1).resolve(['A'])
|
83
|
+
#
|
84
|
+
# # =>
|
85
|
+
# #<RedAmber::Vector(:uint8, size=1):0x00000000000037dc>
|
86
|
+
# [65]
|
87
|
+
#
|
88
|
+
# @example Upcast to uint16
|
89
|
+
# vector = Vector.new(256)
|
90
|
+
#
|
91
|
+
# # =>
|
92
|
+
# #<RedAmber::Vector(:uint16, size=1):0x000000000000c1fc>
|
93
|
+
# [256]
|
94
|
+
#
|
95
|
+
# vector.resolve([1, 2])
|
96
|
+
#
|
97
|
+
# # =>
|
98
|
+
# # Not a uint8 Vector
|
99
|
+
# #<RedAmber::Vector(:uint16, size=2):0x000000000000c328>
|
100
|
+
# [1, 2]
|
101
|
+
#
|
102
|
+
# @since 0.4.0
|
103
|
+
#
|
104
|
+
def resolve(other)
|
105
|
+
case other
|
106
|
+
when Vector
|
107
|
+
Vector.create(data.resolve(other.data))
|
108
|
+
when Array, Arrow::Array, Arrow::ChunkedArray
|
109
|
+
Vector.create(data.resolve(other))
|
110
|
+
else
|
111
|
+
raise VectorArgumentError, "invalid argument: #{other}"
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
# String representation of self like an Array.
|
116
|
+
#
|
117
|
+
# @return [String]
|
118
|
+
# the result of Array#inspect on the values of self.
|
119
|
+
#
|
47
120
|
def to_s
|
48
121
|
@data.to_a.inspect
|
49
122
|
end
|
50
123
|
|
124
|
+
# String representation of self.
|
125
|
+
#
|
126
|
+
# According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
|
127
|
+
# - If it is 'MINIMUM', returns class and size.
|
128
|
+
# - If it is otherwise, returns class, size and preview.
|
129
|
+
# Default value of the ENV is 'Table'.
|
130
|
+
# @param limit [Integer]
|
131
|
+
# max width of the result.
|
132
|
+
# @return [String]
|
133
|
+
# show information of self as a String.
|
134
|
+
# @example Default (ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table')
|
135
|
+
# puts vector.inspect
|
136
|
+
#
|
137
|
+
# # =>
|
138
|
+
# #<RedAmber::Vector(:uint8, size=3):0x00000000000037f0>
|
139
|
+
# [1, 2, 3]
|
140
|
+
#
|
141
|
+
# @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'Minimum'
|
142
|
+
# puts vector.inspect
|
143
|
+
#
|
144
|
+
# # =>
|
145
|
+
# RedAmber::Vector(:uint8, size=3)
|
146
|
+
#
|
51
147
|
def inspect(limit: 80)
|
52
148
|
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table').casecmp('MINIMUM').zero?
|
53
149
|
# Better performance than `.upcase == 'MINIMUM'`
|
@@ -70,82 +166,187 @@ module RedAmber
|
|
70
166
|
end
|
71
167
|
end
|
72
168
|
|
169
|
+
# Convert to an Array.
|
170
|
+
#
|
171
|
+
# @return [Array]
|
172
|
+
# array representation.
|
173
|
+
#
|
73
174
|
def to_ary
|
74
175
|
@data.values
|
75
176
|
end
|
76
|
-
|
77
177
|
alias_method :to_a, :to_ary
|
78
178
|
alias_method :values, :to_ary
|
79
179
|
alias_method :entries, :to_ary
|
80
180
|
|
181
|
+
# Indices from 0 to size-1 as an Array.
|
182
|
+
#
|
183
|
+
# @return [Array]
|
184
|
+
# indices.
|
185
|
+
#
|
81
186
|
def indices
|
82
187
|
(0...size).to_a
|
83
188
|
end
|
84
|
-
|
85
189
|
alias_method :indexes, :indices
|
86
190
|
alias_method :indeces, :indices
|
87
191
|
|
192
|
+
# Vector size.
|
193
|
+
#
|
194
|
+
# @return [Integer]
|
195
|
+
# size of self.
|
196
|
+
#
|
88
197
|
def size
|
89
198
|
# only defined :length in Arrow?
|
90
199
|
@data.length
|
91
200
|
end
|
92
|
-
|
93
201
|
alias_method :length, :size
|
94
202
|
alias_method :n_rows, :size
|
95
203
|
alias_method :nrow, :size
|
96
204
|
|
205
|
+
# Test whether self is empty.
|
206
|
+
#
|
207
|
+
# @return [true, false]
|
208
|
+
# true if self is empty.
|
209
|
+
#
|
97
210
|
def empty?
|
98
211
|
size.zero?
|
99
212
|
end
|
100
213
|
|
214
|
+
# Type nickname of self.
|
215
|
+
#
|
216
|
+
# @return [Symbol]
|
217
|
+
# type nickname of values.
|
218
|
+
#
|
101
219
|
def type
|
102
220
|
list? ? :list : @data.value_type.nick.to_sym
|
103
221
|
end
|
104
222
|
|
223
|
+
# Type Class of self.
|
224
|
+
#
|
225
|
+
# @return [type_Class]
|
226
|
+
# type class.
|
227
|
+
#
|
228
|
+
def type_class
|
229
|
+
@data.type_class
|
230
|
+
end
|
231
|
+
|
232
|
+
# Test if self is a boolean Vector.
|
233
|
+
#
|
234
|
+
# @return [true, false]
|
235
|
+
# test result.
|
236
|
+
#
|
105
237
|
def boolean?
|
106
238
|
@data.boolean?
|
107
239
|
end
|
108
240
|
|
241
|
+
# Test if self is a numeric Vector.
|
242
|
+
#
|
243
|
+
# @return [true, false]
|
244
|
+
# test result.
|
245
|
+
#
|
109
246
|
def numeric?
|
110
247
|
@data.numeric?
|
111
248
|
end
|
112
249
|
|
250
|
+
# Test if self is a float Vector.
|
251
|
+
#
|
252
|
+
# @return [true, false]
|
253
|
+
# test result.
|
254
|
+
#
|
113
255
|
def float?
|
114
256
|
@data.float?
|
115
257
|
end
|
116
258
|
|
259
|
+
# Test if self is an integer Vector.
|
260
|
+
#
|
261
|
+
# @return [true, false]
|
262
|
+
# test result.
|
263
|
+
#
|
117
264
|
def integer?
|
118
265
|
@data.integer?
|
119
266
|
end
|
120
267
|
|
268
|
+
# Test if self is a string Vector.
|
269
|
+
#
|
270
|
+
# @return [true, false]
|
271
|
+
# test result.
|
272
|
+
#
|
121
273
|
def string?
|
122
274
|
@data.string?
|
123
275
|
end
|
124
276
|
|
277
|
+
# Test if self is a dictionary Vector.
|
278
|
+
#
|
279
|
+
# @return [true, false]
|
280
|
+
# test result.
|
281
|
+
#
|
125
282
|
def dictionary?
|
126
283
|
@data.dictionary?
|
127
284
|
end
|
128
285
|
|
286
|
+
# Test if self is a temporal Vector.
|
287
|
+
#
|
288
|
+
# @return [true, false]
|
289
|
+
# test result.
|
290
|
+
#
|
129
291
|
def temporal?
|
130
292
|
@data.temporal?
|
131
293
|
end
|
132
294
|
|
295
|
+
# Test if self is a list Vector.
|
296
|
+
#
|
297
|
+
# @return [true, false]
|
298
|
+
# test result.
|
299
|
+
#
|
133
300
|
def list?
|
134
301
|
@data.list?
|
135
302
|
end
|
136
303
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
304
|
+
# Iterates over Vector elements or returns an Enumerator.
|
305
|
+
#
|
306
|
+
# @overload each
|
307
|
+
# Returns a new Enumerator if no block given.
|
308
|
+
#
|
309
|
+
# @return [Enumerator]
|
310
|
+
# Enumerator over each element.
|
311
|
+
#
|
312
|
+
# @overload each
|
313
|
+
# When a block given, passes each element in self to the block.
|
314
|
+
#
|
315
|
+
# @yieldparam element [Object]
|
316
|
+
# passes element by a block parameter.
|
317
|
+
# @yieldreturn [Object]
|
318
|
+
# evaluated result value from the block.
|
319
|
+
# @return [self]
|
320
|
+
# returns self.
|
321
|
+
#
|
141
322
|
def each
|
142
323
|
return enum_for(:each) unless block_given?
|
143
324
|
|
144
325
|
size.times do |i|
|
145
326
|
yield data[i]
|
146
327
|
end
|
328
|
+
self
|
147
329
|
end
|
148
330
|
|
331
|
+
# Returns a Vector from collected objects from the block.
|
332
|
+
#
|
333
|
+
# @overload map
|
334
|
+
# Returns a new Enumerator if no block given.
|
335
|
+
#
|
336
|
+
# @return [Enumerator]
|
337
|
+
# a new Enumerator.
|
338
|
+
#
|
339
|
+
# @overload map
|
340
|
+
# When a block given, calls the block with successive elements.
|
341
|
+
# Returns a Vector of the objects returned by the block.
|
342
|
+
#
|
343
|
+
# @yieldparam element [Object]
|
344
|
+
# passes element by a block parameter.
|
345
|
+
# @yieldreturn [Object]
|
346
|
+
# evaluated result value from the block.
|
347
|
+
# @return [Vector]
|
348
|
+
# returns the collected values from the block as a Vector.
|
349
|
+
#
|
149
350
|
def map(&block)
|
150
351
|
return enum_for(:map) unless block
|
151
352
|
|
@@ -153,18 +354,35 @@ module RedAmber
|
|
153
354
|
end
|
154
355
|
alias_method :collect, :map
|
155
356
|
|
156
|
-
#
|
357
|
+
# Tests whether self is chunked or not.
|
358
|
+
#
|
359
|
+
# @api private
|
360
|
+
# @return [true, false]
|
361
|
+
# returns true if #data is chunked.
|
362
|
+
#
|
157
363
|
def chunked?
|
158
364
|
@data.is_a? Arrow::ChunkedArray
|
159
365
|
end
|
160
366
|
|
161
|
-
#
|
367
|
+
# Returns the number of chunks.
|
368
|
+
#
|
369
|
+
# @api private
|
370
|
+
# @return [Integer]
|
371
|
+
# the number of chunks. If self is not chunked, returns zero.
|
372
|
+
#
|
162
373
|
def n_chunks
|
163
374
|
chunked? ? @data.n_chunks : 0
|
164
375
|
end
|
165
376
|
|
166
377
|
# def each_chunk() end
|
167
378
|
|
379
|
+
# Returns a hash containing the counts of equal elements.
|
380
|
+
#
|
381
|
+
# - Each key is an element of self.
|
382
|
+
# - Each value is the number of elements equal to the key.
|
383
|
+
# @return [Hash]
|
384
|
+
# result in a Hash.
|
385
|
+
#
|
168
386
|
def tally
|
169
387
|
hash = values.tally
|
170
388
|
if (type_class < Arrow::FloatingPointDataType) && is_nan.any
|
@@ -180,22 +398,155 @@ module RedAmber
|
|
180
398
|
hash
|
181
399
|
end
|
182
400
|
|
401
|
+
# @api private
|
402
|
+
# Arrow implementation of #tally
|
183
403
|
def value_counts
|
184
404
|
values, counts = Arrow::Function.find(:value_counts).execute([data]).value.fields
|
185
405
|
values.zip(counts).to_h
|
186
406
|
end
|
187
407
|
|
408
|
+
# Count nils in self.
|
409
|
+
#
|
410
|
+
# @return [Integer]
|
411
|
+
# the number of nils.
|
412
|
+
#
|
188
413
|
def n_nulls
|
189
414
|
@data.n_nulls
|
190
415
|
end
|
191
416
|
alias_method :n_nils, :n_nulls
|
192
417
|
|
418
|
+
# Count NaNs in self if self is a numeric Vector
|
419
|
+
#
|
420
|
+
# @return [Integer]
|
421
|
+
# the number of Float::NANs. If self is not a numeric Vector,
|
422
|
+
# returns 0.
|
423
|
+
#
|
193
424
|
def n_nans
|
194
425
|
numeric? ? is_nan.to_a.count(true) : 0
|
195
426
|
end
|
196
427
|
|
428
|
+
# Return true if self has any nil.
|
429
|
+
#
|
430
|
+
# @return [true, false]
|
431
|
+
# true or false.
|
432
|
+
#
|
197
433
|
def has_nil?
|
198
434
|
is_nil.any
|
199
435
|
end
|
436
|
+
|
437
|
+
# Enables computation with the coercion mechanism.
|
438
|
+
#
|
439
|
+
# @example
|
440
|
+
# vector = Vector.new(1,2,3)
|
441
|
+
#
|
442
|
+
# # =>
|
443
|
+
# #<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
|
444
|
+
# [1, 2, 3]
|
445
|
+
#
|
446
|
+
# # Vector's `#*` method
|
447
|
+
# vector * -1
|
448
|
+
#
|
449
|
+
# # =>
|
450
|
+
# #<RedAmber::Vector(:int16, size=3):0x00000000000e3698>
|
451
|
+
# [-1, -2, -3]
|
452
|
+
#
|
453
|
+
# # coerced calculation
|
454
|
+
# -1 * vector
|
455
|
+
#
|
456
|
+
# # =>
|
457
|
+
# #<RedAmber::Vector(:int16, size=3):0x00000000000ea4ac>
|
458
|
+
# [-1, -2, -3]
|
459
|
+
#
|
460
|
+
# # `-@` operator
|
461
|
+
# -vector
|
462
|
+
#
|
463
|
+
# # =>
|
464
|
+
# #<RedAmber::Vector(:uint8, size=3):0x00000000000ee7b4>
|
465
|
+
# [255, 254, 253]
|
466
|
+
#
|
467
|
+
def coerce(other)
|
468
|
+
[Vector.new(Array(other) * size), self]
|
469
|
+
end
|
470
|
+
|
471
|
+
# Spread the return value of an aggregate function as if
|
472
|
+
# it is an element-wise function.
|
473
|
+
#
|
474
|
+
# @overload propagate(function)
|
475
|
+
# Returns a Vector of same size as self spreading the value from function.
|
476
|
+
#
|
477
|
+
# @param function [Symbol] a name of aggregation function for self.
|
478
|
+
# Return value of the function must be a scalar.
|
479
|
+
# @return [Vector] Returns a Vector that is the same size as self
|
480
|
+
# and such that all elements are the same as the result of aggregation `function`.
|
481
|
+
# @example propagate by an aggregation function name
|
482
|
+
# vec = Vector.new(1, 2, 3, 4)
|
483
|
+
# vec.propagate(:mean)
|
484
|
+
# # =>
|
485
|
+
# #<RedAmber::Vector(:double, size=4):0x000000000001985c>
|
486
|
+
# [2.5, 2.5, 2.5, 2.5]
|
487
|
+
#
|
488
|
+
# @overload propagate
|
489
|
+
# Returns a Vector of same size as self spreading the value from block.
|
490
|
+
#
|
491
|
+
# @yieldparam self [Vector]
|
492
|
+
# gives self to the block.
|
493
|
+
# @yieldreturn [scalar]
|
494
|
+
# a scalar value.
|
495
|
+
# @return [Vector]
|
496
|
+
# returns a Vector that is the same size as self
|
497
|
+
# and such that all elements are the same as the yielded value from the block.
|
498
|
+
# @example propagate by a block
|
499
|
+
# vec.propagate { |v| v.mean.round }
|
500
|
+
# # =>
|
501
|
+
# #<RedAmber::Vector(:uint8, size=4):0x000000000000cb98>
|
502
|
+
# [3, 3, 3, 3]
|
503
|
+
#
|
504
|
+
# @since 0.4.0
|
505
|
+
#
|
506
|
+
def propagate(function = nil, &block)
|
507
|
+
value =
|
508
|
+
if block
|
509
|
+
raise VectorArgumentError, "can't specify both function and block" if function
|
510
|
+
|
511
|
+
yield self
|
512
|
+
else
|
513
|
+
send(function&.to_sym)
|
514
|
+
end
|
515
|
+
raise VectorArgumentError, 'not an aggregation function' if value.is_a?(Vector)
|
516
|
+
|
517
|
+
Vector.new([value] * size)
|
518
|
+
end
|
519
|
+
alias_method :expand, :propagate
|
520
|
+
|
521
|
+
private # =======
|
522
|
+
|
523
|
+
def exec_func_unary(function, options)
|
524
|
+
options = nil if options.empty?
|
525
|
+
find(function).execute([data], options)
|
526
|
+
end
|
527
|
+
|
528
|
+
def exec_func_binary(function, other, options)
|
529
|
+
options = nil if options.empty?
|
530
|
+
case other
|
531
|
+
when Vector
|
532
|
+
find(function).execute([data, other.data], options)
|
533
|
+
when NilClass
|
534
|
+
nils = data.class.new([nil] * size)
|
535
|
+
find(function).execute([data, nils], options)
|
536
|
+
else
|
537
|
+
find(function).execute([data, other], options)
|
538
|
+
end
|
539
|
+
end
|
540
|
+
|
541
|
+
def get_scalar(datum)
|
542
|
+
output = datum.value
|
543
|
+
case output
|
544
|
+
when Arrow::StringScalar then output.to_s
|
545
|
+
when Arrow::StructScalar
|
546
|
+
output.value.map { |s| s.is_a?(Arrow::StringScalar) ? s.to_s : s.value }
|
547
|
+
else
|
548
|
+
output.value
|
549
|
+
end
|
550
|
+
end
|
200
551
|
end
|
201
552
|
end
|