red_amber 0.4.2 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -2
- data/CHANGELOG.md +57 -0
- data/Gemfile +5 -6
- data/README.ja.md +252 -0
- data/README.md +19 -14
- data/benchmark/group.yml +12 -5
- data/docker/Gemfile +8 -3
- data/docker/Gemfile.lock +54 -16
- data/docker/example +29 -17
- data/lib/red_amber/data_frame.rb +78 -4
- data/lib/red_amber/data_frame_combinable.rb +147 -119
- data/lib/red_amber/data_frame_displayable.rb +6 -5
- data/lib/red_amber/data_frame_selectable.rb +49 -0
- data/lib/red_amber/group.rb +191 -90
- data/lib/red_amber/helper.rb +26 -0
- data/lib/red_amber/subframes.rb +12 -5
- data/lib/red_amber/vector.rb +43 -24
- data/lib/red_amber/vector_aggregation.rb +26 -0
- data/lib/red_amber/vector_selectable.rb +74 -23
- data/lib/red_amber/vector_string_function.rb +211 -0
- data/lib/red_amber/vector_unary_element_wise.rb +4 -0
- data/lib/red_amber/vector_updatable.rb +28 -0
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -1
- data/red_amber.gemspec +1 -1
- metadata +7 -5
@@ -836,6 +836,55 @@ module RedAmber
|
|
836
836
|
tail(n_obs)
|
837
837
|
end
|
838
838
|
|
839
|
+
# Select records randomly to create a DataFrame.
|
840
|
+
# This method calls `indices.sample`.
|
841
|
+
# We can use the same arguments in `Vector#sample`.
|
842
|
+
# @note This method requires 'arrow-numo-narray' gem.
|
843
|
+
#
|
844
|
+
# @overload sample()
|
845
|
+
# Return a DataFrame with a randomly selected record.
|
846
|
+
#
|
847
|
+
# @return [DataFrame]
|
848
|
+
# a DataFrame with single record.
|
849
|
+
#
|
850
|
+
# @overload sample(n)
|
851
|
+
# Return a DataFrame with n records selected at random.
|
852
|
+
#
|
853
|
+
# @param n [Integer]
|
854
|
+
# positive number of records to select.
|
855
|
+
# If n is smaller than or equal to `size`, records are selected without repetition.
|
856
|
+
# If n is greater than `size`, records are selected repeatedly.
|
857
|
+
# @return [DataFrame]
|
858
|
+
# a DataFrame with sampled records.
|
859
|
+
#
|
860
|
+
# @overload sample(prop)
|
861
|
+
# Return a DataFrame with records by proportion `prop` at random.
|
862
|
+
#
|
863
|
+
# @param prop [Float]
|
864
|
+
# positive proportion of records to select.
|
865
|
+
# Absolute number of records to select:`prop*size` is rounded (by `half: :up`).
|
866
|
+
# If prop is smaller than or equal to 1.0, records are selected without repetition.
|
867
|
+
# If prop is greater than 1.0, some records are selected repeatedly.
|
868
|
+
# @return [DataFrame]
|
869
|
+
# a DataFrame with sampled records.
|
870
|
+
#
|
871
|
+
# @since 0.5.0
|
872
|
+
#
|
873
|
+
def sample(n_or_prop = nil)
|
874
|
+
slice { indices.sample(n_or_prop) }
|
875
|
+
end
|
876
|
+
|
877
|
+
# Returns a DataFrame with shuffled rows.
|
878
|
+
#
|
879
|
+
# @note This method requires 'arrow-numo-narray' gem.
|
880
|
+
# @note Same behavior as `DataFrame#sample(1.0)`
|
881
|
+
# @return (see #sample)
|
882
|
+
# @since 0.5.0
|
883
|
+
#
|
884
|
+
def shuffle
|
885
|
+
sample(1.0)
|
886
|
+
end
|
887
|
+
|
839
888
|
# Select records by index Array to create a DataFrame.
|
840
889
|
#
|
841
890
|
# - TODO: support for option `boundscheck: true`
|
data/lib/red_amber/group.rb
CHANGED
@@ -4,6 +4,7 @@ module RedAmber
|
|
4
4
|
# Group class
|
5
5
|
class Group
|
6
6
|
include Enumerable # This feature is experimental
|
7
|
+
include Helper
|
7
8
|
|
8
9
|
using RefineArrowTable
|
9
10
|
|
@@ -114,15 +115,27 @@ module RedAmber
|
|
114
115
|
#
|
115
116
|
def filters
|
116
117
|
@filters ||= begin
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
118
|
+
group_values = group_table[group_keys].each_record.map(&:to_a)
|
119
|
+
|
120
|
+
Enumerator.new(group_table.n_rows) do |yielder|
|
121
|
+
group_values.each do |values|
|
122
|
+
booleans =
|
123
|
+
values.map.with_index do |value, i|
|
124
|
+
column = @dataframe[group_keys[i]].data
|
125
|
+
if value.nil?
|
126
|
+
Arrow::Function.find('is_null').execute([column])
|
127
|
+
elsif value.is_a?(Float) && value.nan?
|
128
|
+
Arrow::Function.find('is_nan').execute([column])
|
129
|
+
else
|
130
|
+
Arrow::Function.find('equal').execute([column, value])
|
131
|
+
end
|
132
|
+
end
|
133
|
+
filter =
|
134
|
+
booleans.reduce do |result, datum|
|
135
|
+
Arrow::Function.find('and_kleene').execute([result, datum])
|
136
|
+
end
|
137
|
+
yielder << Vector.create(filter.value)
|
138
|
+
end
|
126
139
|
end
|
127
140
|
end
|
128
141
|
end
|
@@ -147,11 +160,10 @@ module RedAmber
|
|
147
160
|
# group size.
|
148
161
|
#
|
149
162
|
def each
|
150
|
-
filters
|
151
163
|
return enum_for(:each) unless block_given?
|
152
164
|
|
153
|
-
|
154
|
-
yield @dataframe
|
165
|
+
filters.each do |filter|
|
166
|
+
yield @dataframe.filter(filter)
|
155
167
|
end
|
156
168
|
@filters.size
|
157
169
|
end
|
@@ -174,7 +186,7 @@ module RedAmber
|
|
174
186
|
# 2 Gentoo 124
|
175
187
|
#
|
176
188
|
def group_count
|
177
|
-
DataFrame.create(
|
189
|
+
DataFrame.create(group_table)
|
178
190
|
end
|
179
191
|
|
180
192
|
# String representation of self.
|
@@ -186,80 +198,157 @@ module RedAmber
|
|
186
198
|
#
|
187
199
|
# # =>
|
188
200
|
# #<RedAmber::Group : 0x0000000000003a98>
|
189
|
-
# species
|
190
|
-
# <string>
|
191
|
-
# 0 Adelie
|
192
|
-
# 1 Chinstrap
|
193
|
-
# 2 Gentoo
|
201
|
+
# species group_count
|
202
|
+
# <string> <uint8>
|
203
|
+
# 0 Adelie 152
|
204
|
+
# 1 Chinstrap 68
|
205
|
+
# 2 Gentoo 124
|
194
206
|
#
|
195
207
|
def inspect
|
196
|
-
"#<#{self.class} : #{format('0x%016x', object_id)}>\n#{
|
208
|
+
"#<#{self.class} : #{format('0x%016x', object_id)}>\n#{group_count}"
|
197
209
|
end
|
198
210
|
|
199
211
|
# Summarize Group by aggregation functions from the block.
|
200
212
|
#
|
201
|
-
# @
|
202
|
-
#
|
203
|
-
#
|
204
|
-
#
|
205
|
-
#
|
206
|
-
#
|
207
|
-
#
|
208
|
-
#
|
209
|
-
#
|
213
|
+
# @overload summarize
|
214
|
+
# Summarize by a function.
|
215
|
+
# @yieldparam group [Group]
|
216
|
+
# passes group object self.
|
217
|
+
# @yieldreturn [DataFrame]
|
218
|
+
# @yieldreturn [DataFrame, Array<DataFrame>, Hash{Symbol, String => DataFrame}]
|
219
|
+
# an aggregated DataFrame or an array of aggregated DataFrames.
|
220
|
+
# @return [DataFrame]
|
221
|
+
# summarized DataFrame.
|
222
|
+
# @example Single function and single variable
|
223
|
+
# group = penguins.group(:species)
|
224
|
+
# group
|
210
225
|
#
|
211
|
-
#
|
212
|
-
#
|
213
|
-
#
|
214
|
-
#
|
215
|
-
#
|
216
|
-
#
|
217
|
-
#
|
226
|
+
# # =>
|
227
|
+
# #<RedAmber::Group : 0x000000000000c314>
|
228
|
+
# species group_count
|
229
|
+
# <string> <uint8>
|
230
|
+
# 0 Adelie 152
|
231
|
+
# 1 Chinstrap 68
|
232
|
+
# 2 Gentoo 124
|
218
233
|
#
|
219
|
-
#
|
234
|
+
# group.summarize { mean(:bill_length_mm) }
|
220
235
|
#
|
221
|
-
#
|
222
|
-
#
|
223
|
-
#
|
224
|
-
#
|
225
|
-
#
|
226
|
-
#
|
227
|
-
#
|
236
|
+
# # =>
|
237
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c364>
|
238
|
+
# species mean(bill_length_mm)
|
239
|
+
# <string> <double>
|
240
|
+
# 0 Adelie 38.79
|
241
|
+
# 1 Chinstrap 48.83
|
242
|
+
# 2 Gentoo 47.5
|
228
243
|
#
|
229
|
-
#
|
230
|
-
#
|
244
|
+
# @example Single function only
|
245
|
+
# group.summarize { mean }
|
231
246
|
#
|
232
|
-
#
|
233
|
-
#
|
234
|
-
#
|
235
|
-
#
|
236
|
-
#
|
237
|
-
#
|
238
|
-
#
|
247
|
+
# # =>
|
248
|
+
# #<RedAmber::DataFrame : 3 x 6 Vectors, 0x000000000000c350>
|
249
|
+
# species mean(bill_length_mm) mean(bill_depth_mm) ... mean(year)
|
250
|
+
# <string> <double> <double> ... <double>
|
251
|
+
# 0 Adelie 38.79 18.35 ... 2008.01
|
252
|
+
# 1 Chinstrap 48.83 18.42 ... 2007.97
|
253
|
+
# 2 Gentoo 47.5 14.98 ... 2008.08
|
239
254
|
#
|
240
|
-
# @
|
241
|
-
#
|
255
|
+
# @overload summarize
|
256
|
+
# Summarize by a function.
|
242
257
|
#
|
243
|
-
#
|
244
|
-
#
|
245
|
-
#
|
246
|
-
#
|
247
|
-
#
|
248
|
-
#
|
249
|
-
#
|
250
|
-
#
|
251
|
-
|
252
|
-
|
258
|
+
# @yieldparam group [Group]
|
259
|
+
# passes group object self.
|
260
|
+
# @yieldreturn [Array<DataFrame>]
|
261
|
+
# an aggregated DataFrame or an array of aggregated DataFrames.
|
262
|
+
# @return [DataFrame]
|
263
|
+
# summarized DataFrame.
|
264
|
+
# @example Multiple functions
|
265
|
+
# group.summarize { [min(:bill_length_mm), max(:bill_length_mm)] }
|
266
|
+
#
|
267
|
+
# # =>
|
268
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000c378>
|
269
|
+
# species min(bill_length_mm) max(bill_length_mm)
|
270
|
+
# <string> <double> <double>
|
271
|
+
# 0 Adelie 32.1 46.0
|
272
|
+
# 1 Chinstrap 40.9 58.0
|
273
|
+
# 2 Gentoo 40.9 59.6
|
274
|
+
#
|
275
|
+
# @overload summarize
|
276
|
+
# Summarize by a function.
|
277
|
+
#
|
278
|
+
# @yieldparam group [Group]
|
279
|
+
# passes group object self.
|
280
|
+
# @yieldreturn [Hash{Symbol, String => DataFrame}]
|
281
|
+
# an aggregated DataFrame or an array of aggregated DataFrames.
|
282
|
+
# The DataFrame must return only one aggregated column.
|
283
|
+
# @return [DataFrame]
|
284
|
+
# summarized DataFrame.
|
285
|
+
# @example Rename column name by Hash
|
286
|
+
# group.summarize {
|
287
|
+
# {
|
288
|
+
# min_bill_length_mm: min(:bill_length_mm),
|
289
|
+
# max_bill_length_mm: max(:bill_length_mm),
|
290
|
+
# }
|
291
|
+
# }
|
292
|
+
#
|
293
|
+
# # =>
|
294
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000c378>
|
295
|
+
# species min_bill_length_mm max_bill_length_mm
|
296
|
+
# <string> <double> <double>
|
297
|
+
# 0 Adelie 32.1 46.0
|
298
|
+
# 1 Chinstrap 40.9 58.0
|
299
|
+
# 2 Gentoo 40.9 59.6
|
300
|
+
#
|
301
|
+
def summarize(*args, &block)
|
302
|
+
if block
|
303
|
+
agg = instance_eval(&block)
|
304
|
+
unless args.empty?
|
305
|
+
agg = [agg] if agg.is_a?(DataFrame)
|
306
|
+
agg = args.zip(agg).to_h
|
307
|
+
end
|
308
|
+
else
|
309
|
+
agg = args
|
310
|
+
end
|
311
|
+
|
253
312
|
case agg
|
254
313
|
when DataFrame
|
255
314
|
agg
|
256
315
|
when Array
|
257
|
-
|
316
|
+
aggregations =
|
317
|
+
agg.map do |df|
|
318
|
+
v = df.vectors[-1]
|
319
|
+
[v.key, v]
|
320
|
+
end
|
321
|
+
agg[0].assign(aggregations)
|
322
|
+
when Hash
|
323
|
+
aggregations =
|
324
|
+
agg.map do |key, df|
|
325
|
+
aggregated_keys = df.keys - @group_keys
|
326
|
+
if aggregated_keys.size > 1
|
327
|
+
message =
|
328
|
+
"accept only one column from the Hash: #{aggregated_keys.join(', ')}"
|
329
|
+
raise GroupArgumentError, message
|
330
|
+
end
|
331
|
+
|
332
|
+
v = df.vectors[-1]
|
333
|
+
[key, v]
|
334
|
+
end
|
335
|
+
agg.values[-1].drop(-1).assign(aggregations)
|
258
336
|
else
|
259
337
|
raise GroupArgumentError, "Unknown argument: #{agg}"
|
260
338
|
end
|
261
339
|
end
|
262
340
|
|
341
|
+
# Return grouped DataFrame only for group keys.
|
342
|
+
#
|
343
|
+
# @return [DataFrame]
|
344
|
+
# grouped DataFrame projected only for group_keys.
|
345
|
+
# @since 0.5.0
|
346
|
+
#
|
347
|
+
def grouped_frame
|
348
|
+
DataFrame.create(group_table[group_keys])
|
349
|
+
end
|
350
|
+
alias_method :none, :grouped_frame
|
351
|
+
|
263
352
|
# Aggregating summary.
|
264
353
|
#
|
265
354
|
# @api private
|
@@ -270,37 +359,49 @@ module RedAmber
|
|
270
359
|
|
271
360
|
private
|
272
361
|
|
273
|
-
def
|
274
|
-
|
275
|
-
[function_name]
|
276
|
-
else
|
277
|
-
summary_keys.map { |key| "#{function_name}(#{key})" }
|
278
|
-
end
|
279
|
-
end
|
280
|
-
|
281
|
-
# @note `@group_counts.sum == @dataframe.size``
|
282
|
-
def group_counts
|
283
|
-
@group_counts ||= filters.map(&:sum)
|
362
|
+
def group_table
|
363
|
+
@group_table ||= build_aggregated_table
|
284
364
|
end
|
285
365
|
|
286
|
-
def
|
287
|
-
|
288
|
-
|
289
|
-
|
366
|
+
def build_aggregated_table
|
367
|
+
keys = @group_keys
|
368
|
+
key = keys[0]
|
369
|
+
table = @dataframe.table
|
370
|
+
|
371
|
+
plan = Arrow::ExecutePlan.new
|
372
|
+
source_node = plan.build_source_node(table)
|
373
|
+
|
374
|
+
aggregate_node =
|
375
|
+
plan.build_aggregate_node(source_node, {
|
376
|
+
aggregations: [{ function: 'hash_count',
|
377
|
+
input: key }], keys: keys
|
378
|
+
})
|
379
|
+
expressions = keys.map { |k| Arrow::FieldExpression.new(k) }
|
380
|
+
null_count = Arrow::Function.find('is_null').execute([table[key]]).value.sum
|
381
|
+
count_field = Arrow::FieldExpression.new("count(#{key})")
|
382
|
+
if null_count.zero?
|
383
|
+
expressions << count_field
|
384
|
+
else
|
385
|
+
is_zero =
|
386
|
+
Arrow::CallExpression.new('equal', [count_field, Arrow::Int64Scalar.new(0)])
|
387
|
+
null_count_scalar = Arrow::Int64Scalar.new(null_count)
|
388
|
+
expressions <<
|
389
|
+
Arrow::CallExpression.new('if_else', [
|
390
|
+
is_zero, null_count_scalar, count_field
|
391
|
+
])
|
290
392
|
end
|
291
|
-
|
393
|
+
options = Arrow::ProjectNodeOptions.new(expressions, keys + [:group_count])
|
394
|
+
project_node = plan.build_project_node(aggregate_node, options)
|
292
395
|
|
293
|
-
|
294
|
-
|
295
|
-
arrays = table.columns.map(&:data)
|
396
|
+
sink_and_start_plan(plan, project_node)
|
397
|
+
end
|
296
398
|
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
399
|
+
def build_aggregation_keys(function_name, summary_keys)
|
400
|
+
if summary_keys.empty?
|
401
|
+
[function_name]
|
402
|
+
else
|
403
|
+
summary_keys.map { |key| "#{function_name}(#{key})" }
|
301
404
|
end
|
302
|
-
|
303
|
-
Arrow::Table.new(Arrow::Schema.new(fields), arrays)
|
304
405
|
end
|
305
406
|
|
306
407
|
# Call Vector aggregating function and return an array of arrays:
|
data/lib/red_amber/helper.rb
CHANGED
@@ -78,6 +78,32 @@ module RedAmber
|
|
78
78
|
Array(range)
|
79
79
|
end
|
80
80
|
end
|
81
|
+
|
82
|
+
# Create sink node and execute plan
|
83
|
+
#
|
84
|
+
# @param plan [Arrow::ExecutePlan]
|
85
|
+
# Execute plan of Acero.
|
86
|
+
# @param node [Arrow::ExecuteNode]
|
87
|
+
# Execute node of Acero.
|
88
|
+
# @param output_schema [Arrow::Schema, nil]
|
89
|
+
# Schema of table to output. If it is nil, output_schema of
|
90
|
+
# sink node is used.
|
91
|
+
# @return [Arrow::Table]
|
92
|
+
# Result of plan.
|
93
|
+
# @since 0.5.0
|
94
|
+
#
|
95
|
+
def sink_and_start_plan(plan, node, output_schema: nil)
|
96
|
+
sink_node_options = Arrow::SinkNodeOptions.new
|
97
|
+
plan.build_sink_node(node, sink_node_options)
|
98
|
+
plan.validate
|
99
|
+
plan.start
|
100
|
+
plan.wait
|
101
|
+
output_schema = node.output_schema if output_schema.nil?
|
102
|
+
reader = sink_node_options.get_reader(output_schema)
|
103
|
+
table = reader.read_all
|
104
|
+
plan.stop
|
105
|
+
table
|
106
|
+
end
|
81
107
|
end
|
82
108
|
|
83
109
|
# rubocop:disable Layout/LineLength
|
data/lib/red_amber/subframes.rb
CHANGED
@@ -20,6 +20,7 @@ module RedAmber
|
|
20
20
|
@sizes = []
|
21
21
|
end
|
22
22
|
|
23
|
+
# Generic iterator method
|
23
24
|
def each
|
24
25
|
@selectors.each
|
25
26
|
end
|
@@ -27,14 +28,20 @@ module RedAmber
|
|
27
28
|
|
28
29
|
# Boolean selectors of sub-dataframes
|
29
30
|
class Filters < Selectors
|
31
|
+
# Return sizes of filter
|
32
|
+
# @return [Array<Integer>]
|
33
|
+
# sizes of each sub-dataframe.
|
34
|
+
# Counts true for each filter.
|
30
35
|
def sizes
|
31
|
-
# count true
|
32
36
|
@sizes = @selectors.map { |s| s.to_a.count { _1 } } # rubocop:disable Performance/Size
|
33
37
|
end
|
34
38
|
end
|
35
39
|
|
36
40
|
# Index selectors of sub-dataframes
|
37
41
|
class Indices < Selectors
|
42
|
+
# Return sizes of selector indices.
|
43
|
+
# @return [Array<Integer>]
|
44
|
+
# sizes of each sub-dataframe.
|
38
45
|
def sizes
|
39
46
|
@sizes = @selectors.map(&:size)
|
40
47
|
end
|
@@ -93,7 +100,7 @@ module RedAmber
|
|
93
100
|
# @since 0.4.0
|
94
101
|
#
|
95
102
|
def by_group(group)
|
96
|
-
SubFrames.
|
103
|
+
SubFrames.by_filters(group.dataframe, group.filters)
|
97
104
|
end
|
98
105
|
|
99
106
|
# Create a new SubFrames object from a DataFrame and an array of indices.
|
@@ -291,15 +298,15 @@ module RedAmber
|
|
291
298
|
selectors = yield(dataframe)
|
292
299
|
end
|
293
300
|
|
294
|
-
if dataframe.empty? || selectors.nil? || selectors.
|
301
|
+
if dataframe.empty? || selectors.nil? || selectors.size.zero? # rubocop:disable Style/ZeroLengthPredicate
|
295
302
|
@baseframe = DataFrame.new
|
296
303
|
@selectors = Selectors.new([])
|
297
304
|
else
|
298
305
|
@baseframe = dataframe
|
299
306
|
@selectors =
|
300
|
-
if selectors
|
307
|
+
if selectors.first.boolean?
|
301
308
|
Filters.new(selectors)
|
302
|
-
elsif selectors
|
309
|
+
elsif selectors.first.numeric?
|
303
310
|
Indices.new(selectors)
|
304
311
|
else
|
305
312
|
raise SubFramesArgumentError, "illegal type: #{selectors}"
|
data/lib/red_amber/vector.rb
CHANGED
@@ -10,21 +10,54 @@ module RedAmber
|
|
10
10
|
include ArrowFunction
|
11
11
|
include VectorUpdatable
|
12
12
|
include VectorSelectable
|
13
|
+
include VectorStringFunction
|
13
14
|
|
14
15
|
using RefineArrayLike
|
15
16
|
|
16
|
-
#
|
17
|
+
# Entity of Vector.
|
17
18
|
#
|
18
|
-
# @
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
# @return [Arrow::Array]
|
20
|
+
#
|
21
|
+
attr_reader :data
|
22
|
+
alias_method :to_arrow_array, :data
|
23
|
+
|
24
|
+
# Associated key name when self is in a DataFrame.
|
25
|
+
#
|
26
|
+
# Default Vector is 'head-less' (key-less).
|
27
|
+
# @return [Symbol]
|
23
28
|
#
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
29
|
+
attr_accessor :key
|
30
|
+
|
31
|
+
class << self
|
32
|
+
# Create a Vector (calling `.new`).
|
33
|
+
#
|
34
|
+
# @param (see #initialize)
|
35
|
+
# @return (see #initialize)
|
36
|
+
# @example Create an empty Vector.
|
37
|
+
# Vector[]
|
38
|
+
# # =>
|
39
|
+
# #<RedAmber::Vector(:string, size=0):0x000000000000e2cc>
|
40
|
+
# []
|
41
|
+
#
|
42
|
+
# @since 0.5.0
|
43
|
+
#
|
44
|
+
def [](...)
|
45
|
+
new(...)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Quicker constructor of Vector.
|
49
|
+
#
|
50
|
+
# @param arrow_array [Arrow::Array]
|
51
|
+
# Arrow::Array object to have in the Vector.
|
52
|
+
# @return [Vector]
|
53
|
+
# created Vector.
|
54
|
+
# @note This method doesn't check argument type.
|
55
|
+
#
|
56
|
+
def create(arrow_array)
|
57
|
+
instance = allocate
|
58
|
+
instance.instance_variable_set(:@data, arrow_array)
|
59
|
+
instance
|
60
|
+
end
|
28
61
|
end
|
29
62
|
|
30
63
|
# Create a Vector.
|
@@ -51,20 +84,6 @@ module RedAmber
|
|
51
84
|
end
|
52
85
|
end
|
53
86
|
|
54
|
-
# Entity of Vector.
|
55
|
-
#
|
56
|
-
# @return [Arrow::Array]
|
57
|
-
#
|
58
|
-
attr_reader :data
|
59
|
-
alias_method :to_arrow_array, :data
|
60
|
-
|
61
|
-
# Associated key name when self is in a DataFrame.
|
62
|
-
#
|
63
|
-
# Default Vector is 'head-less' (key-less).
|
64
|
-
# @return [Symbol]
|
65
|
-
#
|
66
|
-
attr_accessor :key
|
67
|
-
|
68
87
|
# Return other as a Vector which is same data type as self.
|
69
88
|
#
|
70
89
|
# @param other [Vector, Array, Arrow::Array, Arrow::ChunkedArray]
|
@@ -161,6 +161,22 @@ module RedAmber
|
|
161
161
|
#
|
162
162
|
define_unary_aggregation :min_max
|
163
163
|
|
164
|
+
# Compute the 1 most common values and their respective
|
165
|
+
# occurrence counts.
|
166
|
+
#
|
167
|
+
# @note Self must be a numeric or a boolean Vector.
|
168
|
+
# @note ModeOptions are not supported in 0.5.0.
|
169
|
+
# Only one mode value is returned.
|
170
|
+
# @api private
|
171
|
+
# @return [Hash{'mode'=>mode, 'count'=>count}]
|
172
|
+
# mode and count of self in a Hash.
|
173
|
+
# @since 0.5.0
|
174
|
+
#
|
175
|
+
def mode
|
176
|
+
datum = find(:mode).execute([data])
|
177
|
+
datum.value.to_a.first
|
178
|
+
end
|
179
|
+
|
164
180
|
# Compute product value of self.
|
165
181
|
#
|
166
182
|
# @note Self must be a numeric Vector.
|
@@ -241,6 +257,16 @@ module RedAmber
|
|
241
257
|
# - nearest: returns i or j, whichever is closer.
|
242
258
|
# - midpoint: returns (i + j) / 2.
|
243
259
|
|
260
|
+
# Get a non-nil element in self.
|
261
|
+
#
|
262
|
+
# @return [Object, nil]
|
263
|
+
# first non-nil value detected. If all elements are nil, return nil.
|
264
|
+
# @since 0.5.0
|
265
|
+
#
|
266
|
+
def one
|
267
|
+
each.find { !_1.nil? }
|
268
|
+
end
|
269
|
+
|
244
270
|
# Returns a quantile value.
|
245
271
|
# - 0.5 quantile (median) is returned by default.
|
246
272
|
# - Or return quantile for specified probability (prob).
|