red_amber 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -2
- data/CHANGELOG.md +57 -0
- data/Gemfile +5 -6
- data/README.ja.md +252 -0
- data/README.md +19 -14
- data/benchmark/group.yml +12 -5
- data/docker/Gemfile +8 -3
- data/docker/Gemfile.lock +54 -16
- data/docker/example +29 -17
- data/lib/red_amber/data_frame.rb +78 -4
- data/lib/red_amber/data_frame_combinable.rb +147 -119
- data/lib/red_amber/data_frame_displayable.rb +6 -5
- data/lib/red_amber/data_frame_selectable.rb +49 -0
- data/lib/red_amber/group.rb +191 -90
- data/lib/red_amber/helper.rb +26 -0
- data/lib/red_amber/subframes.rb +12 -5
- data/lib/red_amber/vector.rb +43 -24
- data/lib/red_amber/vector_aggregation.rb +26 -0
- data/lib/red_amber/vector_selectable.rb +74 -23
- data/lib/red_amber/vector_string_function.rb +211 -0
- data/lib/red_amber/vector_unary_element_wise.rb +4 -0
- data/lib/red_amber/vector_updatable.rb +28 -0
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -1
- data/red_amber.gemspec +1 -1
- metadata +7 -5
@@ -836,6 +836,55 @@ module RedAmber
|
|
836
836
|
tail(n_obs)
|
837
837
|
end
|
838
838
|
|
839
|
+
# Select records randomly to create a DataFrame.
|
840
|
+
# This method calls `indices.sample`.
|
841
|
+
# We can use the same arguments in `Vector#sample`.
|
842
|
+
# @note This method requires 'arrow-numo-narray' gem.
|
843
|
+
#
|
844
|
+
# @overload sample()
|
845
|
+
# Return a DataFrame with a randomly selected record.
|
846
|
+
#
|
847
|
+
# @return [DataFrame]
|
848
|
+
# a DataFrame with single record.
|
849
|
+
#
|
850
|
+
# @overload sample(n)
|
851
|
+
# Return a DataFrame with n records selected at random.
|
852
|
+
#
|
853
|
+
# @param n [Integer]
|
854
|
+
# positive number of records to select.
|
855
|
+
# If n is smaller than or equal to `size`, records are selected without repetition.
|
856
|
+
# If n is greater than `size`, records are selected repeatedly.
|
857
|
+
# @return [DataFrame]
|
858
|
+
# a DataFrame with sampled records.
|
859
|
+
#
|
860
|
+
# @overload sample(prop)
|
861
|
+
# Return a DataFrame with records by proportion `prop` at random.
|
862
|
+
#
|
863
|
+
# @param prop [Float]
|
864
|
+
# positive proportion of records to select.
|
865
|
+
# Absolute number of records to select: `prop*size` is rounded (by `half: :up`).
|
866
|
+
# If prop is smaller than or equal to 1.0, records are selected without repetition.
|
867
|
+
# If prop is greater than 1.0, some records are selected repeatedly.
|
868
|
+
# @return [DataFrame]
|
869
|
+
# a DataFrame with sampled records.
|
870
|
+
#
|
871
|
+
# @since 0.5.0
|
872
|
+
#
|
873
|
+
def sample(n_or_prop = nil)
|
874
|
+
slice { indices.sample(n_or_prop) }
|
875
|
+
end
|
876
|
+
|
877
|
+
# Returns a DataFrame with shuffled rows.
|
878
|
+
#
|
879
|
+
# @note This method requires 'arrow-numo-narray' gem.
|
880
|
+
# @note Same behavior as `DataFrame#sample(1.0)`
|
881
|
+
# @return (see #sample)
|
882
|
+
# @since 0.5.0
|
883
|
+
#
|
884
|
+
def shuffle
|
885
|
+
sample(1.0)
|
886
|
+
end
|
887
|
+
|
839
888
|
# Select records by index Array to create a DataFrame.
|
840
889
|
#
|
841
890
|
# - TODO: support for option `boundscheck: true`
|
data/lib/red_amber/group.rb
CHANGED
@@ -4,6 +4,7 @@ module RedAmber
|
|
4
4
|
# Group class
|
5
5
|
class Group
|
6
6
|
include Enumerable # This feature is experimental
|
7
|
+
include Helper
|
7
8
|
|
8
9
|
using RefineArrowTable
|
9
10
|
|
@@ -114,15 +115,27 @@ module RedAmber
|
|
114
115
|
#
|
115
116
|
def filters
|
116
117
|
@filters ||= begin
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
118
|
+
group_values = group_table[group_keys].each_record.map(&:to_a)
|
119
|
+
|
120
|
+
Enumerator.new(group_table.n_rows) do |yielder|
|
121
|
+
group_values.each do |values|
|
122
|
+
booleans =
|
123
|
+
values.map.with_index do |value, i|
|
124
|
+
column = @dataframe[group_keys[i]].data
|
125
|
+
if value.nil?
|
126
|
+
Arrow::Function.find('is_null').execute([column])
|
127
|
+
elsif value.is_a?(Float) && value.nan?
|
128
|
+
Arrow::Function.find('is_nan').execute([column])
|
129
|
+
else
|
130
|
+
Arrow::Function.find('equal').execute([column, value])
|
131
|
+
end
|
132
|
+
end
|
133
|
+
filter =
|
134
|
+
booleans.reduce do |result, datum|
|
135
|
+
Arrow::Function.find('and_kleene').execute([result, datum])
|
136
|
+
end
|
137
|
+
yielder << Vector.create(filter.value)
|
138
|
+
end
|
126
139
|
end
|
127
140
|
end
|
128
141
|
end
|
@@ -147,11 +160,10 @@ module RedAmber
|
|
147
160
|
# group size.
|
148
161
|
#
|
149
162
|
def each
|
150
|
-
filters
|
151
163
|
return enum_for(:each) unless block_given?
|
152
164
|
|
153
|
-
|
154
|
-
yield @dataframe
|
165
|
+
filters.each do |filter|
|
166
|
+
yield @dataframe.filter(filter)
|
155
167
|
end
|
156
168
|
@filters.size
|
157
169
|
end
|
@@ -174,7 +186,7 @@ module RedAmber
|
|
174
186
|
# 2 Gentoo 124
|
175
187
|
#
|
176
188
|
def group_count
|
177
|
-
DataFrame.create(
|
189
|
+
DataFrame.create(group_table)
|
178
190
|
end
|
179
191
|
|
180
192
|
# String representation of self.
|
@@ -186,80 +198,157 @@ module RedAmber
|
|
186
198
|
#
|
187
199
|
# # =>
|
188
200
|
# #<RedAmber::Group : 0x0000000000003a98>
|
189
|
-
# species
|
190
|
-
# <string>
|
191
|
-
# 0 Adelie
|
192
|
-
# 1 Chinstrap
|
193
|
-
# 2 Gentoo
|
201
|
+
# species group_count
|
202
|
+
# <string> <uint8>
|
203
|
+
# 0 Adelie 152
|
204
|
+
# 1 Chinstrap 68
|
205
|
+
# 2 Gentoo 124
|
194
206
|
#
|
195
207
|
def inspect
|
196
|
-
"#<#{self.class} : #{format('0x%016x', object_id)}>\n#{
|
208
|
+
"#<#{self.class} : #{format('0x%016x', object_id)}>\n#{group_count}"
|
197
209
|
end
|
198
210
|
|
199
211
|
# Summarize Group by aggregation functions from the block.
|
200
212
|
#
|
201
|
-
# @
|
202
|
-
#
|
203
|
-
#
|
204
|
-
#
|
205
|
-
#
|
206
|
-
#
|
207
|
-
#
|
208
|
-
#
|
209
|
-
#
|
213
|
+
# @overload summarize
|
214
|
+
# Summarize by a function.
|
215
|
+
# @yieldparam group [Group]
|
216
|
+
# passes group object self.
|
217
|
+
# @yieldreturn [DataFrame]
|
218
|
+
# @yieldreturn [DataFrame, Array<DataFrame>, Hash{Symbol, String => DataFrame}]
|
219
|
+
# an aggregated DataFrame or an array of aggregated DataFrames.
|
220
|
+
# @return [DataFrame]
|
221
|
+
# summarized DataFrame.
|
222
|
+
# @example Single function and single variable
|
223
|
+
# group = penguins.group(:species)
|
224
|
+
# group
|
210
225
|
#
|
211
|
-
#
|
212
|
-
#
|
213
|
-
#
|
214
|
-
#
|
215
|
-
#
|
216
|
-
#
|
217
|
-
#
|
226
|
+
# # =>
|
227
|
+
# #<RedAmber::Group : 0x000000000000c314>
|
228
|
+
# species group_count
|
229
|
+
# <string> <uint8>
|
230
|
+
# 0 Adelie 152
|
231
|
+
# 1 Chinstrap 68
|
232
|
+
# 2 Gentoo 124
|
218
233
|
#
|
219
|
-
#
|
234
|
+
# group.summarize { mean(:bill_length_mm) }
|
220
235
|
#
|
221
|
-
#
|
222
|
-
#
|
223
|
-
#
|
224
|
-
#
|
225
|
-
#
|
226
|
-
#
|
227
|
-
#
|
236
|
+
# # =>
|
237
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c364>
|
238
|
+
# species mean(bill_length_mm)
|
239
|
+
# <string> <double>
|
240
|
+
# 0 Adelie 38.79
|
241
|
+
# 1 Chinstrap 48.83
|
242
|
+
# 2 Gentoo 47.5
|
228
243
|
#
|
229
|
-
#
|
230
|
-
#
|
244
|
+
# @example Single function only
|
245
|
+
# group.summarize { mean }
|
231
246
|
#
|
232
|
-
#
|
233
|
-
#
|
234
|
-
#
|
235
|
-
#
|
236
|
-
#
|
237
|
-
#
|
238
|
-
#
|
247
|
+
# # =>
|
248
|
+
# #<RedAmber::DataFrame : 3 x 6 Vectors, 0x000000000000c350>
|
249
|
+
# species mean(bill_length_mm) mean(bill_depth_mm) ... mean(year)
|
250
|
+
# <string> <double> <double> ... <double>
|
251
|
+
# 0 Adelie 38.79 18.35 ... 2008.01
|
252
|
+
# 1 Chinstrap 48.83 18.42 ... 2007.97
|
253
|
+
# 2 Gentoo 47.5 14.98 ... 2008.08
|
239
254
|
#
|
240
|
-
# @
|
241
|
-
#
|
255
|
+
# @overload summarize
|
256
|
+
# Summarize by a function.
|
242
257
|
#
|
243
|
-
#
|
244
|
-
#
|
245
|
-
#
|
246
|
-
#
|
247
|
-
#
|
248
|
-
#
|
249
|
-
#
|
250
|
-
#
|
251
|
-
|
252
|
-
|
258
|
+
# @yieldparam group [Group]
|
259
|
+
# passes group object self.
|
260
|
+
# @yieldreturn [Array<DataFrame>]
|
261
|
+
# an array of aggregated DataFrames.
|
262
|
+
# @return [DataFrame]
|
263
|
+
# summarized DataFrame.
|
264
|
+
# @example Multiple functions
|
265
|
+
# group.summarize { [min(:bill_length_mm), max(:bill_length_mm)] }
|
266
|
+
#
|
267
|
+
# # =>
|
268
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000c378>
|
269
|
+
# species min(bill_length_mm) max(bill_length_mm)
|
270
|
+
# <string> <double> <double>
|
271
|
+
# 0 Adelie 32.1 46.0
|
272
|
+
# 1 Chinstrap 40.9 58.0
|
273
|
+
# 2 Gentoo 40.9 59.6
|
274
|
+
#
|
275
|
+
# @overload summarize
|
276
|
+
# Summarize by a function.
|
277
|
+
#
|
278
|
+
# @yieldparam group [Group]
|
279
|
+
# passes group object self.
|
280
|
+
# @yieldreturn [Hash{Symbol, String => DataFrame}]
|
281
|
+
# a Hash that maps each new column name to an aggregated DataFrame.
|
282
|
+
# The DataFrame must return only one aggregated column.
|
283
|
+
# @return [DataFrame]
|
284
|
+
# summarized DataFrame.
|
285
|
+
# @example Rename column name by Hash
|
286
|
+
# group.summarize {
|
287
|
+
# {
|
288
|
+
# min_bill_length_mm: min(:bill_length_mm),
|
289
|
+
# max_bill_length_mm: max(:bill_length_mm),
|
290
|
+
# }
|
291
|
+
# }
|
292
|
+
#
|
293
|
+
# # =>
|
294
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000c378>
|
295
|
+
# species min_bill_length_mm max_bill_length_mm
|
296
|
+
# <string> <double> <double>
|
297
|
+
# 0 Adelie 32.1 46.0
|
298
|
+
# 1 Chinstrap 40.9 58.0
|
299
|
+
# 2 Gentoo 40.9 59.6
|
300
|
+
#
|
301
|
+
def summarize(*args, &block)
|
302
|
+
if block
|
303
|
+
agg = instance_eval(&block)
|
304
|
+
unless args.empty?
|
305
|
+
agg = [agg] if agg.is_a?(DataFrame)
|
306
|
+
agg = args.zip(agg).to_h
|
307
|
+
end
|
308
|
+
else
|
309
|
+
agg = args
|
310
|
+
end
|
311
|
+
|
253
312
|
case agg
|
254
313
|
when DataFrame
|
255
314
|
agg
|
256
315
|
when Array
|
257
|
-
|
316
|
+
aggregations =
|
317
|
+
agg.map do |df|
|
318
|
+
v = df.vectors[-1]
|
319
|
+
[v.key, v]
|
320
|
+
end
|
321
|
+
agg[0].assign(aggregations)
|
322
|
+
when Hash
|
323
|
+
aggregations =
|
324
|
+
agg.map do |key, df|
|
325
|
+
aggregated_keys = df.keys - @group_keys
|
326
|
+
if aggregated_keys.size > 1
|
327
|
+
message =
|
328
|
+
"accept only one column from the Hash: #{aggregated_keys.join(', ')}"
|
329
|
+
raise GroupArgumentError, message
|
330
|
+
end
|
331
|
+
|
332
|
+
v = df.vectors[-1]
|
333
|
+
[key, v]
|
334
|
+
end
|
335
|
+
agg.values[-1].drop(-1).assign(aggregations)
|
258
336
|
else
|
259
337
|
raise GroupArgumentError, "Unknown argument: #{agg}"
|
260
338
|
end
|
261
339
|
end
|
262
340
|
|
341
|
+
# Return grouped DataFrame only for group keys.
|
342
|
+
#
|
343
|
+
# @return [DataFrame]
|
344
|
+
# grouped DataFrame projected only for group_keys.
|
345
|
+
# @since 0.5.0
|
346
|
+
#
|
347
|
+
def grouped_frame
|
348
|
+
DataFrame.create(group_table[group_keys])
|
349
|
+
end
|
350
|
+
alias_method :none, :grouped_frame
|
351
|
+
|
263
352
|
# Aggregating summary.
|
264
353
|
#
|
265
354
|
# @api private
|
@@ -270,37 +359,49 @@ module RedAmber
|
|
270
359
|
|
271
360
|
private
|
272
361
|
|
273
|
-
def
|
274
|
-
|
275
|
-
[function_name]
|
276
|
-
else
|
277
|
-
summary_keys.map { |key| "#{function_name}(#{key})" }
|
278
|
-
end
|
279
|
-
end
|
280
|
-
|
281
|
-
# @note `@group_counts.sum == @dataframe.size``
|
282
|
-
def group_counts
|
283
|
-
@group_counts ||= filters.map(&:sum)
|
362
|
+
def group_table
|
363
|
+
@group_table ||= build_aggregated_table
|
284
364
|
end
|
285
365
|
|
286
|
-
def
|
287
|
-
|
288
|
-
|
289
|
-
|
366
|
+
def build_aggregated_table
|
367
|
+
keys = @group_keys
|
368
|
+
key = keys[0]
|
369
|
+
table = @dataframe.table
|
370
|
+
|
371
|
+
plan = Arrow::ExecutePlan.new
|
372
|
+
source_node = plan.build_source_node(table)
|
373
|
+
|
374
|
+
aggregate_node =
|
375
|
+
plan.build_aggregate_node(source_node, {
|
376
|
+
aggregations: [{ function: 'hash_count',
|
377
|
+
input: key }], keys: keys
|
378
|
+
})
|
379
|
+
expressions = keys.map { |k| Arrow::FieldExpression.new(k) }
|
380
|
+
null_count = Arrow::Function.find('is_null').execute([table[key]]).value.sum
|
381
|
+
count_field = Arrow::FieldExpression.new("count(#{key})")
|
382
|
+
if null_count.zero?
|
383
|
+
expressions << count_field
|
384
|
+
else
|
385
|
+
is_zero =
|
386
|
+
Arrow::CallExpression.new('equal', [count_field, Arrow::Int64Scalar.new(0)])
|
387
|
+
null_count_scalar = Arrow::Int64Scalar.new(null_count)
|
388
|
+
expressions <<
|
389
|
+
Arrow::CallExpression.new('if_else', [
|
390
|
+
is_zero, null_count_scalar, count_field
|
391
|
+
])
|
290
392
|
end
|
291
|
-
|
393
|
+
options = Arrow::ProjectNodeOptions.new(expressions, keys + [:group_count])
|
394
|
+
project_node = plan.build_project_node(aggregate_node, options)
|
292
395
|
|
293
|
-
|
294
|
-
|
295
|
-
arrays = table.columns.map(&:data)
|
396
|
+
sink_and_start_plan(plan, project_node)
|
397
|
+
end
|
296
398
|
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
399
|
+
def build_aggregation_keys(function_name, summary_keys)
|
400
|
+
if summary_keys.empty?
|
401
|
+
[function_name]
|
402
|
+
else
|
403
|
+
summary_keys.map { |key| "#{function_name}(#{key})" }
|
301
404
|
end
|
302
|
-
|
303
|
-
Arrow::Table.new(Arrow::Schema.new(fields), arrays)
|
304
405
|
end
|
305
406
|
|
306
407
|
# Call Vector aggregating function and return an array of arrays:
|
data/lib/red_amber/helper.rb
CHANGED
@@ -78,6 +78,32 @@ module RedAmber
|
|
78
78
|
Array(range)
|
79
79
|
end
|
80
80
|
end
|
81
|
+
|
82
|
+
# Create sink node and execute plan
|
83
|
+
#
|
84
|
+
# @param plan [Arrow::ExecutePlan]
|
85
|
+
# Execute plan of Acero.
|
86
|
+
# @param node [Arrow::ExecuteNode]
|
87
|
+
# Execute node of Acero.
|
88
|
+
# @param output_schema [Arrow::Schema, nil]
|
89
|
+
# Schema of table to output. If it is nil, output_schema of
|
90
|
+
# sink node is used.
|
91
|
+
# @return [Arrow::Table]
|
92
|
+
# Result of plan.
|
93
|
+
# @since 0.5.0
|
94
|
+
#
|
95
|
+
def sink_and_start_plan(plan, node, output_schema: nil)
|
96
|
+
sink_node_options = Arrow::SinkNodeOptions.new
|
97
|
+
plan.build_sink_node(node, sink_node_options)
|
98
|
+
plan.validate
|
99
|
+
plan.start
|
100
|
+
plan.wait
|
101
|
+
output_schema = node.output_schema if output_schema.nil?
|
102
|
+
reader = sink_node_options.get_reader(output_schema)
|
103
|
+
table = reader.read_all
|
104
|
+
plan.stop
|
105
|
+
table
|
106
|
+
end
|
81
107
|
end
|
82
108
|
|
83
109
|
# rubocop:disable Layout/LineLength
|
data/lib/red_amber/subframes.rb
CHANGED
@@ -20,6 +20,7 @@ module RedAmber
|
|
20
20
|
@sizes = []
|
21
21
|
end
|
22
22
|
|
23
|
+
# Generic iterator method
|
23
24
|
def each
|
24
25
|
@selectors.each
|
25
26
|
end
|
@@ -27,14 +28,20 @@ module RedAmber
|
|
27
28
|
|
28
29
|
# Boolean selectors of sub-dataframes
|
29
30
|
class Filters < Selectors
|
31
|
+
# Return sizes of filter
|
32
|
+
# @return [Array<Integer>]
|
33
|
+
# sizes of each sub dataframes.
|
34
|
+
# Counts true for each filter.
|
30
35
|
def sizes
|
31
|
-
# count true
|
32
36
|
@sizes = @selectors.map { |s| s.to_a.count { _1 } } # rubocop:disable Performance/Size
|
33
37
|
end
|
34
38
|
end
|
35
39
|
|
36
40
|
# Index selectors of sub-dataframes
|
37
41
|
class Indices < Selectors
|
42
|
+
# Return sizes of selector indices.
|
43
|
+
# @return [Array<Integer>]
|
44
|
+
# sizes of each sub dataframes.
|
38
45
|
def sizes
|
39
46
|
@sizes = @selectors.map(&:size)
|
40
47
|
end
|
@@ -93,7 +100,7 @@ module RedAmber
|
|
93
100
|
# @since 0.4.0
|
94
101
|
#
|
95
102
|
def by_group(group)
|
96
|
-
SubFrames.
|
103
|
+
SubFrames.by_filters(group.dataframe, group.filters)
|
97
104
|
end
|
98
105
|
|
99
106
|
# Create a new SubFrames object from a DataFrame and an array of indices.
|
@@ -291,15 +298,15 @@ module RedAmber
|
|
291
298
|
selectors = yield(dataframe)
|
292
299
|
end
|
293
300
|
|
294
|
-
if dataframe.empty? || selectors.nil? || selectors.
|
301
|
+
if dataframe.empty? || selectors.nil? || selectors.size.zero? # rubocop:disable Style/ZeroLengthPredicate
|
295
302
|
@baseframe = DataFrame.new
|
296
303
|
@selectors = Selectors.new([])
|
297
304
|
else
|
298
305
|
@baseframe = dataframe
|
299
306
|
@selectors =
|
300
|
-
if selectors
|
307
|
+
if selectors.first.boolean?
|
301
308
|
Filters.new(selectors)
|
302
|
-
elsif selectors
|
309
|
+
elsif selectors.first.numeric?
|
303
310
|
Indices.new(selectors)
|
304
311
|
else
|
305
312
|
raise SubFramesArgumentError, "illegal type: #{selectors}"
|
data/lib/red_amber/vector.rb
CHANGED
@@ -10,21 +10,54 @@ module RedAmber
|
|
10
10
|
include ArrowFunction
|
11
11
|
include VectorUpdatable
|
12
12
|
include VectorSelectable
|
13
|
+
include VectorStringFunction
|
13
14
|
|
14
15
|
using RefineArrayLike
|
15
16
|
|
16
|
-
#
|
17
|
+
# Entity of Vector.
|
17
18
|
#
|
18
|
-
# @
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
# @return [Arrow::Array]
|
20
|
+
#
|
21
|
+
attr_reader :data
|
22
|
+
alias_method :to_arrow_array, :data
|
23
|
+
|
24
|
+
# Associated key name when self is in a DataFrame.
|
25
|
+
#
|
26
|
+
# Default Vector is 'head-less' (key-less).
|
27
|
+
# @return [Symbol]
|
23
28
|
#
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
29
|
+
attr_accessor :key
|
30
|
+
|
31
|
+
class << self
|
32
|
+
# Create a Vector (calling `.new`).
|
33
|
+
#
|
34
|
+
# @param (see #initialize)
|
35
|
+
# @return (see #initialize)
|
36
|
+
# @example Create an empty Vector.
|
37
|
+
# Vector[]
|
38
|
+
# # =>
|
39
|
+
# #<RedAmber::Vector(:string, size=0):0x000000000000e2cc>
|
40
|
+
# []
|
41
|
+
#
|
42
|
+
# @since 0.5.0
|
43
|
+
#
|
44
|
+
def [](...)
|
45
|
+
new(...)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Quicker constructor of Vector.
|
49
|
+
#
|
50
|
+
# @param arrow_array [Arrow::Array]
|
51
|
+
# Arrow::Array object to have in the Vector.
|
52
|
+
# @return [Vector]
|
53
|
+
# created Vector.
|
54
|
+
# @note This method doesn't check argument type.
|
55
|
+
#
|
56
|
+
def create(arrow_array)
|
57
|
+
instance = allocate
|
58
|
+
instance.instance_variable_set(:@data, arrow_array)
|
59
|
+
instance
|
60
|
+
end
|
28
61
|
end
|
29
62
|
|
30
63
|
# Create a Vector.
|
@@ -51,20 +84,6 @@ module RedAmber
|
|
51
84
|
end
|
52
85
|
end
|
53
86
|
|
54
|
-
# Entity of Vector.
|
55
|
-
#
|
56
|
-
# @return [Arrow::Array]
|
57
|
-
#
|
58
|
-
attr_reader :data
|
59
|
-
alias_method :to_arrow_array, :data
|
60
|
-
|
61
|
-
# Associated key name when self is in a DataFrame.
|
62
|
-
#
|
63
|
-
# Default Vector is 'head-less' (key-less).
|
64
|
-
# @return [Symbol]
|
65
|
-
#
|
66
|
-
attr_accessor :key
|
67
|
-
|
68
87
|
# Return other as a Vector which is same data type as self.
|
69
88
|
#
|
70
89
|
# @param other [Vector, Array, Arrow::Array, Arrow::ChunkedArray]
|
@@ -161,6 +161,22 @@ module RedAmber
|
|
161
161
|
#
|
162
162
|
define_unary_aggregation :min_max
|
163
163
|
|
164
|
+
# Compute the 1 most common values and their respective
|
165
|
+
# occurrence counts.
|
166
|
+
#
|
167
|
+
# @note Self must be a numeric or a boolean Vector.
|
168
|
+
# @note ModeOptions are not supported in 0.5.0.
|
169
|
+
# Only one mode value is returned.
|
170
|
+
# @api private
|
171
|
+
# @return [Hash{'mode'=>mode, 'count'=>count}]
|
172
|
+
# mode and count of self in a Hash.
|
173
|
+
# @since 0.5.0
|
174
|
+
#
|
175
|
+
def mode
|
176
|
+
datum = find(:mode).execute([data])
|
177
|
+
datum.value.to_a.first
|
178
|
+
end
|
179
|
+
|
164
180
|
# Compute product value of self.
|
165
181
|
#
|
166
182
|
# @note Self must be a numeric Vector.
|
@@ -241,6 +257,16 @@ module RedAmber
|
|
241
257
|
# - nearest: returns i or j, whichever is closer.
|
242
258
|
# - midpoint: returns (i + j) / 2.
|
243
259
|
|
260
|
+
# Get a non-nil element in self.
|
261
|
+
#
|
262
|
+
# @return [Object, nil]
|
263
|
+
# first non-nil value detected. If all elements are nil, return nil.
|
264
|
+
# @since 0.5.0
|
265
|
+
#
|
266
|
+
def one
|
267
|
+
each.find { !_1.nil? }
|
268
|
+
end
|
269
|
+
|
244
270
|
# Returns a quantile value.
|
245
271
|
# - 0.5 quantile (median) is returned by default.
|
246
272
|
# - Or return quantile for specified probability (prob).
|