red-arrow 4.0.1 → 7.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +10 -0
- data/README.md +23 -0
- data/ext/arrow/arrow.cpp +3 -0
- data/ext/arrow/converters.cpp +5 -0
- data/ext/arrow/converters.hpp +126 -0
- data/ext/arrow/extconf.rb +13 -0
- data/ext/arrow/memory-view.cpp +311 -0
- data/ext/arrow/memory-view.hpp +26 -0
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/aggregate-node-options.rb +35 -0
- data/lib/arrow/aggregation.rb +46 -0
- data/lib/arrow/array-builder.rb +5 -0
- data/lib/arrow/array.rb +12 -0
- data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
- data/lib/arrow/buffer.rb +10 -6
- data/lib/arrow/column-containable.rb +100 -1
- data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
- data/lib/arrow/datum.rb +102 -0
- data/lib/arrow/equal-options.rb +38 -0
- data/lib/arrow/expression.rb +48 -0
- data/lib/arrow/file-system.rb +34 -0
- data/lib/arrow/function.rb +52 -0
- data/lib/arrow/group.rb +116 -124
- data/lib/arrow/loader.rb +58 -0
- data/lib/arrow/map-array-builder.rb +109 -0
- data/lib/arrow/map-array.rb +26 -0
- data/lib/arrow/map-data-type.rb +89 -0
- data/lib/arrow/path-extension.rb +1 -1
- data/lib/arrow/record-batch-reader.rb +41 -0
- data/lib/arrow/record-batch.rb +0 -2
- data/lib/arrow/s3-global-options.rb +38 -0
- data/lib/arrow/scalar.rb +32 -0
- data/lib/arrow/slicer.rb +44 -143
- data/lib/arrow/sort-key.rb +61 -55
- data/lib/arrow/sort-options.rb +8 -8
- data/lib/arrow/source-node-options.rb +32 -0
- data/lib/arrow/string-dictionary-array-builder.rb +27 -0
- data/lib/arrow/symbol-values-appendable.rb +34 -0
- data/lib/arrow/table-concatenate-options.rb +36 -0
- data/lib/arrow/table-formatter.rb +141 -17
- data/lib/arrow/table-list-formatter.rb +5 -3
- data/lib/arrow/table-loader.rb +119 -44
- data/lib/arrow/table-saver.rb +36 -5
- data/lib/arrow/table-table-formatter.rb +7 -31
- data/lib/arrow/table.rb +112 -40
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -9
- data/test/helper.rb +3 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +19 -0
- data/test/raw-records/test-map-array.rb +441 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array-builder.rb +7 -0
- data/test/test-array.rb +34 -0
- data/test/test-binary-dictionary-array-builder.rb +103 -0
- data/test/test-boolean-scalar.rb +26 -0
- data/test/test-csv-loader.rb +8 -8
- data/test/test-expression.rb +40 -0
- data/test/test-float-scalar.rb +46 -0
- data/test/test-function.rb +210 -0
- data/test/test-group.rb +75 -51
- data/test/test-map-array-builder.rb +110 -0
- data/test/test-map-array.rb +33 -0
- data/test/test-map-data-type.rb +36 -0
- data/test/test-memory-view.rb +434 -0
- data/test/test-record-batch-reader.rb +46 -0
- data/test/test-record-batch.rb +42 -0
- data/test/test-slicer.rb +166 -167
- data/test/test-string-dictionary-array-builder.rb +103 -0
- data/test/test-table.rb +376 -56
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +17 -0
- data/test/values/test-map-array.rb +433 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +117 -168
data/lib/arrow/table.rb
CHANGED
@@ -195,8 +195,6 @@ module Arrow
|
|
195
195
|
alias_method :size, :n_rows
|
196
196
|
alias_method :length, :n_rows
|
197
197
|
|
198
|
-
alias_method :[], :find_column
|
199
|
-
|
200
198
|
alias_method :slice_raw, :slice
|
201
199
|
|
202
200
|
# @overload slice(offset, length)
|
@@ -236,6 +234,12 @@ module Arrow
|
|
236
234
|
# @return [Arrow::Table]
|
237
235
|
# The sub `Arrow::Table` that covers only rows of the range of indices.
|
238
236
|
#
|
237
|
+
# @overload slice(conditions)
|
238
|
+
#
|
239
|
+
# @param conditions [Hash] The conditions to select records.
|
240
|
+
# @return [Arrow::Table]
|
241
|
+
# The sub `Arrow::Table` that covers only rows matched by the conditions.
|
242
|
+
#
|
239
243
|
# @overload slice
|
240
244
|
#
|
241
245
|
# @yield [slicer] Gives slicer that constructs condition to select records.
|
@@ -263,12 +267,37 @@ module Arrow
|
|
263
267
|
expected_n_args = nil
|
264
268
|
case args.size
|
265
269
|
when 1
|
266
|
-
|
270
|
+
case args[0]
|
271
|
+
when Integer
|
267
272
|
index = args[0]
|
268
273
|
index += n_rows if index < 0
|
269
274
|
return nil if index < 0
|
270
275
|
return nil if index >= n_rows
|
271
276
|
return Record.new(self, index)
|
277
|
+
when Hash
|
278
|
+
condition_pairs = args[0]
|
279
|
+
slicer = Slicer.new(self)
|
280
|
+
conditions = []
|
281
|
+
condition_pairs.each do |key, value|
|
282
|
+
case value
|
283
|
+
when Range
|
284
|
+
# TODO: Optimize the "begin <= key <= end" case once the missing "between" kernel is available:
|
285
|
+
# https://issues.apache.org/jira/browse/ARROW-9843
|
286
|
+
unless value.begin.nil?
|
287
|
+
conditions << (slicer[key] >= value.begin)
|
288
|
+
end
|
289
|
+
unless value.end.nil?
|
290
|
+
if value.exclude_end?
|
291
|
+
conditions << (slicer[key] < value.end)
|
292
|
+
else
|
293
|
+
conditions << (slicer[key] <= value.end)
|
294
|
+
end
|
295
|
+
end
|
296
|
+
else
|
297
|
+
conditions << (slicer[key] == value)
|
298
|
+
end
|
299
|
+
end
|
300
|
+
slicers << conditions.inject(:&)
|
272
301
|
else
|
273
302
|
slicers << args[0]
|
274
303
|
end
|
@@ -397,41 +426,6 @@ module Arrow
|
|
397
426
|
remove_column_raw(index)
|
398
427
|
end
|
399
428
|
|
400
|
-
# TODO
|
401
|
-
#
|
402
|
-
# @return [Arrow::Table]
|
403
|
-
def select_columns(*selectors, &block)
|
404
|
-
if selectors.empty?
|
405
|
-
return to_enum(__method__) unless block_given?
|
406
|
-
selected_columns = columns.select(&block)
|
407
|
-
else
|
408
|
-
selected_columns = []
|
409
|
-
selectors.each do |selector|
|
410
|
-
case selector
|
411
|
-
when String, Symbol
|
412
|
-
column = find_column(selector)
|
413
|
-
if column.nil?
|
414
|
-
message = "unknown column: #{selector.inspect}: #{inspect}"
|
415
|
-
raise KeyError.new(message)
|
416
|
-
end
|
417
|
-
selected_columns << column
|
418
|
-
when Range
|
419
|
-
selected_columns.concat(columns[selector])
|
420
|
-
else
|
421
|
-
column = columns[selector]
|
422
|
-
if column.nil?
|
423
|
-
message = "out of index (0..#{n_columns - 1}): " +
|
424
|
-
"#{selector.inspect}: #{inspect}"
|
425
|
-
raise IndexError.new(message)
|
426
|
-
end
|
427
|
-
selected_columns << column
|
428
|
-
end
|
429
|
-
end
|
430
|
-
selected_columns = selected_columns.select(&block) if block_given?
|
431
|
-
end
|
432
|
-
self.class.new(selected_columns)
|
433
|
-
end
|
434
|
-
|
435
429
|
# Experimental
|
436
430
|
def group(*keys)
|
437
431
|
Group.new(self, keys)
|
@@ -442,8 +436,8 @@ module Arrow
|
|
442
436
|
RollingWindow.new(self, size)
|
443
437
|
end
|
444
438
|
|
445
|
-
def save(
|
446
|
-
saver = TableSaver.new(self,
|
439
|
+
def save(output, options={})
|
440
|
+
saver = TableSaver.new(self, output, options)
|
447
441
|
saver.save
|
448
442
|
end
|
449
443
|
|
@@ -454,6 +448,84 @@ module Arrow
|
|
454
448
|
self.class.new(schema, packed_arrays)
|
455
449
|
end
|
456
450
|
|
451
|
+
# @overload join(right, key, type: :inner, left_outputs: nil, right_outputs: nil)
|
452
|
+
# @!macro join_common_before
|
453
|
+
# @param right [Arrow::Table] The right table.
|
454
|
+
#
|
455
|
+
# Join columns with `right` on join key columns.
|
456
|
+
#
|
457
|
+
# @!macro join_common_after
|
458
|
+
# @param type [Arrow::JoinType] How to join.
|
459
|
+
# @param left_outputs [::Array<String, Symbol>] Output columns in
|
460
|
+
# `self`.
|
461
|
+
#
|
462
|
+
# If both of `left_outputs` and `right_outputs` aren't
|
463
|
+
# specified, all columns in `self` and `right` are
|
464
|
+
# outputted.
|
465
|
+
# @param right_outputs [::Array<String, Symbol>] Output columns in
|
466
|
+
# `right`.
|
467
|
+
#
|
468
|
+
# If both of `left_outputs` and `right_outputs` aren't
|
469
|
+
# specified, all columns in `self` and `right` are
|
470
|
+
# outputted.
|
471
|
+
# @return [Arrow::Table]
|
472
|
+
# The joined `Arrow::Table`.
|
473
|
+
#
|
474
|
+
# @macro join_common_before
|
475
|
+
# @param key [String, Symbol] A join key.
|
476
|
+
# @macro join_common_after
|
477
|
+
#
|
478
|
+
# @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
|
479
|
+
#
|
480
|
+
# @macro join_common_before
|
481
|
+
# @param keys [::Array<String, Symbol>] Join keys.
|
482
|
+
# @macro join_common_after
|
483
|
+
#
|
484
|
+
# @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
|
485
|
+
#
|
486
|
+
# @macro join_common_before
|
487
|
+
# @param keys [Hash] Specify join keys in `self` and `right` separately.
|
488
|
+
# @option keys [String, Symbol, ::Array<String, Symbol>] :left
|
489
|
+
# Join keys in `self`.
|
490
|
+
# @option keys [String, Symbol, ::Array<String, Symbol>] :right
|
491
|
+
# Join keys in `right`.
|
492
|
+
# @macro join_common_after
|
493
|
+
#
|
494
|
+
# @since 7.0.0
|
495
|
+
def join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
|
496
|
+
plan = ExecutePlan.new
|
497
|
+
left_node = plan.build_source_node(self)
|
498
|
+
right_node = plan.build_source_node(right)
|
499
|
+
if keys.is_a?(Hash)
|
500
|
+
left_keys = keys[:left]
|
501
|
+
right_keys = keys[:right]
|
502
|
+
else
|
503
|
+
left_keys = keys
|
504
|
+
right_keys = keys
|
505
|
+
end
|
506
|
+
left_keys = Array(left_keys)
|
507
|
+
right_keys = Array(right_keys)
|
508
|
+
hash_join_node_options = HashJoinNodeOptions.new(type,
|
509
|
+
left_keys,
|
510
|
+
right_keys)
|
511
|
+
unless left_outputs.nil?
|
512
|
+
hash_join_node_options.left_outputs = left_outputs
|
513
|
+
end
|
514
|
+
unless right_outputs.nil?
|
515
|
+
hash_join_node_options.right_outputs = right_outputs
|
516
|
+
end
|
517
|
+
hash_join_node = plan.build_hash_join_node(left_node,
|
518
|
+
right_node,
|
519
|
+
hash_join_node_options)
|
520
|
+
sink_node_options = SinkNodeOptions.new
|
521
|
+
plan.build_sink_node(hash_join_node, sink_node_options)
|
522
|
+
plan.validate
|
523
|
+
plan.start
|
524
|
+
plan.wait
|
525
|
+
reader = sink_node_options.get_reader(hash_join_node.output_schema)
|
526
|
+
reader.read_all
|
527
|
+
end
|
528
|
+
|
457
529
|
alias_method :to_s_raw, :to_s
|
458
530
|
def to_s(options={})
|
459
531
|
format = options[:format]
|
data/lib/arrow/version.rb
CHANGED
data/red-arrow.gemspec
CHANGED
@@ -48,18 +48,10 @@ Gem::Specification.new do |spec|
|
|
48
48
|
|
49
49
|
spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
|
50
50
|
spec.add_runtime_dependency("extpp", ">= 0.0.7")
|
51
|
-
spec.add_runtime_dependency("gio2", ">= 3.
|
51
|
+
spec.add_runtime_dependency("gio2", ">= 3.5.0")
|
52
52
|
spec.add_runtime_dependency("native-package-installer")
|
53
53
|
spec.add_runtime_dependency("pkg-config")
|
54
54
|
|
55
|
-
spec.add_development_dependency("benchmark-driver")
|
56
|
-
spec.add_development_dependency("bundler")
|
57
|
-
spec.add_development_dependency("faker")
|
58
|
-
spec.add_development_dependency("rake")
|
59
|
-
spec.add_development_dependency("redcarpet")
|
60
|
-
spec.add_development_dependency("test-unit")
|
61
|
-
spec.add_development_dependency("yard")
|
62
|
-
|
63
55
|
required_msys2_package_version = version_components[0, 3].join(".")
|
64
56
|
spec.metadata["msys2_mingw_dependencies"] =
|
65
57
|
"arrow>=#{required_msys2_package_version}"
|
data/test/helper.rb
CHANGED
@@ -394,6 +394,20 @@ module RawRecordsDenseUnionArrayTests
|
|
394
394
|
assert_equal(records, target.raw_records)
|
395
395
|
end
|
396
396
|
|
397
|
+
def test_map
|
398
|
+
records = [
|
399
|
+
[{"0" => {"key1" => true, "key2" => nil}}],
|
400
|
+
[{"1" => nil}],
|
401
|
+
]
|
402
|
+
target = build({
|
403
|
+
type: :map,
|
404
|
+
key: :string,
|
405
|
+
item: :boolean,
|
406
|
+
},
|
407
|
+
records)
|
408
|
+
assert_equal(records, target.raw_records)
|
409
|
+
end
|
410
|
+
|
397
411
|
def test_sparse_union
|
398
412
|
omit("Need to add support for SparseUnionArrayBuilder")
|
399
413
|
records = [
|
@@ -451,6 +451,25 @@ module RawRecordsListArrayTests
|
|
451
451
|
assert_equal(records, target.raw_records)
|
452
452
|
end
|
453
453
|
|
454
|
+
def test_map
|
455
|
+
records = [
|
456
|
+
[
|
457
|
+
[
|
458
|
+
{"key1" => true, "key2" => nil},
|
459
|
+
nil,
|
460
|
+
],
|
461
|
+
],
|
462
|
+
[nil],
|
463
|
+
]
|
464
|
+
target = build({
|
465
|
+
type: :map,
|
466
|
+
key: :string,
|
467
|
+
item: :boolean,
|
468
|
+
},
|
469
|
+
records)
|
470
|
+
assert_equal(records, target.raw_records)
|
471
|
+
end
|
472
|
+
|
454
473
|
def test_sparse
|
455
474
|
omit("Need to add support for SparseUnionArrayBuilder")
|
456
475
|
records = [
|