red-arrow 4.0.1 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +10 -0
- data/README.md +23 -0
- data/ext/arrow/arrow.cpp +3 -0
- data/ext/arrow/converters.cpp +5 -0
- data/ext/arrow/converters.hpp +126 -0
- data/ext/arrow/extconf.rb +13 -0
- data/ext/arrow/memory-view.cpp +311 -0
- data/ext/arrow/memory-view.hpp +26 -0
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/aggregate-node-options.rb +35 -0
- data/lib/arrow/aggregation.rb +46 -0
- data/lib/arrow/array-builder.rb +5 -0
- data/lib/arrow/array.rb +12 -0
- data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
- data/lib/arrow/buffer.rb +10 -6
- data/lib/arrow/column-containable.rb +100 -1
- data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
- data/lib/arrow/datum.rb +102 -0
- data/lib/arrow/equal-options.rb +38 -0
- data/lib/arrow/expression.rb +48 -0
- data/lib/arrow/file-system.rb +34 -0
- data/lib/arrow/function.rb +52 -0
- data/lib/arrow/group.rb +116 -124
- data/lib/arrow/loader.rb +58 -0
- data/lib/arrow/map-array-builder.rb +109 -0
- data/lib/arrow/map-array.rb +26 -0
- data/lib/arrow/map-data-type.rb +89 -0
- data/lib/arrow/path-extension.rb +1 -1
- data/lib/arrow/record-batch-reader.rb +41 -0
- data/lib/arrow/record-batch.rb +0 -2
- data/lib/arrow/s3-global-options.rb +38 -0
- data/lib/arrow/scalar.rb +32 -0
- data/lib/arrow/slicer.rb +44 -143
- data/lib/arrow/sort-key.rb +61 -55
- data/lib/arrow/sort-options.rb +8 -8
- data/lib/arrow/source-node-options.rb +32 -0
- data/lib/arrow/string-dictionary-array-builder.rb +27 -0
- data/lib/arrow/symbol-values-appendable.rb +34 -0
- data/lib/arrow/table-concatenate-options.rb +36 -0
- data/lib/arrow/table-formatter.rb +141 -17
- data/lib/arrow/table-list-formatter.rb +5 -3
- data/lib/arrow/table-loader.rb +119 -44
- data/lib/arrow/table-saver.rb +36 -5
- data/lib/arrow/table-table-formatter.rb +7 -31
- data/lib/arrow/table.rb +112 -40
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -9
- data/test/helper.rb +3 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +19 -0
- data/test/raw-records/test-map-array.rb +441 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array-builder.rb +7 -0
- data/test/test-array.rb +34 -0
- data/test/test-binary-dictionary-array-builder.rb +103 -0
- data/test/test-boolean-scalar.rb +26 -0
- data/test/test-csv-loader.rb +8 -8
- data/test/test-expression.rb +40 -0
- data/test/test-float-scalar.rb +46 -0
- data/test/test-function.rb +210 -0
- data/test/test-group.rb +75 -51
- data/test/test-map-array-builder.rb +110 -0
- data/test/test-map-array.rb +33 -0
- data/test/test-map-data-type.rb +36 -0
- data/test/test-memory-view.rb +434 -0
- data/test/test-record-batch-reader.rb +46 -0
- data/test/test-record-batch.rb +42 -0
- data/test/test-slicer.rb +166 -167
- data/test/test-string-dictionary-array-builder.rb +103 -0
- data/test/test-table.rb +376 -56
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +17 -0
- data/test/values/test-map-array.rb +433 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +117 -168
data/lib/arrow/table.rb
CHANGED
@@ -195,8 +195,6 @@ module Arrow
|
|
195
195
|
alias_method :size, :n_rows
|
196
196
|
alias_method :length, :n_rows
|
197
197
|
|
198
|
-
alias_method :[], :find_column
|
199
|
-
|
200
198
|
alias_method :slice_raw, :slice
|
201
199
|
|
202
200
|
# @overload slice(offset, length)
|
@@ -236,6 +234,12 @@ module Arrow
|
|
236
234
|
# @return [Arrow::Table]
|
237
235
|
# The sub `Arrow::Table` that covers only rows of the range of indices.
|
238
236
|
#
|
237
|
+
# @overload slice(conditions)
|
238
|
+
#
|
239
|
+
# @param conditions [Hash] The conditions to select records.
|
240
|
+
# @return [Arrow::Table]
|
241
|
+
# The sub `Arrow::Table` that covers only rows matched by condition
|
242
|
+
#
|
239
243
|
# @overload slice
|
240
244
|
#
|
241
245
|
# @yield [slicer] Gives slicer that constructs condition to select records.
|
@@ -263,12 +267,37 @@ module Arrow
|
|
263
267
|
expected_n_args = nil
|
264
268
|
case args.size
|
265
269
|
when 1
|
266
|
-
|
270
|
+
case args[0]
|
271
|
+
when Integer
|
267
272
|
index = args[0]
|
268
273
|
index += n_rows if index < 0
|
269
274
|
return nil if index < 0
|
270
275
|
return nil if index >= n_rows
|
271
276
|
return Record.new(self, index)
|
277
|
+
when Hash
|
278
|
+
condition_pairs = args[0]
|
279
|
+
slicer = Slicer.new(self)
|
280
|
+
conditions = []
|
281
|
+
condition_pairs.each do |key, value|
|
282
|
+
case value
|
283
|
+
when Range
|
284
|
+
# TODO: Optimize "begin <= key <= end" case by missing "between" kernel
|
285
|
+
# https://issues.apache.org/jira/browse/ARROW-9843
|
286
|
+
unless value.begin.nil?
|
287
|
+
conditions << (slicer[key] >= value.begin)
|
288
|
+
end
|
289
|
+
unless value.end.nil?
|
290
|
+
if value.exclude_end?
|
291
|
+
conditions << (slicer[key] < value.end)
|
292
|
+
else
|
293
|
+
conditions << (slicer[key] <= value.end)
|
294
|
+
end
|
295
|
+
end
|
296
|
+
else
|
297
|
+
conditions << (slicer[key] == value)
|
298
|
+
end
|
299
|
+
end
|
300
|
+
slicers << conditions.inject(:&)
|
272
301
|
else
|
273
302
|
slicers << args[0]
|
274
303
|
end
|
@@ -397,41 +426,6 @@ module Arrow
|
|
397
426
|
remove_column_raw(index)
|
398
427
|
end
|
399
428
|
|
400
|
-
# TODO
|
401
|
-
#
|
402
|
-
# @return [Arrow::Table]
|
403
|
-
def select_columns(*selectors, &block)
|
404
|
-
if selectors.empty?
|
405
|
-
return to_enum(__method__) unless block_given?
|
406
|
-
selected_columns = columns.select(&block)
|
407
|
-
else
|
408
|
-
selected_columns = []
|
409
|
-
selectors.each do |selector|
|
410
|
-
case selector
|
411
|
-
when String, Symbol
|
412
|
-
column = find_column(selector)
|
413
|
-
if column.nil?
|
414
|
-
message = "unknown column: #{selector.inspect}: #{inspect}"
|
415
|
-
raise KeyError.new(message)
|
416
|
-
end
|
417
|
-
selected_columns << column
|
418
|
-
when Range
|
419
|
-
selected_columns.concat(columns[selector])
|
420
|
-
else
|
421
|
-
column = columns[selector]
|
422
|
-
if column.nil?
|
423
|
-
message = "out of index (0..#{n_columns - 1}): " +
|
424
|
-
"#{selector.inspect}: #{inspect}"
|
425
|
-
raise IndexError.new(message)
|
426
|
-
end
|
427
|
-
selected_columns << column
|
428
|
-
end
|
429
|
-
end
|
430
|
-
selected_columns = selected_columns.select(&block) if block_given?
|
431
|
-
end
|
432
|
-
self.class.new(selected_columns)
|
433
|
-
end
|
434
|
-
|
435
429
|
# Experimental
|
436
430
|
def group(*keys)
|
437
431
|
Group.new(self, keys)
|
@@ -442,8 +436,8 @@ module Arrow
|
|
442
436
|
RollingWindow.new(self, size)
|
443
437
|
end
|
444
438
|
|
445
|
-
def save(
|
446
|
-
saver = TableSaver.new(self,
|
439
|
+
def save(output, options={})
|
440
|
+
saver = TableSaver.new(self, output, options)
|
447
441
|
saver.save
|
448
442
|
end
|
449
443
|
|
@@ -454,6 +448,84 @@ module Arrow
|
|
454
448
|
self.class.new(schema, packed_arrays)
|
455
449
|
end
|
456
450
|
|
451
|
+
# @overload join(right, key, type: :inner, left_outputs: nil, right_outputs: nil)
|
452
|
+
# @!macro join_common_before
|
453
|
+
# @param right [Arrow::Table] The right table.
|
454
|
+
#
|
455
|
+
# Join columns with `right` on join key columns.
|
456
|
+
#
|
457
|
+
# @!macro join_common_after
|
458
|
+
# @param type [Arrow::JoinType] How to join.
|
459
|
+
# @param left_outputs [::Array<String, Symbol>] Output columns in
|
460
|
+
# `self`.
|
461
|
+
#
|
462
|
+
# If both of `left_outputs` and `right_outputs` aren't
|
463
|
+
# specified, all columns in `self` and `right` are
|
464
|
+
# outputted.
|
465
|
+
# @param right_outputs [::Array<String, Symbol>] Output columns in
|
466
|
+
# `right`.
|
467
|
+
#
|
468
|
+
# If both of `left_outputs` and `right_outputs` aren't
|
469
|
+
# specified, all columns in `self` and `right` are
|
470
|
+
# outputted.
|
471
|
+
# @return [Arrow::Table]
|
472
|
+
# The joined `Arrow::Table`.
|
473
|
+
#
|
474
|
+
# @macro join_common_before
|
475
|
+
# @param key [String, Symbol] A join key.
|
476
|
+
# @macro join_common_after
|
477
|
+
#
|
478
|
+
# @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
|
479
|
+
#
|
480
|
+
# @macro join_common_before
|
481
|
+
# @param keys [::Array<String, Symbol>] Join keys.
|
482
|
+
# @macro join_common_after
|
483
|
+
#
|
484
|
+
# @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
|
485
|
+
#
|
486
|
+
# @macro join_common_before
|
487
|
+
# @param keys [Hash] Specify join keys in `self` and `right` separately.
|
488
|
+
# @option keys [String, Symbol, ::Array<String, Symbol>] :left
|
489
|
+
# Join keys in `self`.
|
490
|
+
# @option keys [String, Symbol, ::Array<String, Symbol>] :right
|
491
|
+
# Join keys in `right`.
|
492
|
+
# @macro join_common_after
|
493
|
+
#
|
494
|
+
# @since 7.0.0
|
495
|
+
def join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
|
496
|
+
plan = ExecutePlan.new
|
497
|
+
left_node = plan.build_source_node(self)
|
498
|
+
right_node = plan.build_source_node(right)
|
499
|
+
if keys.is_a?(Hash)
|
500
|
+
left_keys = keys[:left]
|
501
|
+
right_keys = keys[:right]
|
502
|
+
else
|
503
|
+
left_keys = keys
|
504
|
+
right_keys = keys
|
505
|
+
end
|
506
|
+
left_keys = Array(left_keys)
|
507
|
+
right_keys = Array(right_keys)
|
508
|
+
hash_join_node_options = HashJoinNodeOptions.new(type,
|
509
|
+
left_keys,
|
510
|
+
right_keys)
|
511
|
+
unless left_outputs.nil?
|
512
|
+
hash_join_node_options.left_outputs = left_outputs
|
513
|
+
end
|
514
|
+
unless right_outputs.nil?
|
515
|
+
hash_join_node_options.right_outputs = right_outputs
|
516
|
+
end
|
517
|
+
hash_join_node = plan.build_hash_join_node(left_node,
|
518
|
+
right_node,
|
519
|
+
hash_join_node_options)
|
520
|
+
sink_node_options = SinkNodeOptions.new
|
521
|
+
plan.build_sink_node(hash_join_node, sink_node_options)
|
522
|
+
plan.validate
|
523
|
+
plan.start
|
524
|
+
plan.wait
|
525
|
+
reader = sink_node_options.get_reader(hash_join_node.output_schema)
|
526
|
+
reader.read_all
|
527
|
+
end
|
528
|
+
|
457
529
|
alias_method :to_s_raw, :to_s
|
458
530
|
def to_s(options={})
|
459
531
|
format = options[:format]
|
data/lib/arrow/version.rb
CHANGED
data/red-arrow.gemspec
CHANGED
@@ -48,18 +48,10 @@ Gem::Specification.new do |spec|
|
|
48
48
|
|
49
49
|
spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
|
50
50
|
spec.add_runtime_dependency("extpp", ">= 0.0.7")
|
51
|
-
spec.add_runtime_dependency("gio2", ">= 3.
|
51
|
+
spec.add_runtime_dependency("gio2", ">= 3.5.0")
|
52
52
|
spec.add_runtime_dependency("native-package-installer")
|
53
53
|
spec.add_runtime_dependency("pkg-config")
|
54
54
|
|
55
|
-
spec.add_development_dependency("benchmark-driver")
|
56
|
-
spec.add_development_dependency("bundler")
|
57
|
-
spec.add_development_dependency("faker")
|
58
|
-
spec.add_development_dependency("rake")
|
59
|
-
spec.add_development_dependency("redcarpet")
|
60
|
-
spec.add_development_dependency("test-unit")
|
61
|
-
spec.add_development_dependency("yard")
|
62
|
-
|
63
55
|
required_msys2_package_version = version_components[0, 3].join(".")
|
64
56
|
spec.metadata["msys2_mingw_dependencies"] =
|
65
57
|
"arrow>=#{required_msys2_package_version}"
|
data/test/helper.rb
CHANGED
@@ -394,6 +394,20 @@ module RawRecordsDenseUnionArrayTests
|
|
394
394
|
assert_equal(records, target.raw_records)
|
395
395
|
end
|
396
396
|
|
397
|
+
def test_map
|
398
|
+
records = [
|
399
|
+
[{"0" => {"key1" => true, "key2" => nil}}],
|
400
|
+
[{"1" => nil}],
|
401
|
+
]
|
402
|
+
target = build({
|
403
|
+
type: :map,
|
404
|
+
key: :string,
|
405
|
+
item: :boolean,
|
406
|
+
},
|
407
|
+
records)
|
408
|
+
assert_equal(records, target.raw_records)
|
409
|
+
end
|
410
|
+
|
397
411
|
def test_sparse_union
|
398
412
|
omit("Need to add support for SparseUnionArrayBuilder")
|
399
413
|
records = [
|
@@ -451,6 +451,25 @@ module RawRecordsListArrayTests
|
|
451
451
|
assert_equal(records, target.raw_records)
|
452
452
|
end
|
453
453
|
|
454
|
+
def test_map
|
455
|
+
records = [
|
456
|
+
[
|
457
|
+
[
|
458
|
+
{"key1" => true, "key2" => nil},
|
459
|
+
nil,
|
460
|
+
],
|
461
|
+
],
|
462
|
+
[nil],
|
463
|
+
]
|
464
|
+
target = build({
|
465
|
+
type: :map,
|
466
|
+
key: :string,
|
467
|
+
item: :boolean,
|
468
|
+
},
|
469
|
+
records)
|
470
|
+
assert_equal(records, target.raw_records)
|
471
|
+
end
|
472
|
+
|
454
473
|
def test_sparse
|
455
474
|
omit("Need to add support for SparseUnionArrayBuilder")
|
456
475
|
records = [
|