red-arrow 4.0.1 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +10 -0
  3. data/README.md +23 -0
  4. data/ext/arrow/arrow.cpp +3 -0
  5. data/ext/arrow/converters.cpp +5 -0
  6. data/ext/arrow/converters.hpp +126 -0
  7. data/ext/arrow/extconf.rb +13 -0
  8. data/ext/arrow/memory-view.cpp +311 -0
  9. data/ext/arrow/memory-view.hpp +26 -0
  10. data/ext/arrow/raw-records.cpp +1 -0
  11. data/ext/arrow/values.cpp +1 -0
  12. data/lib/arrow/aggregate-node-options.rb +35 -0
  13. data/lib/arrow/aggregation.rb +46 -0
  14. data/lib/arrow/array-builder.rb +5 -0
  15. data/lib/arrow/array.rb +12 -0
  16. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  17. data/lib/arrow/buffer.rb +10 -6
  18. data/lib/arrow/column-containable.rb +100 -1
  19. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  20. data/lib/arrow/datum.rb +102 -0
  21. data/lib/arrow/equal-options.rb +38 -0
  22. data/lib/arrow/expression.rb +48 -0
  23. data/lib/arrow/file-system.rb +34 -0
  24. data/lib/arrow/function.rb +52 -0
  25. data/lib/arrow/group.rb +116 -124
  26. data/lib/arrow/loader.rb +58 -0
  27. data/lib/arrow/map-array-builder.rb +109 -0
  28. data/lib/arrow/map-array.rb +26 -0
  29. data/lib/arrow/map-data-type.rb +89 -0
  30. data/lib/arrow/path-extension.rb +1 -1
  31. data/lib/arrow/record-batch-reader.rb +41 -0
  32. data/lib/arrow/record-batch.rb +0 -2
  33. data/lib/arrow/s3-global-options.rb +38 -0
  34. data/lib/arrow/scalar.rb +32 -0
  35. data/lib/arrow/slicer.rb +44 -143
  36. data/lib/arrow/sort-key.rb +61 -55
  37. data/lib/arrow/sort-options.rb +8 -8
  38. data/lib/arrow/source-node-options.rb +32 -0
  39. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  40. data/lib/arrow/symbol-values-appendable.rb +34 -0
  41. data/lib/arrow/table-concatenate-options.rb +36 -0
  42. data/lib/arrow/table-formatter.rb +141 -17
  43. data/lib/arrow/table-list-formatter.rb +5 -3
  44. data/lib/arrow/table-loader.rb +119 -44
  45. data/lib/arrow/table-saver.rb +36 -5
  46. data/lib/arrow/table-table-formatter.rb +7 -31
  47. data/lib/arrow/table.rb +112 -40
  48. data/lib/arrow/version.rb +1 -1
  49. data/red-arrow.gemspec +1 -9
  50. data/test/helper.rb +3 -0
  51. data/test/raw-records/test-dense-union-array.rb +14 -0
  52. data/test/raw-records/test-list-array.rb +19 -0
  53. data/test/raw-records/test-map-array.rb +441 -0
  54. data/test/raw-records/test-sparse-union-array.rb +14 -0
  55. data/test/raw-records/test-struct-array.rb +15 -0
  56. data/test/test-array-builder.rb +7 -0
  57. data/test/test-array.rb +34 -0
  58. data/test/test-binary-dictionary-array-builder.rb +103 -0
  59. data/test/test-boolean-scalar.rb +26 -0
  60. data/test/test-csv-loader.rb +8 -8
  61. data/test/test-expression.rb +40 -0
  62. data/test/test-float-scalar.rb +46 -0
  63. data/test/test-function.rb +210 -0
  64. data/test/test-group.rb +75 -51
  65. data/test/test-map-array-builder.rb +110 -0
  66. data/test/test-map-array.rb +33 -0
  67. data/test/test-map-data-type.rb +36 -0
  68. data/test/test-memory-view.rb +434 -0
  69. data/test/test-record-batch-reader.rb +46 -0
  70. data/test/test-record-batch.rb +42 -0
  71. data/test/test-slicer.rb +166 -167
  72. data/test/test-string-dictionary-array-builder.rb +103 -0
  73. data/test/test-table.rb +376 -56
  74. data/test/values/test-dense-union-array.rb +14 -0
  75. data/test/values/test-list-array.rb +17 -0
  76. data/test/values/test-map-array.rb +433 -0
  77. data/test/values/test-sparse-union-array.rb +14 -0
  78. data/test/values/test-struct-array.rb +15 -0
  79. metadata +117 -168
data/lib/arrow/table.rb CHANGED
@@ -195,8 +195,6 @@ module Arrow
195
195
  alias_method :size, :n_rows
196
196
  alias_method :length, :n_rows
197
197
 
198
- alias_method :[], :find_column
199
-
200
198
  alias_method :slice_raw, :slice
201
199
 
202
200
  # @overload slice(offset, length)
@@ -236,6 +234,12 @@ module Arrow
236
234
  # @return [Arrow::Table]
237
235
  # The sub `Arrow::Table` that covers only rows of the range of indices.
238
236
  #
237
+ # @overload slice(conditions)
238
+ #
239
+ # @param conditions [Hash] The conditions to select records.
240
+ # @return [Arrow::Table]
241
+ # The sub `Arrow::Table` that covers only rows matched by condition
242
+ #
239
243
  # @overload slice
240
244
  #
241
245
  # @yield [slicer] Gives slicer that constructs condition to select records.
@@ -263,12 +267,37 @@ module Arrow
263
267
  expected_n_args = nil
264
268
  case args.size
265
269
  when 1
266
- if args[0].is_a?(Integer)
270
+ case args[0]
271
+ when Integer
267
272
  index = args[0]
268
273
  index += n_rows if index < 0
269
274
  return nil if index < 0
270
275
  return nil if index >= n_rows
271
276
  return Record.new(self, index)
277
+ when Hash
278
+ condition_pairs = args[0]
279
+ slicer = Slicer.new(self)
280
+ conditions = []
281
+ condition_pairs.each do |key, value|
282
+ case value
283
+ when Range
284
+ # TODO: Optimize "begin <= key <= end" case by missing "between" kernel
285
+ # https://issues.apache.org/jira/browse/ARROW-9843
286
+ unless value.begin.nil?
287
+ conditions << (slicer[key] >= value.begin)
288
+ end
289
+ unless value.end.nil?
290
+ if value.exclude_end?
291
+ conditions << (slicer[key] < value.end)
292
+ else
293
+ conditions << (slicer[key] <= value.end)
294
+ end
295
+ end
296
+ else
297
+ conditions << (slicer[key] == value)
298
+ end
299
+ end
300
+ slicers << conditions.inject(:&)
272
301
  else
273
302
  slicers << args[0]
274
303
  end
@@ -397,41 +426,6 @@ module Arrow
397
426
  remove_column_raw(index)
398
427
  end
399
428
 
400
- # TODO
401
- #
402
- # @return [Arrow::Table]
403
- def select_columns(*selectors, &block)
404
- if selectors.empty?
405
- return to_enum(__method__) unless block_given?
406
- selected_columns = columns.select(&block)
407
- else
408
- selected_columns = []
409
- selectors.each do |selector|
410
- case selector
411
- when String, Symbol
412
- column = find_column(selector)
413
- if column.nil?
414
- message = "unknown column: #{selector.inspect}: #{inspect}"
415
- raise KeyError.new(message)
416
- end
417
- selected_columns << column
418
- when Range
419
- selected_columns.concat(columns[selector])
420
- else
421
- column = columns[selector]
422
- if column.nil?
423
- message = "out of index (0..#{n_columns - 1}): " +
424
- "#{selector.inspect}: #{inspect}"
425
- raise IndexError.new(message)
426
- end
427
- selected_columns << column
428
- end
429
- end
430
- selected_columns = selected_columns.select(&block) if block_given?
431
- end
432
- self.class.new(selected_columns)
433
- end
434
-
435
429
  # Experimental
436
430
  def group(*keys)
437
431
  Group.new(self, keys)
@@ -442,8 +436,8 @@ module Arrow
442
436
  RollingWindow.new(self, size)
443
437
  end
444
438
 
445
- def save(path, options={})
446
- saver = TableSaver.new(self, path, options)
439
+ def save(output, options={})
440
+ saver = TableSaver.new(self, output, options)
447
441
  saver.save
448
442
  end
449
443
 
@@ -454,6 +448,84 @@ module Arrow
454
448
  self.class.new(schema, packed_arrays)
455
449
  end
456
450
 
451
+ # @overload join(right, key, type: :inner, left_outputs: nil, right_outputs: nil)
452
+ # @!macro join_common_before
453
+ # @param right [Arrow::Table] The right table.
454
+ #
455
+ # Join columns with `right` on join key columns.
456
+ #
457
+ # @!macro join_common_after
458
+ # @param type [Arrow::JoinType] How to join.
459
+ # @param left_outputs [::Array<String, Symbol>] Output columns in
460
+ # `self`.
461
+ #
462
+ # If both of `left_outputs` and `right_outputs` aren't
463
+ # specified, all columns in `self` and `right` are
464
+ # outputted.
465
+ # @param right_outputs [::Array<String, Symbol>] Output columns in
466
+ # `right`.
467
+ #
468
+ # If both of `left_outputs` and `right_outputs` aren't
469
+ # specified, all columns in `self` and `right` are
470
+ # outputted.
471
+ # @return [Arrow::Table]
472
+ # The joined `Arrow::Table`.
473
+ #
474
+ # @macro join_common_before
475
+ # @param key [String, Symbol] A join key.
476
+ # @macro join_common_after
477
+ #
478
+ # @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
479
+ #
480
+ # @macro join_common_before
481
+ # @param keys [::Array<String, Symbol>] Join keys.
482
+ # @macro join_common_after
483
+ #
484
+ # @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
485
+ #
486
+ # @macro join_common_before
487
+ # @param keys [Hash] Specify join keys in `self` and `right` separately.
488
+ # @option keys [String, Symbol, ::Array<String, Symbol>] :left
489
+ # Join keys in `self`.
490
+ # @option keys [String, Symbol, ::Array<String, Symbol>] :right
491
+ # Join keys in `right`.
492
+ # @macro join_common_after
493
+ #
494
+ # @since 7.0.0
495
+ def join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
496
+ plan = ExecutePlan.new
497
+ left_node = plan.build_source_node(self)
498
+ right_node = plan.build_source_node(right)
499
+ if keys.is_a?(Hash)
500
+ left_keys = keys[:left]
501
+ right_keys = keys[:right]
502
+ else
503
+ left_keys = keys
504
+ right_keys = keys
505
+ end
506
+ left_keys = Array(left_keys)
507
+ right_keys = Array(right_keys)
508
+ hash_join_node_options = HashJoinNodeOptions.new(type,
509
+ left_keys,
510
+ right_keys)
511
+ unless left_outputs.nil?
512
+ hash_join_node_options.left_outputs = left_outputs
513
+ end
514
+ unless right_outputs.nil?
515
+ hash_join_node_options.right_outputs = right_outputs
516
+ end
517
+ hash_join_node = plan.build_hash_join_node(left_node,
518
+ right_node,
519
+ hash_join_node_options)
520
+ sink_node_options = SinkNodeOptions.new
521
+ plan.build_sink_node(hash_join_node, sink_node_options)
522
+ plan.validate
523
+ plan.start
524
+ plan.wait
525
+ reader = sink_node_options.get_reader(hash_join_node.output_schema)
526
+ reader.read_all
527
+ end
528
+
457
529
  alias_method :to_s_raw, :to_s
458
530
  def to_s(options={})
459
531
  format = options[:format]
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "4.0.1"
19
+ VERSION = "7.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -48,18 +48,10 @@ Gem::Specification.new do |spec|
48
48
 
49
49
  spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
50
50
  spec.add_runtime_dependency("extpp", ">= 0.0.7")
51
- spec.add_runtime_dependency("gio2", ">= 3.3.6")
51
+ spec.add_runtime_dependency("gio2", ">= 3.5.0")
52
52
  spec.add_runtime_dependency("native-package-installer")
53
53
  spec.add_runtime_dependency("pkg-config")
54
54
 
55
- spec.add_development_dependency("benchmark-driver")
56
- spec.add_development_dependency("bundler")
57
- spec.add_development_dependency("faker")
58
- spec.add_development_dependency("rake")
59
- spec.add_development_dependency("redcarpet")
60
- spec.add_development_dependency("test-unit")
61
- spec.add_development_dependency("yard")
62
-
63
55
  required_msys2_package_version = version_components[0, 3].join(".")
64
56
  spec.metadata["msys2_mingw_dependencies"] =
65
57
  "arrow>=#{required_msys2_package_version}"
data/test/helper.rb CHANGED
@@ -17,8 +17,11 @@
17
17
 
18
18
  require "arrow"
19
19
 
20
+ require "fiddle"
20
21
  require "pathname"
21
22
  require "tempfile"
23
+ require "timeout"
24
+ require "webrick"
22
25
  require "zlib"
23
26
 
24
27
  require "test-unit"
@@ -394,6 +394,20 @@ module RawRecordsDenseUnionArrayTests
394
394
  assert_equal(records, target.raw_records)
395
395
  end
396
396
 
397
+ def test_map
398
+ records = [
399
+ [{"0" => {"key1" => true, "key2" => nil}}],
400
+ [{"1" => nil}],
401
+ ]
402
+ target = build({
403
+ type: :map,
404
+ key: :string,
405
+ item: :boolean,
406
+ },
407
+ records)
408
+ assert_equal(records, target.raw_records)
409
+ end
410
+
397
411
  def test_sparse_union
398
412
  omit("Need to add support for SparseUnionArrayBuilder")
399
413
  records = [
@@ -451,6 +451,25 @@ module RawRecordsListArrayTests
451
451
  assert_equal(records, target.raw_records)
452
452
  end
453
453
 
454
+ def test_map
455
+ records = [
456
+ [
457
+ [
458
+ {"key1" => true, "key2" => nil},
459
+ nil,
460
+ ],
461
+ ],
462
+ [nil],
463
+ ]
464
+ target = build({
465
+ type: :map,
466
+ key: :string,
467
+ item: :boolean,
468
+ },
469
+ records)
470
+ assert_equal(records, target.raw_records)
471
+ end
472
+
454
473
  def test_sparse
455
474
  omit("Need to add support for SparseUnionArrayBuilder")
456
475
  records = [