red-arrow 4.0.1 → 7.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +10 -0
  3. data/README.md +23 -0
  4. data/ext/arrow/arrow.cpp +3 -0
  5. data/ext/arrow/converters.cpp +5 -0
  6. data/ext/arrow/converters.hpp +126 -0
  7. data/ext/arrow/extconf.rb +13 -0
  8. data/ext/arrow/memory-view.cpp +311 -0
  9. data/ext/arrow/memory-view.hpp +26 -0
  10. data/ext/arrow/raw-records.cpp +1 -0
  11. data/ext/arrow/values.cpp +1 -0
  12. data/lib/arrow/aggregate-node-options.rb +35 -0
  13. data/lib/arrow/aggregation.rb +46 -0
  14. data/lib/arrow/array-builder.rb +5 -0
  15. data/lib/arrow/array.rb +12 -0
  16. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  17. data/lib/arrow/buffer.rb +10 -6
  18. data/lib/arrow/column-containable.rb +100 -1
  19. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  20. data/lib/arrow/datum.rb +102 -0
  21. data/lib/arrow/equal-options.rb +38 -0
  22. data/lib/arrow/expression.rb +48 -0
  23. data/lib/arrow/file-system.rb +34 -0
  24. data/lib/arrow/function.rb +52 -0
  25. data/lib/arrow/group.rb +116 -124
  26. data/lib/arrow/loader.rb +58 -0
  27. data/lib/arrow/map-array-builder.rb +109 -0
  28. data/lib/arrow/map-array.rb +26 -0
  29. data/lib/arrow/map-data-type.rb +89 -0
  30. data/lib/arrow/path-extension.rb +1 -1
  31. data/lib/arrow/record-batch-reader.rb +41 -0
  32. data/lib/arrow/record-batch.rb +0 -2
  33. data/lib/arrow/s3-global-options.rb +38 -0
  34. data/lib/arrow/scalar.rb +32 -0
  35. data/lib/arrow/slicer.rb +44 -143
  36. data/lib/arrow/sort-key.rb +61 -55
  37. data/lib/arrow/sort-options.rb +8 -8
  38. data/lib/arrow/source-node-options.rb +32 -0
  39. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  40. data/lib/arrow/symbol-values-appendable.rb +34 -0
  41. data/lib/arrow/table-concatenate-options.rb +36 -0
  42. data/lib/arrow/table-formatter.rb +141 -17
  43. data/lib/arrow/table-list-formatter.rb +5 -3
  44. data/lib/arrow/table-loader.rb +119 -44
  45. data/lib/arrow/table-saver.rb +36 -5
  46. data/lib/arrow/table-table-formatter.rb +7 -31
  47. data/lib/arrow/table.rb +112 -40
  48. data/lib/arrow/version.rb +1 -1
  49. data/red-arrow.gemspec +1 -9
  50. data/test/helper.rb +3 -0
  51. data/test/raw-records/test-dense-union-array.rb +14 -0
  52. data/test/raw-records/test-list-array.rb +19 -0
  53. data/test/raw-records/test-map-array.rb +441 -0
  54. data/test/raw-records/test-sparse-union-array.rb +14 -0
  55. data/test/raw-records/test-struct-array.rb +15 -0
  56. data/test/test-array-builder.rb +7 -0
  57. data/test/test-array.rb +34 -0
  58. data/test/test-binary-dictionary-array-builder.rb +103 -0
  59. data/test/test-boolean-scalar.rb +26 -0
  60. data/test/test-csv-loader.rb +8 -8
  61. data/test/test-expression.rb +40 -0
  62. data/test/test-float-scalar.rb +46 -0
  63. data/test/test-function.rb +210 -0
  64. data/test/test-group.rb +75 -51
  65. data/test/test-map-array-builder.rb +110 -0
  66. data/test/test-map-array.rb +33 -0
  67. data/test/test-map-data-type.rb +36 -0
  68. data/test/test-memory-view.rb +434 -0
  69. data/test/test-record-batch-reader.rb +46 -0
  70. data/test/test-record-batch.rb +42 -0
  71. data/test/test-slicer.rb +166 -167
  72. data/test/test-string-dictionary-array-builder.rb +103 -0
  73. data/test/test-table.rb +376 -56
  74. data/test/values/test-dense-union-array.rb +14 -0
  75. data/test/values/test-list-array.rb +17 -0
  76. data/test/values/test-map-array.rb +433 -0
  77. data/test/values/test-sparse-union-array.rb +14 -0
  78. data/test/values/test-struct-array.rb +15 -0
  79. metadata +117 -168
data/lib/arrow/table.rb CHANGED
@@ -195,8 +195,6 @@ module Arrow
195
195
  alias_method :size, :n_rows
196
196
  alias_method :length, :n_rows
197
197
 
198
- alias_method :[], :find_column
199
-
200
198
  alias_method :slice_raw, :slice
201
199
 
202
200
  # @overload slice(offset, length)
@@ -236,6 +234,12 @@ module Arrow
236
234
  # @return [Arrow::Table]
237
235
  # The sub `Arrow::Table` that covers only rows of the range of indices.
238
236
  #
237
+ # @overload slice(conditions)
238
+ #
239
+ # @param conditions [Hash] The conditions to select records.
240
+ # @return [Arrow::Table]
241
+ # The sub `Arrow::Table` that covers only rows matched by the conditions.
242
+ #
239
243
  # @overload slice
240
244
  #
241
245
  # @yield [slicer] Gives slicer that constructs condition to select records.
@@ -263,12 +267,37 @@ module Arrow
263
267
  expected_n_args = nil
264
268
  case args.size
265
269
  when 1
266
- if args[0].is_a?(Integer)
270
+ case args[0]
271
+ when Integer
267
272
  index = args[0]
268
273
  index += n_rows if index < 0
269
274
  return nil if index < 0
270
275
  return nil if index >= n_rows
271
276
  return Record.new(self, index)
277
+ when Hash
278
+ condition_pairs = args[0]
279
+ slicer = Slicer.new(self)
280
+ conditions = []
281
+ condition_pairs.each do |key, value|
282
+ case value
283
+ when Range
284
+ # TODO: Optimize the "begin <= key <= end" case once the missing "between" kernel is available
285
+ # https://issues.apache.org/jira/browse/ARROW-9843
286
+ unless value.begin.nil?
287
+ conditions << (slicer[key] >= value.begin)
288
+ end
289
+ unless value.end.nil?
290
+ if value.exclude_end?
291
+ conditions << (slicer[key] < value.end)
292
+ else
293
+ conditions << (slicer[key] <= value.end)
294
+ end
295
+ end
296
+ else
297
+ conditions << (slicer[key] == value)
298
+ end
299
+ end
300
+ slicers << conditions.inject(:&)
272
301
  else
273
302
  slicers << args[0]
274
303
  end
@@ -397,41 +426,6 @@ module Arrow
397
426
  remove_column_raw(index)
398
427
  end
399
428
 
400
- # TODO
401
- #
402
- # @return [Arrow::Table]
403
- def select_columns(*selectors, &block)
404
- if selectors.empty?
405
- return to_enum(__method__) unless block_given?
406
- selected_columns = columns.select(&block)
407
- else
408
- selected_columns = []
409
- selectors.each do |selector|
410
- case selector
411
- when String, Symbol
412
- column = find_column(selector)
413
- if column.nil?
414
- message = "unknown column: #{selector.inspect}: #{inspect}"
415
- raise KeyError.new(message)
416
- end
417
- selected_columns << column
418
- when Range
419
- selected_columns.concat(columns[selector])
420
- else
421
- column = columns[selector]
422
- if column.nil?
423
- message = "out of index (0..#{n_columns - 1}): " +
424
- "#{selector.inspect}: #{inspect}"
425
- raise IndexError.new(message)
426
- end
427
- selected_columns << column
428
- end
429
- end
430
- selected_columns = selected_columns.select(&block) if block_given?
431
- end
432
- self.class.new(selected_columns)
433
- end
434
-
435
429
  # Experimental
436
430
  def group(*keys)
437
431
  Group.new(self, keys)
@@ -442,8 +436,8 @@ module Arrow
442
436
  RollingWindow.new(self, size)
443
437
  end
444
438
 
445
- def save(path, options={})
446
- saver = TableSaver.new(self, path, options)
439
+ def save(output, options={})
440
+ saver = TableSaver.new(self, output, options)
447
441
  saver.save
448
442
  end
449
443
 
@@ -454,6 +448,84 @@ module Arrow
454
448
  self.class.new(schema, packed_arrays)
455
449
  end
456
450
 
451
+ # @overload join(right, key, type: :inner, left_outputs: nil, right_outputs: nil)
452
+ # @!macro join_common_before
453
+ # @param right [Arrow::Table] The right table.
454
+ #
455
+ # Join columns with `right` on join key columns.
456
+ #
457
+ # @!macro join_common_after
458
+ # @param type [Arrow::JoinType] How to join.
459
+ # @param left_outputs [::Array<String, Symbol>] Output columns in
460
+ # `self`.
461
+ #
462
+ # If neither `left_outputs` nor `right_outputs` is
463
+ # specified, all columns in `self` and `right` are
464
+ # output.
465
+ # @param right_outputs [::Array<String, Symbol>] Output columns in
466
+ # `right`.
467
+ #
468
+ # If neither `left_outputs` nor `right_outputs` is
469
+ # specified, all columns in `self` and `right` are
470
+ # output.
471
+ # @return [Arrow::Table]
472
+ # The joined `Arrow::Table`.
473
+ #
474
+ # @macro join_common_before
475
+ # @param key [String, Symbol] A join key.
476
+ # @macro join_common_after
477
+ #
478
+ # @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
479
+ #
480
+ # @macro join_common_before
481
+ # @param keys [::Array<String, Symbol>] Join keys.
482
+ # @macro join_common_after
483
+ #
484
+ # @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
485
+ #
486
+ # @macro join_common_before
487
+ # @param keys [Hash] Specify join keys in `self` and `right` separately.
488
+ # @option keys [String, Symbol, ::Array<String, Symbol>] :left
489
+ # Join keys in `self`.
490
+ # @option keys [String, Symbol, ::Array<String, Symbol>] :right
491
+ # Join keys in `right`.
492
+ # @macro join_common_after
493
+ #
494
+ # @since 7.0.0
495
+ def join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
496
+ plan = ExecutePlan.new
497
+ left_node = plan.build_source_node(self)
498
+ right_node = plan.build_source_node(right)
499
+ if keys.is_a?(Hash)
500
+ left_keys = keys[:left]
501
+ right_keys = keys[:right]
502
+ else
503
+ left_keys = keys
504
+ right_keys = keys
505
+ end
506
+ left_keys = Array(left_keys)
507
+ right_keys = Array(right_keys)
508
+ hash_join_node_options = HashJoinNodeOptions.new(type,
509
+ left_keys,
510
+ right_keys)
511
+ unless left_outputs.nil?
512
+ hash_join_node_options.left_outputs = left_outputs
513
+ end
514
+ unless right_outputs.nil?
515
+ hash_join_node_options.right_outputs = right_outputs
516
+ end
517
+ hash_join_node = plan.build_hash_join_node(left_node,
518
+ right_node,
519
+ hash_join_node_options)
520
+ sink_node_options = SinkNodeOptions.new
521
+ plan.build_sink_node(hash_join_node, sink_node_options)
522
+ plan.validate
523
+ plan.start
524
+ plan.wait
525
+ reader = sink_node_options.get_reader(hash_join_node.output_schema)
526
+ reader.read_all
527
+ end
528
+
457
529
  alias_method :to_s_raw, :to_s
458
530
  def to_s(options={})
459
531
  format = options[:format]
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "4.0.1"
19
+ VERSION = "7.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -48,18 +48,10 @@ Gem::Specification.new do |spec|
48
48
 
49
49
  spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
50
50
  spec.add_runtime_dependency("extpp", ">= 0.0.7")
51
- spec.add_runtime_dependency("gio2", ">= 3.3.6")
51
+ spec.add_runtime_dependency("gio2", ">= 3.5.0")
52
52
  spec.add_runtime_dependency("native-package-installer")
53
53
  spec.add_runtime_dependency("pkg-config")
54
54
 
55
- spec.add_development_dependency("benchmark-driver")
56
- spec.add_development_dependency("bundler")
57
- spec.add_development_dependency("faker")
58
- spec.add_development_dependency("rake")
59
- spec.add_development_dependency("redcarpet")
60
- spec.add_development_dependency("test-unit")
61
- spec.add_development_dependency("yard")
62
-
63
55
  required_msys2_package_version = version_components[0, 3].join(".")
64
56
  spec.metadata["msys2_mingw_dependencies"] =
65
57
  "arrow>=#{required_msys2_package_version}"
data/test/helper.rb CHANGED
@@ -17,8 +17,11 @@
17
17
 
18
18
  require "arrow"
19
19
 
20
+ require "fiddle"
20
21
  require "pathname"
21
22
  require "tempfile"
23
+ require "timeout"
24
+ require "webrick"
22
25
  require "zlib"
23
26
 
24
27
  require "test-unit"
@@ -394,6 +394,20 @@ module RawRecordsDenseUnionArrayTests
394
394
  assert_equal(records, target.raw_records)
395
395
  end
396
396
 
397
+ def test_map
398
+ records = [
399
+ [{"0" => {"key1" => true, "key2" => nil}}],
400
+ [{"1" => nil}],
401
+ ]
402
+ target = build({
403
+ type: :map,
404
+ key: :string,
405
+ item: :boolean,
406
+ },
407
+ records)
408
+ assert_equal(records, target.raw_records)
409
+ end
410
+
397
411
  def test_sparse_union
398
412
  omit("Need to add support for SparseUnionArrayBuilder")
399
413
  records = [
@@ -451,6 +451,25 @@ module RawRecordsListArrayTests
451
451
  assert_equal(records, target.raw_records)
452
452
  end
453
453
 
454
+ def test_map
455
+ records = [
456
+ [
457
+ [
458
+ {"key1" => true, "key2" => nil},
459
+ nil,
460
+ ],
461
+ ],
462
+ [nil],
463
+ ]
464
+ target = build({
465
+ type: :map,
466
+ key: :string,
467
+ item: :boolean,
468
+ },
469
+ records)
470
+ assert_equal(records, target.raw_records)
471
+ end
472
+
454
473
  def test_sparse
455
474
  omit("Need to add support for SparseUnionArrayBuilder")
456
475
  records = [