red-arrow 0.16.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +60 -30
  4. data/ext/arrow/extconf.rb +14 -3
  5. data/ext/arrow/raw-records.cpp +2 -1
  6. data/ext/arrow/values.cpp +2 -1
  7. data/lib/arrow/buffer.rb +28 -0
  8. data/lib/arrow/dictionary-array.rb +24 -0
  9. data/lib/arrow/generic-filterable.rb +7 -7
  10. data/lib/arrow/generic-takeable.rb +2 -2
  11. data/lib/arrow/loader.rb +3 -0
  12. data/lib/arrow/null-array-builder.rb +1 -1
  13. data/lib/arrow/raw-table-converter.rb +47 -0
  14. data/lib/arrow/record-batch-iterator.rb +22 -0
  15. data/lib/arrow/record-batch.rb +9 -1
  16. data/lib/arrow/schema.rb +5 -0
  17. data/lib/arrow/struct-array-builder.rb +13 -7
  18. data/lib/arrow/table-saver.rb +8 -4
  19. data/lib/arrow/table.rb +8 -25
  20. data/lib/arrow/version.rb +1 -1
  21. data/test/helper.rb +1 -0
  22. data/test/helper/omittable.rb +36 -0
  23. data/test/raw-records/test-dense-union-array.rb +1 -34
  24. data/test/raw-records/test-sparse-union-array.rb +1 -33
  25. data/test/run-test.rb +14 -3
  26. data/test/test-array.rb +5 -3
  27. data/test/test-buffer.rb +11 -0
  28. data/test/test-chunked-array.rb +5 -3
  29. data/test/test-dense-union-data-type.rb +2 -2
  30. data/test/test-dictionary-array.rb +41 -0
  31. data/test/test-feather.rb +21 -6
  32. data/test/test-record-batch-iterator.rb +37 -0
  33. data/test/test-record-batch.rb +14 -0
  34. data/test/test-schema.rb +16 -0
  35. data/test/test-sparse-union-data-type.rb +2 -2
  36. data/test/test-struct-array-builder.rb +8 -4
  37. data/test/test-table.rb +9 -3
  38. data/test/values/test-dense-union-array.rb +1 -34
  39. data/test/values/test-sparse-union-array.rb +1 -33
  40. metadata +68 -59
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 64b14ef4120f4ab290e8161020902ec2a22631c519d5a133a63ce383610e8545
4
- data.tar.gz: 2f5850520e2dc69568a454cee0d4246909d52f1d49851221b1b9efd3149bc15c
3
+ metadata.gz: 171c7e5854ed116c8324153272827255b18f1083d5ae38866903b3cb0faa2977
4
+ data.tar.gz: 86c21238d83c5e4a8d114464eb8281dca5ddaff7cb6fb3e08b858a67082d4514
5
5
  SHA512:
6
- metadata.gz: 0e19a4da6182437a51f9dad6212436e70b00881674ee6cd29e2a40910fe711fdef4076c81d2778f3ff9e9dd3f45b573c43e90378244cf08d7550b504ad8b53af
7
- data.tar.gz: 547eb8b31fd59d9c1d5fc1163bb25da70154b797d103d3a01f6fda70dddc0b3c2cdb5391b9006c4510be37b8b75988195f809aaead8c91e7fb68f762cc5313de
6
+ metadata.gz: f46692362251101f0d18782e755dd3e2453a168ddefc564a623cc6f16a336d974fca83079faa63198b201e88e3fc678552bd69adca86bf743daf5ef7cd432ff2
7
+ data.tar.gz: 887b098b1d9b832a9197a9ef08e7be63e2d12b1772c45054e02b582d47bc8382c6d73645fce3ea2982d9b1f73454a78fe8372a7b01879cb1d865b971167bc2ad
data/Rakefile CHANGED
@@ -30,36 +30,44 @@ spec = helper.gemspec
30
30
  release_task = Rake::Task["release"]
31
31
  release_task.prerequisites.replace(["build", "release:rubygem_push"])
32
32
 
33
- def run_extconf(extension_dir, *arguments)
34
- cd(extension_dir) do
35
- ruby("extconf.rb", *arguments)
33
+ def run_extconf(build_dir, extension_dir, *arguments)
34
+ cd(build_dir) do
35
+ ruby(File.join(extension_dir, "extconf.rb"),
36
+ *arguments)
36
37
  end
37
38
  end
38
39
 
39
40
  spec.extensions.each do |extension|
40
- extension_dir = File.dirname(extension)
41
- CLOBBER << File.join(extension_dir, "Makefile")
42
- CLOBBER << File.join(extension_dir, "mkmf.log")
41
+ extension_dir = File.join(base_dir, File.dirname(extension))
42
+ build_dir = ENV["BUILD_DIR"]
43
+ if build_dir
44
+ build_dir = File.join(build_dir, "red-arrow")
45
+ directory build_dir
46
+ else
47
+ build_dir = extension_dir
48
+ end
49
+ CLOBBER << File.join(build_dir, "Makefile")
50
+ CLOBBER << File.join(build_dir, "mkmf.log")
43
51
 
44
- makefile = File.join(extension_dir, "Makefile")
45
- file makefile do
46
- run_extconf(extension_dir)
52
+ makefile = File.join(build_dir, "Makefile")
53
+ file makefile => build_dir do
54
+ run_extconf(build_dir, extension_dir)
47
55
  end
48
56
 
49
57
  desc "Configure"
50
- task :configure do
51
- run_extconf(extension_dir)
58
+ task :configure => build_dir do
59
+ run_extconf(build_dir, extension_dir)
52
60
  end
53
61
 
54
62
  desc "Compile"
55
63
  task :compile => makefile do
56
- cd(extension_dir) do
64
+ cd(build_dir) do
57
65
  sh("make")
58
66
  end
59
67
  end
60
68
 
61
69
  task :clean do
62
- cd(extension_dir) do
70
+ cd(build_dir) do
63
71
  sh("make", "clean") if File.exist?("Makefile")
64
72
  end
65
73
  end
@@ -67,7 +75,9 @@ end
67
75
 
68
76
  desc "Run tests"
69
77
  task :test do
70
- ruby("test/run-test.rb")
78
+ cd(base_dir) do
79
+ ruby("test/run-test.rb")
80
+ end
71
81
  end
72
82
 
73
83
  task default: :test
@@ -79,8 +89,10 @@ task :benchmark do
79
89
  else
80
90
  FileList["benchmark/{,*/**/}*.yml"]
81
91
  end
82
- benchmarks.each do |benchmark|
83
- sh("benchmark-driver", benchmark)
92
+ cd(base_dir) do
93
+ benchmarks.each do |benchmark|
94
+ sh("benchmark-driver", benchmark)
95
+ end
84
96
  end
85
97
  end
86
98
 
data/ext/arrow/converters.hpp CHANGED
@@ -285,7 +285,8 @@ namespace red_arrow {
285
285
  // VISIT(Interval)
286
286
  VISIT(List)
287
287
  VISIT(Struct)
288
- VISIT(Union)
288
+ VISIT(SparseUnion)
289
+ VISIT(DenseUnion)
289
290
  VISIT(Dictionary)
290
291
  VISIT(Decimal128)
291
292
  // TODO
@@ -339,9 +340,9 @@ namespace red_arrow {
339
340
  index_ = index;
340
341
  result_ = rb_hash_new();
341
342
  const auto struct_type = array.struct_type();
342
- const auto n = struct_type->num_children();
343
+ const auto n = struct_type->num_fields();
343
344
  for (int i = 0; i < n; ++i) {
344
- const auto field_type = struct_type->child(i).get();
345
+ const auto field_type = struct_type->field(i).get();
345
346
  const auto& field_name = field_type->name();
346
347
  auto key_keep = key_;
347
348
  key_ = rb_utf8_str_new(field_name.data(), field_name.length());
@@ -388,7 +389,8 @@ namespace red_arrow {
388
389
  // VISIT(Interval)
389
390
  VISIT(List)
390
391
  VISIT(Struct)
391
- VISIT(Union)
392
+ VISIT(SparseUnion)
393
+ VISIT(DenseUnion)
392
394
  VISIT(Dictionary)
393
395
  VISIT(Decimal128)
394
396
  // TODO
@@ -432,10 +434,10 @@ namespace red_arrow {
432
434
  index_ = index;
433
435
  switch (array.mode()) {
434
436
  case arrow::UnionMode::SPARSE:
435
- convert_sparse(array);
437
+ convert_sparse(static_cast<const arrow::SparseUnionArray&>(array));
436
438
  break;
437
439
  case arrow::UnionMode::DENSE:
438
- convert_dense(array);
440
+ convert_dense(static_cast<const arrow::DenseUnionArray&>(array));
439
441
  break;
440
442
  default:
441
443
  rb_raise(rb_eArgError, "Invalid union mode");
@@ -479,7 +481,8 @@ namespace red_arrow {
479
481
  // VISIT(Interval)
480
482
  VISIT(List)
481
483
  VISIT(Struct)
482
- VISIT(Union)
484
+ VISIT(SparseUnion)
485
+ VISIT(DenseUnion)
483
486
  VISIT(Dictionary)
484
487
  VISIT(Decimal128)
485
488
  // TODO
@@ -501,14 +504,14 @@ namespace red_arrow {
501
504
  result_ = result;
502
505
  }
503
506
 
504
- uint8_t compute_child_index(const arrow::UnionArray& array,
507
+ uint8_t compute_field_index(const arrow::UnionArray& array,
505
508
  arrow::UnionType* type,
506
509
  const char* tag) {
507
510
  const auto type_code = array.raw_type_codes()[index_];
508
511
  if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
509
- const auto child_id = type->child_ids()[type_code];
510
- if (child_id >= 0) {
511
- return child_id;
512
+ const auto field_id = type->child_ids()[type_code];
513
+ if (field_id >= 0) {
514
+ return field_id;
512
515
  }
513
516
  }
514
517
  check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
@@ -516,33 +519,33 @@ namespace red_arrow {
516
519
  return 0;
517
520
  }
518
521
 
519
- void convert_sparse(const arrow::UnionArray& array) {
522
+ void convert_sparse(const arrow::SparseUnionArray& array) {
520
523
  const auto type =
521
524
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
522
525
  const auto tag = "[raw-records][union-sparse-array]";
523
- const auto child_index = compute_child_index(array, type, tag);
524
- const auto child_field = type->child(child_index).get();
525
- const auto& field_name = child_field->name();
526
+ const auto index = compute_field_index(array, type, tag);
527
+ const auto field = type->field(index).get();
528
+ const auto& field_name = field->name();
526
529
  const auto field_name_keep = field_name_;
527
530
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
528
- const auto child_array = array.child(child_index).get();
529
- check_status(child_array->Accept(this), tag);
531
+ const auto field_array = array.field(index).get();
532
+ check_status(field_array->Accept(this), tag);
530
533
  field_name_ = field_name_keep;
531
534
  }
532
535
 
533
- void convert_dense(const arrow::UnionArray& array) {
536
+ void convert_dense(const arrow::DenseUnionArray& array) {
534
537
  const auto type =
535
538
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
536
539
  const auto tag = "[raw-records][union-dense-array]";
537
- const auto child_index = compute_child_index(array, type, tag);
538
- const auto child_field = type->child(child_index).get();
539
- const auto& field_name = child_field->name();
540
+ const auto index = compute_field_index(array, type, tag);
541
+ const auto field = type->field(index).get();
542
+ const auto& field_name = field->name();
540
543
  const auto field_name_keep = field_name_;
541
544
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
542
- const auto child_array = array.child(child_index);
545
+ const auto field_array = array.field(index);
543
546
  const auto index_keep = index_;
544
547
  index_ = array.value_offset(index_);
545
- check_status(child_array->Accept(this), tag);
548
+ check_status(field_array->Accept(this), tag);
546
549
  index_ = index_keep;
547
550
  field_name_ = field_name_keep;
548
551
  }
@@ -557,30 +560,57 @@ namespace red_arrow {
557
560
  public:
558
561
  explicit DictionaryArrayValueConverter(ArrayValueConverter* converter)
559
562
  : array_value_converter_(converter),
560
- index_(0),
563
+ value_index_(0),
561
564
  result_(Qnil) {
562
565
  }
563
566
 
564
567
  VALUE convert(const arrow::DictionaryArray& array,
565
568
  const int64_t index) {
566
- index_ = index;
567
- auto indices = array.indices().get();
568
- check_status(indices->Accept(this),
569
+ value_index_ = array.GetValueIndex(index);
570
+ auto dictionary = array.dictionary().get();
571
+ check_status(dictionary->Accept(this),
569
572
  "[raw-records][dictionary-array]");
570
573
  return result_;
571
574
  }
572
575
 
573
- // TODO: Convert to real value.
574
576
  #define VISIT(TYPE) \
575
577
  arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
576
- result_ = convert_value(array, index_); \
578
+ result_ = convert_value(array, value_index_); \
577
579
  return arrow::Status::OK(); \
578
580
  }
579
581
 
582
+ VISIT(Null)
583
+ VISIT(Boolean)
580
584
  VISIT(Int8)
581
585
  VISIT(Int16)
582
586
  VISIT(Int32)
583
587
  VISIT(Int64)
588
+ VISIT(UInt8)
589
+ VISIT(UInt16)
590
+ VISIT(UInt32)
591
+ VISIT(UInt64)
592
+ // TODO
593
+ // VISIT(HalfFloat)
594
+ VISIT(Float)
595
+ VISIT(Double)
596
+ VISIT(Binary)
597
+ VISIT(String)
598
+ VISIT(FixedSizeBinary)
599
+ VISIT(Date32)
600
+ VISIT(Date64)
601
+ VISIT(Time32)
602
+ VISIT(Time64)
603
+ VISIT(Timestamp)
604
+ // TODO
605
+ // VISIT(Interval)
606
+ VISIT(List)
607
+ VISIT(Struct)
608
+ VISIT(SparseUnion)
609
+ VISIT(DenseUnion)
610
+ VISIT(Dictionary)
611
+ VISIT(Decimal128)
612
+ // TODO
613
+ // VISIT(Extension)
584
614
 
585
615
  #undef VISIT
586
616
 
@@ -592,7 +622,7 @@ namespace red_arrow {
592
622
  }
593
623
 
594
624
  ArrayValueConverter* array_value_converter_;
595
- int64_t index_;
625
+ int64_t value_index_;
596
626
  VALUE result_;
597
627
  };
598
628
 
data/ext/arrow/extconf.rb CHANGED
@@ -16,7 +16,8 @@
16
16
  # under the License.
17
17
 
18
18
  require "extpp"
19
- require "mkmf-gnome2"
19
+ require "mkmf-gnome"
20
+ require_relative "../../lib/arrow/version"
20
21
 
21
22
  arrow_pkg_config_path = ENV["ARROW_PKG_CONFIG_PATH"]
22
23
  if arrow_pkg_config_path
@@ -24,7 +25,12 @@ if arrow_pkg_config_path
24
25
  ENV["PKG_CONFIG_PATH"] = pkg_config_paths.join(File::PATH_SEPARATOR)
25
26
  end
26
27
 
27
- unless required_pkg_config_package("arrow",
28
+ unless required_pkg_config_package([
29
+ "arrow",
30
+ Arrow::Version::MAJOR,
31
+ Arrow::Version::MINOR,
32
+ Arrow::Version::MICRO,
33
+ ],
28
34
  debian: "libarrow-dev",
29
35
  redhat: "arrow-devel",
30
36
  homebrew: "apache-arrow",
@@ -32,7 +38,12 @@ unless required_pkg_config_package("arrow",
32
38
  exit(false)
33
39
  end
34
40
 
35
- unless required_pkg_config_package("arrow-glib",
41
+ unless required_pkg_config_package([
42
+ "arrow-glib",
43
+ Arrow::Version::MAJOR,
44
+ Arrow::Version::MINOR,
45
+ Arrow::Version::MICRO,
46
+ ],
36
47
  debian: "libarrow-glib-dev",
37
48
  redhat: "arrow-glib-devel",
38
49
  homebrew: "apache-arrow-glib",
data/ext/arrow/raw-records.cpp CHANGED
@@ -100,7 +100,8 @@ namespace red_arrow {
100
100
  // VISIT(Interval)
101
101
  VISIT(List)
102
102
  VISIT(Struct)
103
- VISIT(Union)
103
+ VISIT(SparseUnion)
104
+ VISIT(DenseUnion)
104
105
  VISIT(Dictionary)
105
106
  VISIT(Decimal128)
106
107
  // TODO
data/ext/arrow/values.cpp CHANGED
@@ -81,7 +81,8 @@ namespace red_arrow {
81
81
  // VISIT(Interval)
82
82
  VISIT(List)
83
83
  VISIT(Struct)
84
- VISIT(Union)
84
+ VISIT(SparseUnion)
85
+ VISIT(DenseUnion)
85
86
  VISIT(Dictionary)
86
87
  VISIT(Decimal128)
87
88
  // TODO
data/lib/arrow/buffer.rb ADDED
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Buffer
20
+ alias_method :initialize_raw, :initialize
21
+ private :initialize_raw
22
+
23
+ def initialize(data)
24
+ @data = data
25
+ initialize_raw(data)
26
+ end
27
+ end
28
+ end
data/lib/arrow/dictionary-array.rb ADDED
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class DictionaryArray
20
+ def get_value(i)
21
+ dictionary[indices[i]]
22
+ end
23
+ end
24
+ end
data/lib/arrow/generic-filterable.rb CHANGED
@@ -19,24 +19,24 @@ module Arrow
19
19
  module GenericFilterable
20
20
  class << self
21
21
  def included(base)
22
- base.alias_method :filter_raw, :filter
23
- base.alias_method :filter, :filter_generic
22
+ base.__send__(:alias_method, :filter_raw, :filter)
23
+ base.__send__(:alias_method, :filter, :filter_generic)
24
24
  end
25
25
  end
26
26
 
27
- def filter_generic(filter)
27
+ def filter_generic(filter, options=nil)
28
28
  case filter
29
29
  when ::Array
30
- filter_raw(BooleanArray.new(filter))
30
+ filter_raw(BooleanArray.new(filter), options)
31
31
  when ChunkedArray
32
32
  if respond_to?(:filter_chunked_array)
33
- filter_chunked_array(filter)
33
+ filter_chunked_array(filter, options)
34
34
  else
35
35
  # TODO: Implement this in C++
36
- filter_raw(filter.pack)
36
+ filter_raw(filter.pack, options)
37
37
  end
38
38
  else
39
- filter_raw(filter)
39
+ filter_raw(filter, options)
40
40
  end
41
41
  end
42
42
  end
data/lib/arrow/generic-takeable.rb CHANGED
@@ -19,8 +19,8 @@ module Arrow
19
19
  module GenericTakeable
20
20
  class << self
21
21
  def included(base)
22
- base.alias_method :take_raw, :take
23
- base.alias_method :take, :take_generic
22
+ base.__send__(:alias_method, :take_raw, :take)
23
+ base.__send__(:alias_method, :take, :take_generic)
24
24
  end
25
25
  end
26
26
 
data/lib/arrow/loader.rb CHANGED
@@ -41,6 +41,7 @@ module Arrow
41
41
  require "arrow/array"
42
42
  require "arrow/array-builder"
43
43
  require "arrow/bigdecimal-extension"
44
+ require "arrow/buffer"
44
45
  require "arrow/chunked-array"
45
46
  require "arrow/column"
46
47
  require "arrow/compression-type"
@@ -56,6 +57,7 @@ module Arrow
56
57
  require "arrow/decimal128-array-builder"
57
58
  require "arrow/decimal128-data-type"
58
59
  require "arrow/dense-union-data-type"
60
+ require "arrow/dictionary-array"
59
61
  require "arrow/dictionary-data-type"
60
62
  require "arrow/field"
61
63
  require "arrow/file-output-stream"
@@ -69,6 +71,7 @@ module Arrow
69
71
  require "arrow/record-batch"
70
72
  require "arrow/record-batch-builder"
71
73
  require "arrow/record-batch-file-reader"
74
+ require "arrow/record-batch-iterator"
72
75
  require "arrow/record-batch-stream-reader"
73
76
  require "arrow/rolling-window"
74
77
  require "arrow/schema"