red-arrow 11.0.0 → 12.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/converters.hpp +12 -27
  4. data/lib/arrow/array-computable.rb +13 -0
  5. data/lib/arrow/data-type.rb +9 -0
  6. data/lib/arrow/dense-union-array-builder.rb +49 -0
  7. data/lib/arrow/dense-union-array.rb +26 -0
  8. data/lib/arrow/loader.rb +5 -0
  9. data/lib/arrow/record-batch-file-reader.rb +2 -0
  10. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  11. data/lib/arrow/scalar.rb +67 -0
  12. data/lib/arrow/slicer.rb +61 -0
  13. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  14. data/lib/arrow/sparse-union-array.rb +26 -0
  15. data/lib/arrow/struct-array-builder.rb +0 -5
  16. data/lib/arrow/table.rb +130 -10
  17. data/lib/arrow/union-array-builder.rb +59 -0
  18. data/lib/arrow/version.rb +1 -1
  19. data/test/raw-records/test-dense-union-array.rb +90 -45
  20. data/test/raw-records/test-list-array.rb +28 -10
  21. data/test/raw-records/test-map-array.rb +39 -10
  22. data/test/raw-records/test-sparse-union-array.rb +86 -41
  23. data/test/raw-records/test-struct-array.rb +22 -8
  24. data/test/test-array.rb +7 -0
  25. data/test/test-chunked-array.rb +9 -0
  26. data/test/test-dense-union-array.rb +42 -0
  27. data/test/test-dense-union-data-type.rb +1 -1
  28. data/test/test-function.rb +7 -7
  29. data/test/test-group.rb +58 -58
  30. data/test/test-record-batch-file-reader.rb +21 -0
  31. data/test/test-record-batch-stream-reader.rb +129 -0
  32. data/test/test-scalar.rb +65 -0
  33. data/test/test-slicer.rb +194 -129
  34. data/test/test-sparse-union-array.rb +38 -0
  35. data/test/test-table.rb +200 -38
  36. data/test/values/test-dense-union-array.rb +88 -45
  37. data/test/values/test-list-array.rb +26 -10
  38. data/test/values/test-map-array.rb +33 -10
  39. data/test/values/test-sparse-union-array.rb +84 -41
  40. data/test/values/test-struct-array.rb +20 -8
  41. metadata +20 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 23f72b7016d780c208dd8f8cbc627becce73e25754f83a62a4a4d3f4bb60a5e3
4
- data.tar.gz: 23f57e383e26d322e9fa81efed1e8e5545bd5b1160075f4a0b8553efda98af93
3
+ metadata.gz: 583b384182a9906a4fcd723cb67b0600b07dd962a1d45e114619014a396800c6
4
+ data.tar.gz: 97b7a63c52c678b7255a520d768f59fde993ffd2fd9014f24b0b946940f656b8
5
5
  SHA512:
6
- metadata.gz: 2d157d2d56dbca00a2f4a0eadb831a19a7a7124c3c3c8c7675536e4597f2a1447f047eec48654256eff3148457d09f1d3e375a095cf152051ef6603cd8bf25bf
7
- data.tar.gz: 958f320a92981a6ed7f84fcab92497ac1038a038de302419e9f57f83e3ca4d02d682ec05a129ac63c9ed6e613725619839b46cc589938a56573e88182a8002d4
6
+ metadata.gz: ab37d28349e4a3b23b629c4a2431a48e6b0a0b4884fb0274a484965e5c2ed551cc5b7b998433940c8e992135803f65a4c3b0555abebd6478fdaeb665f70e3f25
7
+ data.tar.gz: cfb8de7ce8d9b9cabb76f016e51bde489f9c58fc42253612395082a9ccab79001f56978d102ef18ac93f31e696406980304f2d4e40af46e8b2ba041363308920
data/README.md CHANGED
@@ -25,9 +25,9 @@ Red Arrow is the Ruby bindings of Apache Arrow. Red Arrow is based on GObject In
25
25
 
26
26
  [GObject Introspection](https://wiki.gnome.org/action/show/Projects/GObjectIntrospection) is a middleware for language bindings of C library. GObject Introspection can generate language bindings automatically at runtime.
27
27
 
28
- Red Arrow uses [Apache Arrow GLib](https://github.com/apache/arrow/tree/master/c_glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow.
28
+ Red Arrow uses [Apache Arrow GLib](https://github.com/apache/arrow/tree/main/c_glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow.
29
29
 
30
- Apache Arrow GLib is a C wrapper for [Apache Arrow C++](https://github.com/apache/arrow/tree/master/cpp). GObject Introspection can't use Apache Arrow C++ directly. Apache Arrow GLib is a bridge between Apache Arrow C++ and GObject Introspection.
30
+ Apache Arrow GLib is a C wrapper for [Apache Arrow C++](https://github.com/apache/arrow/tree/main/cpp). GObject Introspection can't use Apache Arrow C++ directly. Apache Arrow GLib is a bridge between Apache Arrow C++ and GObject Introspection.
31
31
 
32
32
  gobject-introspection gem is a Ruby bindings of GObject Introspection. Red Arrow uses GObject Introspection via gobject-introspection gem.
33
33
 
@@ -56,7 +56,7 @@ table.save("/dev/shm/data-processed.arrow")
56
56
  Note that you need to install Apache Arrow C++/GLib at master before preparing Red Arrow. See also:
57
57
 
58
58
  * For Apache Arrow C++: https://arrow.apache.org/docs/developers/cpp/building.html
59
- * For Apache Arrow GLib: https://github.com/apache/arrow/blob/master/c_glib/README.md
59
+ * For Apache Arrow GLib: https://github.com/apache/arrow/blob/main/c_glib/README.md
60
60
 
61
61
  ```console
62
62
  $ cd ruby/red-arrow
@@ -685,25 +685,21 @@ namespace red_arrow {
685
685
  private:
686
686
  template <typename ArrayType>
687
687
  inline void convert_value(const ArrayType& array) {
688
- auto result = rb_hash_new();
689
688
  if (array.IsNull(index_)) {
690
- rb_hash_aset(result, field_name_, Qnil);
689
+ result_ = RUBY_Qnil;
691
690
  } else {
692
- rb_hash_aset(result,
693
- field_name_,
694
- array_value_converter_->convert(array, index_));
691
+ result_ = array_value_converter_->convert(array, index_);
695
692
  }
696
- result_ = result;
697
693
  }
698
694
 
699
- uint8_t compute_field_index(const arrow::UnionArray& array,
700
- arrow::UnionType* type,
701
- const char* tag) {
695
+ int8_t compute_child_id(const arrow::UnionArray& array,
696
+ arrow::UnionType* type,
697
+ const char* tag) {
702
698
  const auto type_code = array.raw_type_codes()[index_];
703
699
  if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
704
- const auto field_id = type->child_ids()[type_code];
705
- if (field_id >= 0) {
706
- return field_id;
700
+ const auto child_id = type->child_ids()[type_code];
701
+ if (child_id >= 0) {
702
+ return child_id;
707
703
  }
708
704
  }
709
705
  check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
@@ -715,36 +711,25 @@ namespace red_arrow {
715
711
  const auto type =
716
712
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
717
713
  const auto tag = "[raw-records][union-sparse-array]";
718
- const auto index = compute_field_index(array, type, tag);
719
- const auto field = type->field(index).get();
720
- const auto& field_name = field->name();
721
- const auto field_name_keep = field_name_;
722
- field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
723
- const auto field_array = array.field(index).get();
714
+ const auto child_id = compute_child_id(array, type, tag);
715
+ const auto field_array = array.field(child_id).get();
724
716
  check_status(field_array->Accept(this), tag);
725
- field_name_ = field_name_keep;
726
717
  }
727
718
 
728
719
  void convert_dense(const arrow::DenseUnionArray& array) {
729
720
  const auto type =
730
721
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
731
722
  const auto tag = "[raw-records][union-dense-array]";
732
- const auto index = compute_field_index(array, type, tag);
733
- const auto field = type->field(index).get();
734
- const auto& field_name = field->name();
735
- const auto field_name_keep = field_name_;
736
- field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
737
- const auto field_array = array.field(index);
723
+ const auto child_id = compute_child_id(array, type, tag);
724
+ const auto field_array = array.field(child_id);
738
725
  const auto index_keep = index_;
739
726
  index_ = array.value_offset(index_);
740
727
  check_status(field_array->Accept(this), tag);
741
728
  index_ = index_keep;
742
- field_name_ = field_name_keep;
743
729
  }
744
730
 
745
731
  ArrayValueConverter* array_value_converter_;
746
732
  int64_t index_;
747
- VALUE field_name_;
748
733
  VALUE result_;
749
734
  };
750
735
 
@@ -29,6 +29,19 @@ module Arrow
29
29
  unique.values
30
30
  end
31
31
 
32
+ # Finds the index of the first occurrence of a given value.
33
+ #
34
+ # @param value [Object] The value to be compared.
35
+ #
36
+ # @return [Integer] The index of the first occurrence of the given
37
+ # value if found, -1 if not found.
38
+ #
39
+ # @since 12.0.0
40
+ def index(value)
41
+ value = Scalar.resolve(value, value_data_type)
42
+ compute("index", options: {value: value}).value
43
+ end
44
+
32
45
  private
33
46
  def compute(name, options: nil)
34
47
  Function.find(name).execute([self], options).value
@@ -199,5 +199,14 @@ module Arrow
199
199
  args.unshift(self) unless builder_class.buildable?(args)
200
200
  builder_class.build(*args)
201
201
  end
202
+
203
+ # @return [Arrow::Scalar] A corresponding {Arrow::Scalar} class
204
+ # for this data type.
205
+ #
206
+ # @since 12.0.0
207
+ def scalar_class
208
+ base_name = self.class.name.gsub(/DataType\z/, "")
209
+ ::Arrow.const_get("#{base_name}Scalar")
210
+ end
202
211
  end
203
212
  end
@@ -0,0 +1,49 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class DenseUnionArrayBuilder
20
+ alias_method :append_value_raw, :append_value
21
+
22
+ # @overload append_value
23
+ #
24
+ # Starts appending a union record. You need to append values of
25
+ # fields.
26
+ #
27
+ # @overload append_value(value)
28
+ #
29
+ # Appends a union record including values of fields.
30
+ #
31
+ # @param value [nil, Hash] The union record value.
32
+ #
33
+ # If this is `nil`, the union record is null.
34
+ #
35
+ # If this is a `Hash`, it contains the values of fields.
36
+ #
37
+ # @since 12.0.0
38
+ def append_value(value)
39
+ if value.nil?
40
+ append_null
41
+ else
42
+ key = value.keys[0]
43
+ child_info = child_infos[key]
44
+ append_value_raw(child_info[:id])
45
+ child_info[:builder].append(value.values[0])
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,26 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class DenseUnionArray
20
+ def get_value(i)
21
+ child_id = get_child_id(i)
22
+ field = get_field(child_id)
23
+ field[get_value_offset(i)]
24
+ end
25
+ end
26
+ end
data/lib/arrow/loader.rb CHANGED
@@ -70,6 +70,8 @@ module Arrow
70
70
  require "arrow/decimal256-array"
71
71
  require "arrow/decimal256-array-builder"
72
72
  require "arrow/decimal256-data-type"
73
+ require "arrow/dense-union-array"
74
+ require "arrow/dense-union-array-builder"
73
75
  require "arrow/dense-union-data-type"
74
76
  require "arrow/dictionary-array"
75
77
  require "arrow/dictionary-data-type"
@@ -109,6 +111,8 @@ module Arrow
109
111
  require "arrow/sort-key"
110
112
  require "arrow/sort-options"
111
113
  require "arrow/source-node-options"
114
+ require "arrow/sparse-union-array"
115
+ require "arrow/sparse-union-array-builder"
112
116
  require "arrow/sparse-union-data-type"
113
117
  require "arrow/string-dictionary-array-builder"
114
118
  require "arrow/string-array-builder"
@@ -134,6 +138,7 @@ module Arrow
134
138
  require "arrow/timestamp-array"
135
139
  require "arrow/timestamp-array-builder"
136
140
  require "arrow/timestamp-data-type"
141
+ require "arrow/union-array-builder"
137
142
  require "arrow/writable"
138
143
  end
139
144
 
@@ -20,6 +20,8 @@ module Arrow
20
20
  include Enumerable
21
21
 
22
22
  def each
23
+ return to_enum(__method__) {n_record_batches} unless block_given?
24
+
23
25
  n_record_batches.times do |i|
24
26
  yield(get_record_batch(i))
25
27
  end
@@ -20,6 +20,8 @@ module Arrow
20
20
  include Enumerable
21
21
 
22
22
  def each
23
+ return to_enum(__method__) unless block_given?
24
+
23
25
  loop do
24
26
  record_batch = next_record_batch
25
27
  break if record_batch.nil?
data/lib/arrow/scalar.rb CHANGED
@@ -17,6 +17,73 @@
17
17
 
18
18
  module Arrow
19
19
  class Scalar
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when self
25
+ value
26
+ when true, false
27
+ BooleanScalar.new(value)
28
+ when Symbol, String
29
+ StringScalar.new(value.to_s)
30
+ when Integer
31
+ Int64Scalar.new(value)
32
+ when Float
33
+ DoubleScalar.new(value)
34
+ else
35
+ nil
36
+ end
37
+ end
38
+
39
+ # Ensures that a suitable {Arrow::Scalar} is returned.
40
+ #
41
+ # @overload resolve(scalar)
42
+ #
43
+ # Returns the given scalar itself. This makes it convenient to
44
+ # use this method as an {Arrow::Scalar} converter.
45
+ #
46
+ # @param scalar [Arrow::Scalar] The scalar.
47
+ #
48
+ # @return [Arrow::Scalar] The given scalar itself.
49
+ #
50
+ # @overload resolve(value)
51
+ #
52
+ # Creates a suitable scalar from the given value. For example,
53
+ # you can create {Arrow::BooleanScalar} from `true`.
54
+ #
55
+ # @param value [Object] The value.
56
+ #
57
+ # @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
58
+ #
59
+ # @overload resolve(value, data_type)
60
+ #
61
+ # Creates a scalar of `data_type.scalar_class` from the given
62
+ # value. For example, you can create {Arrow::Int32Scalar} from
63
+ # `29` and {Arrow::Int32DataType}.
64
+ #
65
+ # @param value [Object] The value.
66
+ #
67
+ # @param data_type [Arrow::DataType] The {Arrow::DataType} to
68
+ # decide the returned scalar class.
69
+ #
70
+ # @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
71
+ #
72
+ # @since 12.0.0
73
+ def resolve(value, data_type=nil)
74
+ return try_convert(value) if data_type.nil?
75
+
76
+ data_type = DataType.resolve(data_type)
77
+ scalar_class = data_type.scalar_class
78
+ case value
79
+ when Scalar
80
+ return value if value.class == scalar_class
81
+ value = value.value
82
+ end
83
+ scalar_class.new(value)
84
+ end
85
+ end
86
+
20
87
  # @param other [Arrow::Scalar] The scalar to be compared.
21
88
  # @param options [Arrow::EqualOptions, Hash] (nil)
22
89
  # The options to custom how to compare.
data/lib/arrow/slicer.rb CHANGED
@@ -162,6 +162,40 @@ module Arrow
162
162
  def reject(&block)
163
163
  RejectCondition.new(@column, block)
164
164
  end
165
+
166
+ def end_with?(substring, ignore_case: false)
167
+ MatchSubstringFamilyCondition.new("ends_with",
168
+ @column, substring, ignore_case)
169
+ end
170
+
171
+ def match_like?(pattern, ignore_case: false)
172
+ MatchSubstringFamilyCondition.new("match_like",
173
+ @column, pattern, ignore_case)
174
+ end
175
+
176
+ def match_substring?(pattern, ignore_case: nil)
177
+ case pattern
178
+ when String
179
+ ignore_case = false if ignore_case.nil?
180
+ MatchSubstringFamilyCondition.new("match_substring",
181
+ @column, pattern, ignore_case)
182
+ when Regexp
183
+ ignore_case = pattern.casefold? if ignore_case.nil?
184
+ MatchSubstringFamilyCondition.new("match_substring_regex",
185
+ @column,
186
+ pattern.source,
187
+ ignore_case)
188
+ else
189
+ message =
190
+ "pattern must be either String or Regexp: #{pattern.inspect}"
191
+ raise ArgumentError, message
192
+ end
193
+ end
194
+
195
+ def start_with?(substring, ignore_case: false)
196
+ MatchSubstringFamilyCondition.new("starts_with",
197
+ @column, substring, ignore_case)
198
+ end
165
199
  end
166
200
 
167
201
  class NotColumnCondition < Condition
@@ -351,5 +385,32 @@ module Arrow
351
385
  BooleanArray.new(raw_array)
352
386
  end
353
387
  end
388
+
389
+ class MatchSubstringFamilyCondition < Condition
390
+ def initialize(function, column, pattern, ignore_case, invert: false)
391
+ @function = function
392
+ @column = column
393
+ @options = MatchSubstringOptions.new
394
+ @options.pattern = pattern
395
+ @options.ignore_case = ignore_case
396
+ @invert = invert
397
+ end
398
+
399
+ def !@
400
+ MatchSubstringFamilyCondition.new(@function,
401
+ @column,
402
+ @options.pattern,
403
+ @options.ignore_case?,
404
+ invert: !@invert)
405
+ end
406
+
407
+ def evaluate
408
+ datum = Function.find(@function).execute([@column.data], @options)
409
+ if @invert
410
+ datum = Function.find("invert").execute([datum])
411
+ end
412
+ datum.value
413
+ end
414
+ end
354
415
  end
355
416
  end
@@ -0,0 +1,56 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class SparseUnionArrayBuilder
20
+ alias_method :append_value_raw, :append_value
21
+
22
+ # @overload append_value
23
+ #
24
+ # Starts appending a union record. You need to append values of
25
+ # fields.
26
+ #
27
+ # @overload append_value(value)
28
+ #
29
+ # Appends a union record including values of fields.
30
+ #
31
+ # @param value [nil, Hash] The union record value.
32
+ #
33
+ # If this is `nil`, the union record is null.
34
+ #
35
+ # If this is a `Hash`, it contains the values of fields.
36
+ #
37
+ # @since 12.0.0
38
+ def append_value(value)
39
+ if value.nil?
40
+ append_null
41
+ else
42
+ key = value.keys[0]
43
+ child_info = child_infos[key]
44
+ append_value_raw(child_info[:id])
45
+ child_infos.each do |child_key, child_info|
46
+ builder = child_info[:builder]
47
+ if child_key == key
48
+ builder.append(value.values[0])
49
+ else
50
+ builder.append_null
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,26 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class SparseUnionArray
20
+ def get_value(i)
21
+ child_id = get_child_id(i)
22
+ field = get_field(child_id)
23
+ field[i]
24
+ end
25
+ end
26
+ end
@@ -110,11 +110,6 @@ module Arrow
110
110
  end
111
111
  end
112
112
 
113
- alias_method :append_null_raw, :append_null
114
- def append_null
115
- append_null_raw
116
- end
117
-
118
113
  # @since 0.12.0
119
114
  def append(*values)
120
115
  if values.empty?