red-arrow 4.0.0 → 6.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +23 -0
  3. data/ext/arrow/arrow.cpp +3 -0
  4. data/ext/arrow/converters.cpp +5 -0
  5. data/ext/arrow/converters.hpp +126 -0
  6. data/ext/arrow/extconf.rb +13 -0
  7. data/ext/arrow/memory-view.cpp +311 -0
  8. data/ext/arrow/memory-view.hpp +26 -0
  9. data/ext/arrow/raw-records.cpp +1 -0
  10. data/ext/arrow/values.cpp +1 -0
  11. data/lib/arrow/aggregate-node-options.rb +35 -0
  12. data/lib/arrow/aggregation.rb +46 -0
  13. data/lib/arrow/array-builder.rb +5 -0
  14. data/lib/arrow/array.rb +12 -0
  15. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  16. data/lib/arrow/buffer.rb +10 -6
  17. data/lib/arrow/column-containable.rb +100 -1
  18. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  19. data/lib/arrow/datum.rb +100 -0
  20. data/lib/arrow/equal-options.rb +38 -0
  21. data/lib/arrow/expression.rb +48 -0
  22. data/lib/arrow/file-system.rb +34 -0
  23. data/lib/arrow/group.rb +116 -124
  24. data/lib/arrow/loader.rb +44 -0
  25. data/lib/arrow/map-array-builder.rb +109 -0
  26. data/lib/arrow/map-array.rb +26 -0
  27. data/lib/arrow/map-data-type.rb +89 -0
  28. data/lib/arrow/path-extension.rb +1 -1
  29. data/lib/arrow/record-batch-reader.rb +41 -0
  30. data/lib/arrow/record-batch.rb +0 -2
  31. data/lib/arrow/scalar.rb +32 -0
  32. data/lib/arrow/slicer.rb +44 -143
  33. data/lib/arrow/source-node-options.rb +32 -0
  34. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  35. data/lib/arrow/symbol-values-appendable.rb +34 -0
  36. data/lib/arrow/table-concatenate-options.rb +36 -0
  37. data/lib/arrow/table-formatter.rb +141 -17
  38. data/lib/arrow/table-list-formatter.rb +5 -3
  39. data/lib/arrow/table-loader.rb +41 -3
  40. data/lib/arrow/table-saver.rb +29 -3
  41. data/lib/arrow/table-table-formatter.rb +7 -31
  42. data/lib/arrow/table.rb +34 -40
  43. data/lib/arrow/version.rb +1 -1
  44. data/red-arrow.gemspec +2 -1
  45. data/test/helper.rb +1 -0
  46. data/test/raw-records/test-dense-union-array.rb +14 -0
  47. data/test/raw-records/test-list-array.rb +19 -0
  48. data/test/raw-records/test-map-array.rb +441 -0
  49. data/test/raw-records/test-sparse-union-array.rb +14 -0
  50. data/test/raw-records/test-struct-array.rb +15 -0
  51. data/test/test-array-builder.rb +7 -0
  52. data/test/test-array.rb +34 -0
  53. data/test/test-binary-dictionary-array-builder.rb +103 -0
  54. data/test/test-boolean-scalar.rb +26 -0
  55. data/test/test-csv-loader.rb +8 -8
  56. data/test/test-expression.rb +40 -0
  57. data/test/test-float-scalar.rb +46 -0
  58. data/test/test-function.rb +176 -0
  59. data/test/test-group.rb +75 -51
  60. data/test/test-map-array-builder.rb +110 -0
  61. data/test/test-map-array.rb +33 -0
  62. data/test/test-map-data-type.rb +36 -0
  63. data/test/test-memory-view.rb +434 -0
  64. data/test/test-record-batch-reader.rb +46 -0
  65. data/test/test-record-batch.rb +42 -0
  66. data/test/test-slicer.rb +166 -167
  67. data/test/test-string-dictionary-array-builder.rb +103 -0
  68. data/test/test-table.rb +190 -53
  69. data/test/values/test-dense-union-array.rb +14 -0
  70. data/test/values/test-list-array.rb +17 -0
  71. data/test/values/test-map-array.rb +433 -0
  72. data/test/values/test-sparse-union-array.rb +14 -0
  73. data/test/values/test-struct-array.rb +15 -0
  74. metadata +132 -73
@@ -0,0 +1,46 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class Aggregation
    class << self
      # Builds an aggregation from a Hash description.
      #
      # The Hash is read through the `:function`, `:options`, `:input`
      # and `:output` keys. `:function` may be a String or a Symbol and
      # is prefixed with `hash_` when it does not already start with it.
      # When `:output` is missing, it defaults to
      # `"#{function without the hash_ prefix}(#{input})"`.
      #
      # @param value [Object] The candidate to be converted.
      # @return [Aggregation, nil] The new aggregation, or `nil` when
      #   `value` is not a Hash, `:function` is neither a String nor a
      #   Symbol, or `:input` is missing.
      #
      # @api private
      def try_convert(value)
        return nil unless value.is_a?(Hash)

        name = value[:function]
        name = name.to_s if name.is_a?(Symbol)
        return nil unless name.is_a?(String)

        # TODO: Improve this when we have non hash based aggregate function
        function = name.start_with?("hash_") ? name : "hash_#{name}"
        options = value[:options]
        input = value[:input]
        return nil if input.nil?

        output = value[:output]
        output = "#{function.sub(/\Ahash_/, "")}(#{input})" if output.nil?
        new(function, options, input, output)
      end
    end
  end
end
@@ -58,6 +58,11 @@ module Arrow
58
58
  builder: StringArrayBuilder.new,
59
59
  detected: true,
60
60
  }
61
+ when Symbol
62
+ {
63
+ builder: StringDictionaryArrayBuilder.new,
64
+ detected: true,
65
+ }
61
66
  when Float
62
67
  {
63
68
  builder: DoubleArrayBuilder.new,
data/lib/arrow/array.rb CHANGED
@@ -55,6 +55,18 @@ module Arrow
55
55
  end
56
56
  end
57
57
 
58
# Compares this array with `other` by delegating to `equal_options`.
#
# @param other [Arrow::Array] The array to be compared.
# @param options [Arrow::EqualOptions, Hash] (nil)
#   The options to customize how to compare.
#
# @return [Boolean]
#   `true` if both of them have the same data, `false` otherwise.
#
# @since 5.0.0
def equal_array?(other, options = nil)
  equal_options(other, options)
end
69
+
58
70
  def each
59
71
  return to_enum(__method__) unless block_given?
60
72
 
@@ -0,0 +1,27 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # Dictionary array builder that mixes in SymbolValuesAppendable and
  # supplies a BinaryArrayBuilder through #create_values_array_builder.
  class BinaryDictionaryArrayBuilder
    include SymbolValuesAppendable

    # @return [BinaryArrayBuilder] A fresh builder.
    #   NOTE(review): presumably consumed by SymbolValuesAppendable to
    #   build the dictionary values; confirm against that module.
    private def create_values_array_builder
      BinaryArrayBuilder.new
    end
  end
end
data/lib/arrow/buffer.rb CHANGED
@@ -17,12 +17,16 @@
17
17
 
18
18
  module Arrow
19
19
  class Buffer
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
-
23
- def initialize(data)
24
- @data = data
25
- initialize_raw(data)
20
+ class << self
21
# Builds a buffer from a String.
#
# @param value [Object] The candidate to be converted.
# @return [Buffer, nil] `new(value)` when `value` is a String,
#   `nil` otherwise.
#
# @api private
def try_convert(value)
  return nil unless value.is_a?(String)

  new(value)
end
26
30
  end
27
31
  end
28
32
  end
@@ -27,6 +27,17 @@ module Arrow
27
27
  columns.each(&block)
28
28
  end
29
29
 
30
+ # @overload find_column(name)
31
+ # Find a column that has the given name.
32
+ #
33
+ # @param name [String, Symbol] The column name to be found.
34
+ # @return [Column] The found column.
35
+ #
36
+ # @overload find_column(index)
37
+ # Find the `index`-th column.
38
+ #
39
+ # @param index [Integer] The index to be found.
40
+ # @return [Column] The found column.
30
41
  def find_column(name_or_index)
31
42
  case name_or_index
32
43
  when String, Symbol
@@ -40,9 +51,97 @@ module Arrow
40
51
  return nil if index < 0 or index >= n_columns
41
52
  Column.new(self, index)
42
53
  else
43
- message = "column name or index must be String, Symbol or Integer"
54
+ message = "column name or index must be String, Symbol or Integer: "
55
+ message << name_or_index.inspect
44
56
  raise ArgumentError, message
45
57
  end
46
58
  end
59
+
60
# Selects columns that are selected by `selectors` and/or `block`
# and creates a new container only with the selected columns.
#
# @param selectors [Array<String, Symbol, Integer, Range>]
#   If a selector is `String`, `Symbol` or `Integer`, the selector
#   selects a column by {#find_column}.
#
#   If a selector is `Range`, the selector selects columns by `::Array#[]`.
# @yield [column] Gives a column to the block to select columns.
#   This uses `::Array#select`. When selectors are also given, the
#   block filters the columns picked by the selectors.
# @yieldparam column [Column] A target column.
# @yieldreturn [Boolean] Whether the given column is selected or not.
# @return [self.class] The newly created container that only has selected
#   columns.
# @return [Enumerator] When neither selectors nor a block is given.
# @raise [KeyError] When a `String` or `Symbol` selector matches no column.
# @raise [IndexError] When an `Integer` selector is out of index, or when
#   a `Range` selector starts outside of the columns.
def select_columns(*selectors, &block)
  if selectors.empty?
    return to_enum(__method__) unless block_given?
    selected_columns = columns.select(&block)
  else
    all_columns = columns
    selected_columns = []
    selectors.each do |selector|
      case selector
      when Range
        ranged_columns = all_columns[selector]
        # ::Array#[] returns nil (not []) when the range starts outside
        # of the array. Report it like an out of index Integer selector
        # instead of failing in #concat with a TypeError.
        if ranged_columns.nil?
          message = "out of index (0..#{n_columns - 1}): " +
                    "#{selector.inspect}: #{inspect}"
          raise IndexError, message
        end
        selected_columns.concat(ranged_columns)
      else
        column = find_column(selector)
        if column.nil?
          case selector
          when String, Symbol
            message = "unknown column: #{selector.inspect}: #{inspect}"
            raise KeyError, message
          else
            message = "out of index (0..#{n_columns - 1}): " +
                      "#{selector.inspect}: #{inspect}"
            raise IndexError, message
          end
        end
        selected_columns << column
      end
    end
    selected_columns = selected_columns.select(&block) if block_given?
  end
  self.class.new(selected_columns)
end
104
+
105
# @overload [](name)
#   Find a column that has the given name.
#
#   @param name [String, Symbol] The column name to be found.
#   @return [Column] The found column.
#   @see #find_column
#
# @overload [](index)
#   Find the `index`-th column.
#
#   @param index [Integer] The index to be found.
#   @return [Column] The found column.
#   @see #find_column
#
# @overload [](range)
#   Selects columns that are in `range` and creates a new container
#   only with the selected columns.
#
#   @param range [Range] The range to be selected.
#   @return [self.class] The newly created container that only has selected
#     columns.
#   @see #select_columns
#
# @overload [](selectors)
#   Selects columns that are selected by `selectors` and creates a
#   new container only with the selected columns.
#
#   @param selectors [Array] The selectors that are used to select columns.
#   @return [self.class] The newly created container that only has selected
#     columns.
#   @see #select_columns
def [](selector)
  # An ::Array is splatted into individual selectors, a Range is passed
  # through as a single selector and anything else is a single lookup.
  return select_columns(*selector) if selector.is_a?(::Array)
  return select_columns(selector) if selector.is_a?(Range)

  find_column(selector)
end
47
146
  end
48
147
  end
@@ -0,0 +1,25 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # Mix-in that remembers the arguments given to the constructor.
  #
  # The arguments are forwarded unchanged to the next `initialize` in the
  # ancestor chain and are then stored in `@arguments`.
  # NOTE(review): going by the module name, the stored reference is
  # presumably there to keep the arguments from being garbage collected
  # while this object is alive; confirm against the including classes.
  module ConstructorArgumentsGCGuardable
    def initialize(*arguments)
      # Bare `super` passes the received arguments (and block) along as is.
      super
      @arguments = arguments
    end
  end
end
@@ -0,0 +1,100 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class Datum
    class << self
      # Wraps `value` in a datum.
      #
      # Arrow objects are wrapped directly: `Table` by `TableDatum`,
      # `Arrow::Array` by `ArrayDatum`, `ChunkedArray` by
      # `ChunkedArrayDatum` and `Scalar` by `ScalarDatum`. A Ruby
      # `::Array` is built into an Arrow array with `ArrayBuilder.build`
      # first. Other supported Ruby values are converted to a scalar and
      # then converted again, so that they end up as `ScalarDatum`:
      #
      # * `Integer`: the first matching range in the order UInt8, Int8,
      #   UInt16, Int16, UInt32, Int32, UInt64, Int64. Non-negative
      #   values therefore become unsigned scalars.
      # * `Float`: `DoubleScalar`.
      # * `true`/`false`: `BooleanScalar`.
      # * `String`: a string scalar when it is ASCII only or UTF-8,
      #   a binary scalar otherwise. The large variants are used when
      #   the byte size is larger than `2 ** 31 - 1`.
      # * `Date`: `Date32Scalar`.
      # * `Arrow::Time`: `Time32Scalar` for second and millisecond units,
      #   `Time64Scalar` otherwise.
      # * `::Time`: `TimestampScalar` in nanoseconds.
      # * `Decimal128`: `Decimal128Scalar`.
      #
      # @param value [Object] The candidate to be converted.
      # @return [Datum, nil] The datum, or `nil` when `value` isn't
      #   supported (including an Integer that doesn't fit in 64 bits).
      #
      # @api private
      def try_convert(value)
        case value
        when Table
          TableDatum.new(value)
        when Array
          ArrayDatum.new(value)
        when ChunkedArray
          ChunkedArrayDatum.new(value)
        when Scalar
          ScalarDatum.new(value)
        when ::Array
          ArrayDatum.new(ArrayBuilder.build(value))
        when Integer
          # The order matters: each unsigned range is tried before the
          # signed range of the same width, so only negative values reach
          # the signed branches.
          case value
          when (0..((2 ** 8) - 1))
            try_convert(UInt8Scalar.new(value))
          when ((-(2 ** 7))..((2 ** 7) - 1))
            try_convert(Int8Scalar.new(value))
          when (0..((2 ** 16) - 1))
            try_convert(UInt16Scalar.new(value))
          when ((-(2 ** 15))..((2 ** 15) - 1))
            try_convert(Int16Scalar.new(value))
          when (0..((2 ** 32) - 1))
            try_convert(UInt32Scalar.new(value))
          when ((-(2 ** 31))..((2 ** 31) - 1))
            try_convert(Int32Scalar.new(value))
          when (0..((2 ** 64) - 1))
            try_convert(UInt64Scalar.new(value))
          when ((-(2 ** 63))..((2 ** 63) - 1))
            try_convert(Int64Scalar.new(value))
          else
            nil
          end
        when Float
          try_convert(DoubleScalar.new(value))
        when true, false
          try_convert(BooleanScalar.new(value))
        when String
          if value.ascii_only? or value.encoding == Encoding::UTF_8
            if value.bytesize <= ((2 ** 31) - 1)
              try_convert(StringScalar.new(value))
            else
              try_convert(LargeStringScalar.new(value))
            end
          else
            if value.bytesize <= ((2 ** 31) - 1)
              try_convert(BinaryScalar.new(value))
            else
              try_convert(LargeBinaryScalar.new(value))
            end
          end
        when Date
          date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
          try_convert(Date32Scalar.new(date32_value))
        when Time
          case value.unit
          when TimeUnit::SECOND, TimeUnit::MILLI
            data_type = Time32DataType.new(value.unit)
            scalar_class = Time32Scalar
          else
            data_type = Time64DataType.new(value.unit)
            scalar_class = Time64Scalar
          end
          try_convert(scalar_class.new(data_type, value.value))
        when ::Time
          data_type = TimestampDataType.new(:nano)
          timestamp_value = value.to_i * 1_000_000_000 + value.nsec
          try_convert(TimestampScalar.new(data_type, timestamp_value))
        when Decimal128
          # This branch used to be a copy of the ::Time branch: it built a
          # timestamp data type and called #nsec on the decimal, so it
          # could never succeed. Build a decimal data type and pass the
          # decimal itself instead.
          # NOTE(review): no precision/scale is read from the Decimal128
          # here, so the widest precision with scale 0 is assumed; confirm
          # this is the intended data type.
          data_type =
            Decimal128DataType.new(Decimal128DataType::MAX_PRECISION, 0)
          try_convert(Decimal128Scalar.new(data_type, value))
        else
          nil
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class EqualOptions
    class << self
      # Builds options from a Hash by calling the `#{key}=` setter for
      # each entry on a fresh instance.
      #
      # @param value [Object] The candidate to be converted.
      # @return [EqualOptions, nil] The configured options, or `nil` when
      #   `value` is not a Hash or when any key has no matching setter.
      #
      # @api private
      def try_convert(value)
        return nil unless value.is_a?(Hash)

        value.each_with_object(new) do |(name, setting), options|
          setter = :"#{name}="
          # An unknown key aborts the whole conversion.
          return nil unless options.respond_to?(setter)
          options.__send__(setter, setting)
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class Expression
    class << self
      # Builds an expression from a Ruby value.
      #
      # * A `Symbol` becomes a `FieldExpression` for that name.
      # * An `::Array` is read as `[function_name, *arguments]` and
      #   becomes a `CallExpression`. The function name must be a String
      #   or a Symbol. A trailing `FunctionOptions` argument is taken out
      #   of the arguments and passed as the options.
      # * Anything else is converted with `Datum.try_convert` and wrapped
      #   in a `LiteralExpression`.
      #
      # @param value [Object] The candidate to be converted.
      # @return [Expression, nil] The expression, or `nil` when the
      #   function name has an unsupported type or the value can't be
      #   converted to a datum.
      #
      # @api private
      def try_convert(value)
        case value
        when Symbol
          FieldExpression.new(value.to_s)
        when ::Array
          name, *arguments = value
          return nil unless name.is_a?(String) || name.is_a?(Symbol)

          # `arguments` is a fresh array from the splat, so popping from
          # it does not touch the caller's array.
          options = arguments.last.is_a?(FunctionOptions) ? arguments.pop : nil
          CallExpression.new(name.to_s, arguments, options)
        else
          datum = Datum.try_convert(value)
          datum.nil? ? nil : LiteralExpression.new(datum)
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class FileSystem
    alias_method :open_output_stream_raw, :open_output_stream

    # Opens an output stream for `path`.
    #
    # @param path [String] The path passed to the original
    #   `open_output_stream`.
    # @yield [stream] When a block is given, the opened stream is passed
    #   to it and is closed when the block finishes, even if it raises.
    # @return [Object] The block's result when a block is given, the
    #   opened stream otherwise. Without a block the caller is
    #   responsible for closing the stream.
    def open_output_stream(path)
      stream = open_output_stream_raw(path)
      return stream unless block_given?

      begin
        yield(stream)
      ensure
        stream.close
      end
    end
  end
end