red-arrow 4.0.0 → 6.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +23 -0
  3. data/ext/arrow/arrow.cpp +3 -0
  4. data/ext/arrow/converters.cpp +5 -0
  5. data/ext/arrow/converters.hpp +126 -0
  6. data/ext/arrow/extconf.rb +13 -0
  7. data/ext/arrow/memory-view.cpp +311 -0
  8. data/ext/arrow/memory-view.hpp +26 -0
  9. data/ext/arrow/raw-records.cpp +1 -0
  10. data/ext/arrow/values.cpp +1 -0
  11. data/lib/arrow/aggregate-node-options.rb +35 -0
  12. data/lib/arrow/aggregation.rb +46 -0
  13. data/lib/arrow/array-builder.rb +5 -0
  14. data/lib/arrow/array.rb +12 -0
  15. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  16. data/lib/arrow/buffer.rb +10 -6
  17. data/lib/arrow/column-containable.rb +100 -1
  18. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  19. data/lib/arrow/datum.rb +100 -0
  20. data/lib/arrow/equal-options.rb +38 -0
  21. data/lib/arrow/expression.rb +48 -0
  22. data/lib/arrow/file-system.rb +34 -0
  23. data/lib/arrow/group.rb +116 -124
  24. data/lib/arrow/loader.rb +44 -0
  25. data/lib/arrow/map-array-builder.rb +109 -0
  26. data/lib/arrow/map-array.rb +26 -0
  27. data/lib/arrow/map-data-type.rb +89 -0
  28. data/lib/arrow/path-extension.rb +1 -1
  29. data/lib/arrow/record-batch-reader.rb +41 -0
  30. data/lib/arrow/record-batch.rb +0 -2
  31. data/lib/arrow/scalar.rb +32 -0
  32. data/lib/arrow/slicer.rb +44 -143
  33. data/lib/arrow/source-node-options.rb +32 -0
  34. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  35. data/lib/arrow/symbol-values-appendable.rb +34 -0
  36. data/lib/arrow/table-concatenate-options.rb +36 -0
  37. data/lib/arrow/table-formatter.rb +141 -17
  38. data/lib/arrow/table-list-formatter.rb +5 -3
  39. data/lib/arrow/table-loader.rb +41 -3
  40. data/lib/arrow/table-saver.rb +29 -3
  41. data/lib/arrow/table-table-formatter.rb +7 -31
  42. data/lib/arrow/table.rb +34 -40
  43. data/lib/arrow/version.rb +1 -1
  44. data/red-arrow.gemspec +2 -1
  45. data/test/helper.rb +1 -0
  46. data/test/raw-records/test-dense-union-array.rb +14 -0
  47. data/test/raw-records/test-list-array.rb +19 -0
  48. data/test/raw-records/test-map-array.rb +441 -0
  49. data/test/raw-records/test-sparse-union-array.rb +14 -0
  50. data/test/raw-records/test-struct-array.rb +15 -0
  51. data/test/test-array-builder.rb +7 -0
  52. data/test/test-array.rb +34 -0
  53. data/test/test-binary-dictionary-array-builder.rb +103 -0
  54. data/test/test-boolean-scalar.rb +26 -0
  55. data/test/test-csv-loader.rb +8 -8
  56. data/test/test-expression.rb +40 -0
  57. data/test/test-float-scalar.rb +46 -0
  58. data/test/test-function.rb +176 -0
  59. data/test/test-group.rb +75 -51
  60. data/test/test-map-array-builder.rb +110 -0
  61. data/test/test-map-array.rb +33 -0
  62. data/test/test-map-data-type.rb +36 -0
  63. data/test/test-memory-view.rb +434 -0
  64. data/test/test-record-batch-reader.rb +46 -0
  65. data/test/test-record-batch.rb +42 -0
  66. data/test/test-slicer.rb +166 -167
  67. data/test/test-string-dictionary-array-builder.rb +103 -0
  68. data/test/test-table.rb +190 -53
  69. data/test/values/test-dense-union-array.rb +14 -0
  70. data/test/values/test-list-array.rb +17 -0
  71. data/test/values/test-map-array.rb +433 -0
  72. data/test/values/test-sparse-union-array.rb +14 -0
  73. data/test/values/test-struct-array.rb +15 -0
  74. metadata +132 -73
@@ -0,0 +1,46 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class Aggregation
    class << self
      # Builds an aggregation from a Hash description.
      #
      # The Hash is read with the `:function`, `:options`, `:input` and
      # `:output` keys. `:function` (a String or Symbol) and `:input`
      # are required. When `:output` is missing, it defaults to
      # `"function(input)"` with the `hash_` prefix stripped from the
      # function name.
      #
      # @param value [Object] The value to be converted.
      # @return [Aggregation, nil] The aggregation, or `nil` when `value`
      #   isn't a Hash or lacks a usable `:function` or `:input`.
      #
      # @api private
      def try_convert(value)
        return nil unless value.is_a?(Hash)

        function = value[:function]
        function = function.to_s if function.is_a?(Symbol)
        # This also rejects a missing (nil) function.
        return nil unless function.is_a?(String)
        # TODO: Improve this when we have non hash based aggregate function
        function = "hash_#{function}" unless function.start_with?("hash_")

        options = value[:options]
        input = value[:input]
        return nil if input.nil?

        output = value[:output]
        if output.nil?
          bare_function = function.sub(/\Ahash_/, "")
          output = "#{bare_function}(#{input})"
        end
        new(function, options, input, output)
      end
    end
  end
end
@@ -58,6 +58,11 @@ module Arrow
58
58
  builder: StringArrayBuilder.new,
59
59
  detected: true,
60
60
  }
61
+ when Symbol
62
+ {
63
+ builder: StringDictionaryArrayBuilder.new,
64
+ detected: true,
65
+ }
61
66
  when Float
62
67
  {
63
68
  builder: DoubleArrayBuilder.new,
data/lib/arrow/array.rb CHANGED
@@ -55,6 +55,18 @@ module Arrow
55
55
  end
56
56
  end
57
57
 
58
+ # @param other [Arrow::Array] The array to be compared.
59
+ # @param options [Arrow::EqualOptions, Hash] (nil)
60
+ # The options to customize how to compare.
61
+ #
62
+ # @return [Boolean]
63
+ # `true` if both of them have the same data, `false` otherwise.
64
+ #
65
+ # @since 5.0.0
66
+ def equal_array?(other, options=nil)
67
+ equal_options(other, options)
68
+ end
69
+
58
70
  def each
59
71
  return to_enum(__method__) unless block_given?
60
72
 
@@ -0,0 +1,27 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class BinaryDictionaryArrayBuilder
    include SymbolValuesAppendable

    # Creates the builder used for this dictionary's values.
    # NOTE(review): presumably a hook called by SymbolValuesAppendable,
    # which is defined outside this file -- confirm.
    #
    # @return [BinaryArrayBuilder] A new, empty builder.
    private def create_values_array_builder
      BinaryArrayBuilder.new
    end
  end
end
data/lib/arrow/buffer.rb CHANGED
@@ -17,12 +17,16 @@
17
17
 
18
18
  module Arrow
19
19
  class Buffer
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
-
23
- def initialize(data)
24
- @data = data
25
- initialize_raw(data)
20
+ class << self
21
+ # @api private
22
# Wraps a `String` in a new buffer.
#
# @param value [Object] The value to be converted.
# @return [Buffer, nil] A new buffer built from `value` when it is a
#   `String`, `nil` otherwise.
def try_convert(value)
  return new(value) if value.is_a?(String)

  nil
end
26
30
  end
27
31
  end
28
32
  end
@@ -27,6 +27,17 @@ module Arrow
27
27
  columns.each(&block)
28
28
  end
29
29
 
30
+ # @overload find_column(name)
31
+ # Find a column that has the given name.
32
+ #
33
+ # @param name [String, Symbol] The column name to be found.
34
+ # @return [Column] The found column.
35
+ #
36
+ # @overload find_column(index)
37
+ # Find the `index`-th column.
38
+ #
39
+ # @param index [Integer] The index to be found.
40
+ # @return [Column] The found column.
30
41
  def find_column(name_or_index)
31
42
  case name_or_index
32
43
  when String, Symbol
@@ -40,9 +51,97 @@ module Arrow
40
51
  return nil if index < 0 or index >= n_columns
41
52
  Column.new(self, index)
42
53
  else
43
- message = "column name or index must be String, Symbol or Integer"
54
+ message = "column name or index must be String, Symbol or Integer: "
55
+ message << name_or_index.inspect
44
56
  raise ArgumentError, message
45
57
  end
46
58
  end
59
+
60
# Selects columns that are selected by `selectors` and/or `block`
# and creates a new container only with the selected columns.
#
# @param selectors [Array<String, Symbol, Integer, Range>]
#   If a selector is `String`, `Symbol` or `Integer`, the selector
#   selects a column by {#find_column}.
#
#   If a selector is `Range`, the selector selects columns by
#   `::Array#[]`. A range for which `::Array#[]` returns `nil`
#   (it starts past the last column) selects no columns.
# @yield [column] Gives a column to the block to select columns.
#   This uses `::Array#select`. When selectors are also given, the
#   block filters the columns chosen by the selectors.
# @yieldparam column [Column] A target column.
# @yieldreturn [Boolean] Whether the given column is selected or not.
# @return [self.class] The newly created container that only has selected
#   columns.
# @return [Enumerator] When neither selectors nor a block are given.
# @raise [KeyError] When a `String` or `Symbol` selector matches no column.
# @raise [IndexError] When any other non-`Range` selector matches no column.
def select_columns(*selectors, &block)
  if selectors.empty?
    return to_enum(__method__) unless block_given?
    selected_columns = columns.select(&block)
  else
    selected_columns = []
    selectors.each do |selector|
      if selector.is_a?(Range)
        # ::Array#[] returns nil, not [], when the range starts beyond
        # the end; concat(nil) would raise TypeError.
        selected_columns.concat(columns[selector] || [])
        next
      end

      column = find_column(selector)
      if column.nil?
        case selector
        when String, Symbol
          raise KeyError, "unknown column: #{selector.inspect}: #{inspect}"
        else
          message = "out of index (0..#{n_columns - 1}): " \
                    "#{selector.inspect}: #{inspect}"
          raise IndexError, message
        end
      end
      selected_columns << column
    end
    selected_columns = selected_columns.select(&block) if block_given?
  end
  self.class.new(selected_columns)
end
104
+
105
+ # @overload [](name)
106
+ # Find a column that has the given name.
107
+ #
108
+ # @param name [String, Symbol] The column name to be found.
109
+ # @return [Column] The found column.
110
+ # @see #find_column
111
+ #
112
+ # @overload [](index)
113
+ # Find the `index`-th column.
114
+ #
115
+ # @param index [Integer] The index to be found.
116
+ # @return [Column] The found column.
117
+ # @see #find_column
118
+ #
119
+ # @overload [](range)
120
+ # Selects columns that are in `range` and creates a new container
121
+ # only with the selected columns.
122
+ #
123
+ # @param range [Range] The range to be selected.
124
+ # @return [self.class] The newly created container that only has selected
125
+ # columns.
126
+ # @see #select_columns
127
+ #
128
+ # @overload [](selectors)
129
+ # Selects columns that are selected by `selectors` and creates a
130
+ # new container only with the selected columns.
131
+ #
132
+ # @param selectors [Array] The selectors that are used to select columns.
133
+ # @return [self.class] The newly created container that only has selected
134
+ # columns.
135
+ # @see #select_columns
136
def [](selector)
  # An ::Array is splatted so each element is one selector;
  # a Range is passed through as a single selector.
  return select_columns(*selector) if selector.is_a?(::Array)
  return select_columns(selector) if selector.is_a?(Range)

  # Anything else is looked up as a single column.
  find_column(selector)
end
47
146
  end
48
147
  end
@@ -0,0 +1,25 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # Mix-in that remembers the arguments an object was constructed with.
  #
  # NOTE(review): judging by the module name, the stored reference is
  # meant to keep the arguments from being garbage collected while the
  # object is alive -- confirm against the including classes.
  module ConstructorArgumentsGCGuardable
    # Forwards every argument to the next `initialize` in the ancestor
    # chain, then stores the positional arguments in `@arguments`.
    #
    # @param args [Array] The constructor arguments.
    def initialize(*args)
      super
      @arguments = args
    end
  end
end
@@ -0,0 +1,100 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class Datum
    class << self
      # Wraps `value` in the datum class that matches it.
      #
      # Arrow tables, arrays, chunked arrays and scalars are wrapped
      # directly. A Ruby `::Array` is built into an Arrow array first.
      # Other supported Ruby values (integers, floats, booleans, strings,
      # dates, times and decimals) are converted to an Arrow scalar and
      # this method is called again with that scalar.
      #
      # @param value [Object] The value to be converted.
      # @return [TableDatum, ArrayDatum, ChunkedArrayDatum, ScalarDatum, nil]
      #   The datum, or `nil` when `value` isn't supported (including
      #   integers that don't fit in 64 bits).
      #
      # @api private
      def try_convert(value)
        case value
        when Table
          TableDatum.new(value)
        when Array
          ArrayDatum.new(value)
        when ChunkedArray
          ChunkedArrayDatum.new(value)
        when Scalar
          ScalarDatum.new(value)
        when ::Array
          ArrayDatum.new(ArrayBuilder.build(value))
        when Integer
          # Narrowest type first; at each width the unsigned range is
          # tried before the signed one, so non-negative values become
          # unsigned scalars.
          case value
          when (0..((2 ** 8) - 1))
            try_convert(UInt8Scalar.new(value))
          when ((-(2 ** 7))..((2 ** 7) - 1))
            try_convert(Int8Scalar.new(value))
          when (0..((2 ** 16) - 1))
            try_convert(UInt16Scalar.new(value))
          when ((-(2 ** 15))..((2 ** 15) - 1))
            try_convert(Int16Scalar.new(value))
          when (0..((2 ** 32) - 1))
            try_convert(UInt32Scalar.new(value))
          when ((-(2 ** 31))..((2 ** 31) - 1))
            try_convert(Int32Scalar.new(value))
          when (0..((2 ** 64) - 1))
            try_convert(UInt64Scalar.new(value))
          when ((-(2 ** 63))..((2 ** 63) - 1))
            try_convert(Int64Scalar.new(value))
          else
            nil
          end
        when Float
          try_convert(DoubleScalar.new(value))
        when true, false
          try_convert(BooleanScalar.new(value))
        when String
          # ASCII-only or UTF-8 strings become string scalars, anything
          # else becomes a binary scalar. The "large" variants are used
          # when the byte size exceeds 2**31 - 1.
          fits_in_32bit = value.bytesize <= ((2 ** 31) - 1)
          if value.ascii_only? || value.encoding == Encoding::UTF_8
            scalar_class = fits_in_32bit ? StringScalar : LargeStringScalar
          else
            scalar_class = fits_in_32bit ? BinaryScalar : LargeBinaryScalar
          end
          try_convert(scalar_class.new(value))
        when Date
          # Number of days since Date32ArrayBuilder::UNIX_EPOCH.
          date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
          try_convert(Date32Scalar.new(date32_value))
        when Time
          # Arrow::Time: second/milli units use the 32-bit time type,
          # every other unit uses the 64-bit one.
          case value.unit
          when TimeUnit::SECOND, TimeUnit::MILLI
            data_type = Time32DataType.new(value.unit)
            scalar_class = Time32Scalar
          else
            data_type = Time64DataType.new(value.unit)
            scalar_class = Time64Scalar
          end
          try_convert(scalar_class.new(data_type, value.value))
        when ::Time
          # Nanoseconds since the epoch.
          data_type = TimestampDataType.new(:nano)
          timestamp_value = value.to_i * 1_000_000_000 + value.nsec
          try_convert(TimestampScalar.new(data_type, timestamp_value))
        when Decimal128
          # This branch used to be a copy of the ::Time branch: it built
          # a timestamp data type and nanosecond arithmetic (value.nsec)
          # for a decimal scalar.
          # NOTE(review): precision 38 / scale 0 is an assumption, since
          # the bare Decimal128 value isn't asked for either here --
          # confirm the intended data type.
          data_type = Decimal128DataType.new(38, 0)
          try_convert(Decimal128Scalar.new(data_type, value))
        else
          nil
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class EqualOptions
    class << self
      # Builds options from a Hash by calling the `name=` writer for
      # each key.
      #
      # @param value [Object] The value to be converted.
      # @return [EqualOptions, nil] The options, or `nil` when `value`
      #   isn't a Hash or has a key without a matching writer.
      #
      # @api private
      def try_convert(value)
        return nil unless value.is_a?(Hash)

        options = new
        value.each_pair do |name, option_value|
          writer = :"#{name}="
          # An unknown key makes the whole conversion fail.
          return nil unless options.respond_to?(writer)
          options.__send__(writer, option_value)
        end
        options
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class Expression
    class << self
      # Converts a Ruby value to an expression.
      #
      # * A `Symbol` becomes a field expression for that name.
      # * An `::Array` is read as `[function_name, *arguments]` and
      #   becomes a call expression. A trailing `FunctionOptions`
      #   argument is removed from the arguments and passed as the
      #   call's options.
      # * Anything else becomes a literal expression if
      #   `Datum.try_convert` accepts it.
      #
      # @param value [Object] The value to be converted.
      # @return [FieldExpression, CallExpression, LiteralExpression, nil]
      #   The expression, or `nil` when `value` can't be converted.
      #
      # @api private
      def try_convert(value)
        case value
        when Symbol
          FieldExpression.new(value.to_s)
        when ::Array
          function_name, *arguments = value
          named = function_name.is_a?(String) || function_name.is_a?(Symbol)
          return nil unless named

          # Stays nil when the last argument isn't a FunctionOptions.
          options = arguments.pop if arguments.last.is_a?(FunctionOptions)
          CallExpression.new(function_name.to_s, arguments, options)
        else
          datum = Datum.try_convert(value)
          LiteralExpression.new(datum) unless datum.nil?
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class FileSystem
    alias_method :open_output_stream_raw, :open_output_stream

    # Opens an output stream for `path`.
    #
    # @param path [Object] Passed as is to the original
    #   `open_output_stream`.
    # @yield [stream] When a block is given, the stream is passed to it
    #   and closed when the block finishes, even if it raises.
    # @return [Object] The block's result when a block is given, the
    #   opened stream otherwise. Without a block the stream is not
    #   closed by this method.
    def open_output_stream(path)
      stream = open_output_stream_raw(path)
      return stream unless block_given?

      begin
        yield(stream)
      ensure
        stream.close
      end
    end
  end
end