red-arrow 8.0.0 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +3 -0
  3. data/ext/arrow/extconf.rb +8 -4
  4. data/lib/arrow/array-builder.rb +40 -6
  5. data/lib/arrow/array-computable.rb +37 -0
  6. data/lib/arrow/array.rb +28 -0
  7. data/lib/arrow/chunked-array.rb +21 -0
  8. data/lib/arrow/column.rb +28 -0
  9. data/lib/arrow/data-type.rb +8 -3
  10. data/lib/arrow/decimal128-array-builder.rb +16 -6
  11. data/lib/arrow/decimal128.rb +14 -0
  12. data/lib/arrow/decimal256-array-builder.rb +16 -6
  13. data/lib/arrow/decimal256.rb +14 -0
  14. data/lib/arrow/field.rb +44 -3
  15. data/lib/arrow/list-data-type.rb +1 -6
  16. data/lib/arrow/loader.rb +3 -0
  17. data/lib/arrow/raw-table-converter.rb +6 -1
  18. data/lib/arrow/raw-tensor-converter.rb +89 -0
  19. data/lib/arrow/string-array-builder.rb +30 -0
  20. data/lib/arrow/tensor.rb +140 -0
  21. data/lib/arrow/time-unit.rb +31 -0
  22. data/lib/arrow/time32-array-builder.rb +2 -14
  23. data/lib/arrow/time32-data-type.rb +9 -38
  24. data/lib/arrow/time64-array-builder.rb +2 -14
  25. data/lib/arrow/time64-data-type.rb +9 -38
  26. data/lib/arrow/timestamp-array-builder.rb +2 -14
  27. data/lib/arrow/timestamp-data-type.rb +9 -34
  28. data/lib/arrow/version.rb +1 -1
  29. data/red-arrow.gemspec +1 -1
  30. data/test/raw-records/test-dictionary-array.rb +341 -0
  31. data/test/test-array-builder.rb +62 -0
  32. data/test/test-chunked-array.rb +6 -0
  33. data/test/test-column.rb +31 -0
  34. data/test/test-decimal128-array-builder.rb +14 -0
  35. data/test/test-decimal128-array.rb +5 -2
  36. data/test/test-decimal128.rb +26 -2
  37. data/test/test-decimal256-array-builder.rb +14 -0
  38. data/test/test-decimal256-array.rb +5 -2
  39. data/test/test-decimal256.rb +26 -2
  40. data/test/test-field.rb +26 -0
  41. data/test/test-orc.rb +2 -2
  42. data/test/test-table.rb +16 -0
  43. data/test/test-tensor.rb +243 -2
  44. data/test/values/test-dictionary-array.rb +30 -0
  45. metadata +15 -9
@@ -0,0 +1,30 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class StringArrayBuilder
20
+ private
21
+ def convert_to_arrow_value(value)
22
+ case value
23
+ when GLib::Bytes, String
24
+ value
25
+ else
26
+ value.to_s
27
+ end
28
+ end
29
+ end
30
+ end
data/lib/arrow/tensor.rb CHANGED
@@ -15,10 +15,150 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ require_relative "raw-tensor-converter"
19
+
18
20
  module Arrow
19
21
  class Tensor
22
+ alias_method :initialize_raw, :initialize
23
+ # Creates a new {Arrow::Tensor}.
24
+ #
25
+ # @overload initialize(raw_tensor, data_type: nil, shape: nil, dimension_names: nil)
26
+ #
27
+ # @param raw_tensor [::Array<Numeric>] The tensor represented as a
28
+ # raw `Array` (not `Arrow::Array`) and `Numeric`s. You can
29
+ # pass a nested `Array` for a multi-dimensional tensor.
30
+ #
31
+ # @param data_type [Arrow::DataType, String, Symbol, ::Array<String>,
32
+ # ::Array<Symbol>, Hash, nil] The element data type of the tensor.
33
+ #
34
+ # If you specify `nil`, data type is guessed from `raw_tensor`.
35
+ #
36
+ # See {Arrow::DataType.resolve} for how to specify data type.
37
+ #
38
+ # @param shape [::Array<Integer>, nil] The array of dimension sizes.
39
+ #
40
+ # If you specify `nil`, shape is guessed from `raw_tensor`.
41
+ #
42
+ # @param dimension_names [::Array<String>, ::Array<Symbol>, nil]
43
+ # The array of the dimension names.
44
+ #
45
+ # If you specify `nil`, all dimensions have empty names.
46
+ #
47
+ # @example Create a tensor from Ruby's Array
48
+ # raw_tensor = [
49
+ # [
50
+ # [1, 2, 3, 4],
51
+ # [5, 6, 7, 8],
52
+ # ],
53
+ # [
54
+ # [9, 10, 11, 12],
55
+ # [13, 14, 15, 16],
56
+ # ],
57
+ # [
58
+ # [17, 18, 19, 20],
59
+ # [21, 22, 23, 24],
60
+ # ],
61
+ # ]
62
+ # Arrow::Tensor.new(raw_tensor)
63
+ #
64
+ # @since 10.0.0
65
+ #
66
+ # @overload initialize(data_type, data, shape, strides, dimension_names)
67
+ #
68
+ # @param data_type [Arrow::DataType, String, Symbol, ::Array<String>,
69
+ # ::Array<Symbol>, Hash] The element data type of the tensor.
70
+ #
71
+ # See {Arrow::DataType.resolve} for how to specify data type.
72
+ #
73
+ # @param data [Arrow::Buffer, String] The data of the tensor.
74
+ #
75
+ # @param shape [::Array<Integer>] The array of dimension sizes.
76
+ #
77
+ # @param strides [::Array<Integer>, nil] The array of strides which
78
+ # is the number of bytes between two adjacent elements in each
79
+ # dimension.
80
+ #
81
+ # If you specify `nil` or an empty `Array`, strides are
82
+ # guessed from `data_type` and `data`.
83
+ #
84
+ # @param dimension_names [::Array<String>, ::Array<Symbol>, nil]
85
+ # The array of the dimension names.
86
+ #
87
+ # If you specify `nil`, all dimensions have empty names.
88
+ #
89
+ # @example Create a tensor from Arrow::Buffer
90
+ # raw_data = [
91
+ # 1, 2,
92
+ # 3, 4,
93
+ #
94
+ # 5, 6,
95
+ # 7, 8,
96
+ #
97
+ # 9, 10,
98
+ # 11, 12,
99
+ # ]
100
+ # data = Arrow::Buffer.new(raw_data.pack("c*").freeze)
101
+ # shape = [3, 2, 2]
102
+ # strides = []
103
+ # names = ["a", "b", "c"]
104
+ # Arrow::Tensor.new(:int8, data, shape, strides, names)
105
+ def initialize(*args,
106
+ data_type: nil,
107
+ data: nil,
108
+ shape: nil,
109
+ strides: nil,
110
+ dimension_names: nil)
111
+ n_args = args.size
112
+ case n_args
113
+ when 1
114
+ converter = RawTensorConverter.new(args[0],
115
+ data_type: data_type,
116
+ shape: shape,
117
+ strides: strides,
118
+ dimension_names: dimension_names)
119
+ data_type = converter.data_type
120
+ data = converter.data
121
+ shape = converter.shape
122
+ strides = converter.strides
123
+ dimension_names = converter.dimension_names
124
+ when 0, 2..5
125
+ data_type = args[0] || data_type
126
+ data = args[1] || data
127
+ shape = args[2] || shape
128
+ strides = args[3] || strides
129
+ dimension_names = args[4] || dimension_names
130
+ if data_type.nil?
131
+ raise ArgumentError, "data_type: is missing: #{data.inspect}"
132
+ end
133
+ else
134
+ message = "wrong number of arguments (given #{n_args}, expected 0..5)"
135
+ raise ArgumentError, message
136
+ end
137
+ initialize_raw(DataType.resolve(data_type),
138
+ data,
139
+ shape,
140
+ strides,
141
+ dimension_names)
142
+ end
143
+
144
+ def dimension_names
145
+ n_dimensions.times.collect do |i|
146
+ get_dimension_name(i)
147
+ end
148
+ end
149
+
20
150
  def to_arrow
21
151
  self
22
152
  end
153
+
154
+ def to_arrow_array
155
+ if n_dimensions != 1
156
+ raise RangeError, "must be 1 dimensional tensor: #{shape.inspect}"
157
+ end
158
+ value_data_type.array_class.new(size,
159
+ buffer,
160
+ nil,
161
+ 0)
162
+ end
23
163
  end
24
164
  end
@@ -0,0 +1,31 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class TimeUnit
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ if value.is_a?(Hash) and value.size == 1 and value[:unit]
24
+ super(value[:unit])
25
+ else
26
+ super
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -18,24 +18,12 @@
18
18
  module Arrow
19
19
  class Time32ArrayBuilder
20
20
  class << self
21
- def build(unit_or_data_type, values)
22
- builder = new(unit_or_data_type)
21
+ def build(data_type, values)
22
+ builder = new(data_type)
23
23
  builder.build(values)
24
24
  end
25
25
  end
26
26
 
27
- alias_method :initialize_raw, :initialize
28
- def initialize(unit_or_data_type)
29
- case unit_or_data_type
30
- when DataType
31
- data_type = unit_or_data_type
32
- else
33
- unit = unit_or_data_type
34
- data_type = Time32DataType.new(unit)
35
- end
36
- initialize_raw(data_type)
37
- end
38
-
39
27
  def unit
40
28
  @unit ||= value_data_type.unit
41
29
  end
@@ -17,45 +17,16 @@
17
17
 
18
18
  module Arrow
19
19
  class Time32DataType
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
-
23
- # Creates a new {Arrow::Time32DataType}.
24
- #
25
- # @overload initialize(unit)
26
- #
27
- # @param unit [Arrow::TimeUnit, Symbol] The unit of the
28
- # time32 data type.
29
- #
30
- # The unit must be second or millisecond.
31
- #
32
- # @example Create a time32 data type with Arrow::TimeUnit
33
- # Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
34
- #
35
- # @example Create a time32 data type with Symbol
36
- # Arrow::Time32DataType.new(:milli)
37
- #
38
- # @overload initialize(description)
39
- #
40
- # @param description [Hash] The description of the time32 data
41
- # type. It must have `:unit` value.
42
- #
43
- # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
44
- # the time32 data type.
45
- #
46
- # The unit must be second or millisecond.
47
- #
48
- # @example Create a time32 data type with Arrow::TimeUnit
49
- # Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
50
- #
51
- # @example Create a time32 data type with Symbol
52
- # Arrow::Time32DataType.new(unit: :milli)
53
- def initialize(unit)
54
- if unit.is_a?(Hash)
55
- description = unit
56
- unit = description[:unit]
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Symbol, Arrow::TimeUnit
25
+ new(value)
26
+ else
27
+ super
28
+ end
57
29
  end
58
- initialize_raw(unit)
59
30
  end
60
31
  end
61
32
  end
@@ -18,24 +18,12 @@
18
18
  module Arrow
19
19
  class Time64ArrayBuilder
20
20
  class << self
21
- def build(unit_or_data_type, values)
22
- builder = new(unit_or_data_type)
21
+ def build(data_type, values)
22
+ builder = new(data_type)
23
23
  builder.build(values)
24
24
  end
25
25
  end
26
26
 
27
- alias_method :initialize_raw, :initialize
28
- def initialize(unit_or_data_type)
29
- case unit_or_data_type
30
- when DataType
31
- data_type = unit_or_data_type
32
- else
33
- unit = unit_or_data_type
34
- data_type = Time64DataType.new(unit)
35
- end
36
- initialize_raw(data_type)
37
- end
38
-
39
27
  def unit
40
28
  @unit ||= value_data_type.unit
41
29
  end
@@ -17,45 +17,16 @@
17
17
 
18
18
  module Arrow
19
19
  class Time64DataType
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
-
23
- # Creates a new {Arrow::Time64DataType}.
24
- #
25
- # @overload initialize(unit)
26
- #
27
- # @param unit [Arrow::TimeUnit, Symbol] The unit of the
28
- # time64 data type.
29
- #
30
- # The unit must be microsecond or nanosecond.
31
- #
32
- # @example Create a time64 data type with Arrow::TimeUnit
33
- # Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
34
- #
35
- # @example Create a time64 data type with Symbol
36
- # Arrow::Time64DataType.new(:nano)
37
- #
38
- # @overload initialize(description)
39
- #
40
- # @param description [Hash] The description of the time64 data
41
- # type. It must have `:unit` value.
42
- #
43
- # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
44
- # the time64 data type.
45
- #
46
- # The unit must be microsecond or nanosecond.
47
- #
48
- # @example Create a time64 data type with Arrow::TimeUnit
49
- # Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
50
- #
51
- # @example Create a time64 data type with Symbol
52
- # Arrow::Time64DataType.new(unit: :nano)
53
- def initialize(unit)
54
- if unit.is_a?(Hash)
55
- description = unit
56
- unit = description[:unit]
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Symbol, Arrow::TimeUnit
25
+ new(value)
26
+ else
27
+ super
28
+ end
57
29
  end
58
- initialize_raw(unit)
59
30
  end
60
31
  end
61
32
  end
@@ -18,24 +18,12 @@
18
18
  module Arrow
19
19
  class TimestampArrayBuilder
20
20
  class << self
21
- def build(unit_or_data_type, values)
22
- builder = new(unit_or_data_type)
21
+ def build(data_type, values)
22
+ builder = new(data_type)
23
23
  builder.build(values)
24
24
  end
25
25
  end
26
26
 
27
- alias_method :initialize_raw, :initialize
28
- def initialize(unit_or_data_type)
29
- case unit_or_data_type
30
- when DataType
31
- data_type = unit_or_data_type
32
- else
33
- unit = unit_or_data_type
34
- data_type = TimestampDataType.new(unit)
35
- end
36
- initialize_raw(data_type)
37
- end
38
-
39
27
  private
40
28
  def unit_id
41
29
  @unit_id ||= value_data_type.unit.nick.to_sym
@@ -17,41 +17,16 @@
17
17
 
18
18
  module Arrow
19
19
  class TimestampDataType
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
-
23
- # Creates a new {Arrow::TimestampDataType}.
24
- #
25
- # @overload initialize(unit)
26
- #
27
- # @param unit [Arrow::TimeUnit, Symbol] The unit of the
28
- # timestamp data type.
29
- #
30
- # @example Create a timestamp data type with Arrow::TimeUnit
31
- # Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
32
- #
33
- # @example Create a timestamp data type with Symbol
34
- # Arrow::TimestampDataType.new(:milli)
35
- #
36
- # @overload initialize(description)
37
- #
38
- # @param description [Hash] The description of the timestamp data
39
- # type. It must have `:unit` value.
40
- #
41
- # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
42
- # the timestamp data type.
43
- #
44
- # @example Create a timestamp data type with Arrow::TimeUnit
45
- # Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
46
- #
47
- # @example Create a timestamp data type with Symbol
48
- # Arrow::TimestampDataType.new(unit: :milli)
49
- def initialize(unit)
50
- if unit.is_a?(Hash)
51
- description = unit
52
- unit = description[:unit]
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Symbol, Arrow::TimeUnit
25
+ new(value)
26
+ else
27
+ super
28
+ end
53
29
  end
54
- initialize_raw(unit)
55
30
  end
56
31
  end
57
32
  end
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "8.0.0"
19
+ VERSION = "10.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -46,7 +46,7 @@ Gem::Specification.new do |spec|
46
46
  spec.test_files += Dir.glob("test/**/*")
47
47
  spec.extensions = ["ext/arrow/extconf.rb"]
48
48
 
49
- spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
49
+ spec.add_runtime_dependency("bigdecimal", ">= 3.1.0")
50
50
  spec.add_runtime_dependency("extpp", ">= 0.0.7")
51
51
  spec.add_runtime_dependency("gio2", ">= 3.5.0")
52
52
  spec.add_runtime_dependency("native-package-installer")