red-arrow 8.0.0 → 10.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +3 -0
  3. data/ext/arrow/extconf.rb +8 -4
  4. data/lib/arrow/array-builder.rb +40 -6
  5. data/lib/arrow/array-computable.rb +37 -0
  6. data/lib/arrow/array.rb +28 -0
  7. data/lib/arrow/chunked-array.rb +21 -0
  8. data/lib/arrow/column.rb +28 -0
  9. data/lib/arrow/data-type.rb +8 -3
  10. data/lib/arrow/decimal128-array-builder.rb +16 -6
  11. data/lib/arrow/decimal128.rb +14 -0
  12. data/lib/arrow/decimal256-array-builder.rb +16 -6
  13. data/lib/arrow/decimal256.rb +14 -0
  14. data/lib/arrow/field.rb +44 -3
  15. data/lib/arrow/list-data-type.rb +1 -6
  16. data/lib/arrow/loader.rb +3 -0
  17. data/lib/arrow/raw-table-converter.rb +6 -1
  18. data/lib/arrow/raw-tensor-converter.rb +89 -0
  19. data/lib/arrow/string-array-builder.rb +30 -0
  20. data/lib/arrow/tensor.rb +140 -0
  21. data/lib/arrow/time-unit.rb +31 -0
  22. data/lib/arrow/time32-array-builder.rb +2 -14
  23. data/lib/arrow/time32-data-type.rb +9 -38
  24. data/lib/arrow/time64-array-builder.rb +2 -14
  25. data/lib/arrow/time64-data-type.rb +9 -38
  26. data/lib/arrow/timestamp-array-builder.rb +2 -14
  27. data/lib/arrow/timestamp-data-type.rb +9 -34
  28. data/lib/arrow/version.rb +1 -1
  29. data/red-arrow.gemspec +1 -1
  30. data/test/raw-records/test-dictionary-array.rb +341 -0
  31. data/test/test-array-builder.rb +62 -0
  32. data/test/test-chunked-array.rb +6 -0
  33. data/test/test-column.rb +31 -0
  34. data/test/test-decimal128-array-builder.rb +14 -0
  35. data/test/test-decimal128-array.rb +5 -2
  36. data/test/test-decimal128.rb +26 -2
  37. data/test/test-decimal256-array-builder.rb +14 -0
  38. data/test/test-decimal256-array.rb +5 -2
  39. data/test/test-decimal256.rb +26 -2
  40. data/test/test-field.rb +26 -0
  41. data/test/test-orc.rb +2 -2
  42. data/test/test-table.rb +16 -0
  43. data/test/test-tensor.rb +243 -2
  44. data/test/values/test-dictionary-array.rb +30 -0
  45. metadata +15 -9
@@ -0,0 +1,30 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class StringArrayBuilder
20
+ private
21
+ def convert_to_arrow_value(value)
22
+ case value
23
+ when GLib::Bytes, String
24
+ value
25
+ else
26
+ value.to_s
27
+ end
28
+ end
29
+ end
30
+ end
data/lib/arrow/tensor.rb CHANGED
@@ -15,10 +15,150 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ require_relative "raw-tensor-converter"
19
+
18
20
  module Arrow
19
21
  class Tensor
22
+ alias_method :initialize_raw, :initialize
23
+ # Creates a new {Arrow::Tensor}.
24
+ #
25
+ # @overload initialize(raw_tensor, data_type: nil, shape: nil, dimension_names: nil)
26
+ #
27
+ # @param raw_tensor [::Array<Numeric>] The tensor represented as a
28
+ # raw `Array` (not `Arrow::Array`) of `Numeric`s. You can
29
+ # pass a nested `Array` for a multi-dimensional tensor.
30
+ #
31
+ # @param data_type [Arrow::DataType, String, Symbol, ::Array<String>,
32
+ # ::Array<Symbol>, Hash, nil] The element data type of the tensor.
33
+ #
34
+ # If you specify `nil`, data type is guessed from `raw_tensor`.
35
+ #
36
+ # See {Arrow::DataType.resolve} for how to specify data type.
37
+ #
38
+ # @param shape [::Array<Integer>, nil] The array of dimension sizes.
39
+ #
40
+ # If you specify `nil`, shape is guessed from `raw_tensor`.
41
+ #
42
+ # @param dimension_names [::Array<String>, ::Array<Symbol>, nil]
43
+ # The array of the dimension names.
44
+ #
45
+ # If you specify `nil`, all dimensions have empty names.
46
+ #
47
+ # @example Create a tensor from Ruby's Array
48
+ # raw_tensor = [
49
+ # [
50
+ # [1, 2, 3, 4],
51
+ # [5, 6, 7, 8],
52
+ # ],
53
+ # [
54
+ # [9, 10, 11, 12],
55
+ # [13, 14, 15, 16],
56
+ # ],
57
+ # [
58
+ # [17, 18, 19, 20],
59
+ # [21, 22, 23, 24],
60
+ # ],
61
+ # ]
62
+ # Arrow::Tensor.new(raw_tensor)
63
+ #
64
+ # @since 10.0.0
65
+ #
66
+ # @overload initialize(data_type, data, shape, strides, dimension_names)
67
+ #
68
+ # @param data_type [Arrow::DataType, String, Symbol, ::Array<String>,
69
+ # ::Array<Symbol>, Hash] The element data type of the tensor.
70
+ #
71
+ # See {Arrow::DataType.resolve} for how to specify data type.
72
+ #
73
+ # @param data [Arrow::Buffer, String] The data of the tensor.
74
+ #
75
+ # @param shape [::Array<Integer>] The array of dimension sizes.
76
+ #
77
+ # @param strides [::Array<Integer>, nil] The array of strides which
78
+ # is the number of bytes between two adjacent elements in each
79
+ # dimension.
80
+ #
81
+ # If you specify `nil` or an empty `Array`, strides are
82
+ # guessed from `data_type` and `data`.
83
+ #
84
+ # @param dimension_names [::Array<String>, ::Array<Symbol>, nil]
85
+ # The array of the dimension names.
86
+ #
87
+ # If you specify `nil`, all dimensions have empty names.
88
+ #
89
+ # @example Create a tensor from Arrow::Buffer
90
+ # raw_data = [
91
+ # 1, 2,
92
+ # 3, 4,
93
+ #
94
+ # 5, 6,
95
+ # 7, 8,
96
+ #
97
+ # 9, 10,
98
+ # 11, 12,
99
+ # ]
100
+ # data = Arrow::Buffer.new(raw_data.pack("c*").freeze)
101
+ # shape = [3, 2, 2]
102
+ # strides = []
103
+ # names = ["a", "b", "c"]
104
+ # Arrow::Tensor.new(:int8, data, shape, strides, names)
105
+ def initialize(*args,
106
+ data_type: nil,
107
+ data: nil,
108
+ shape: nil,
109
+ strides: nil,
110
+ dimension_names: nil)
111
+ n_args = args.size
112
+ case n_args
113
+ when 1
114
+ converter = RawTensorConverter.new(args[0],
115
+ data_type: data_type,
116
+ shape: shape,
117
+ strides: strides,
118
+ dimension_names: dimension_names)
119
+ data_type = converter.data_type
120
+ data = converter.data
121
+ shape = converter.shape
122
+ strides = converter.strides
123
+ dimension_names = converter.dimension_names
124
+ when 0, 2..5
125
+ data_type = args[0] || data_type
126
+ data = args[1] || data
127
+ shape = args[2] || shape
128
+ strides = args[3] || strides
129
+ dimension_names = args[4] || dimension_names
130
+ if data_type.nil?
131
+ raise ArgumentError, "data_type: is missing: #{data.inspect}"
132
+ end
133
+ else
134
+ message = "wrong number of arguments (given #{n_args}, expected 0..5)"
135
+ raise ArgumentError, message
136
+ end
137
+ initialize_raw(DataType.resolve(data_type),
138
+ data,
139
+ shape,
140
+ strides,
141
+ dimension_names)
142
+ end
143
+
144
+ def dimension_names
145
+ n_dimensions.times.collect do |i|
146
+ get_dimension_name(i)
147
+ end
148
+ end
149
+
20
150
  def to_arrow
21
151
  self
22
152
  end
153
+
154
+ def to_arrow_array
155
+ if n_dimensions != 1
156
+ raise RangeError, "must be 1 dimensional tensor: #{shape.inspect}"
157
+ end
158
+ value_data_type.array_class.new(size,
159
+ buffer,
160
+ nil,
161
+ 0)
162
+ end
23
163
  end
24
164
  end
@@ -0,0 +1,31 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class TimeUnit
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ if value.is_a?(Hash) and value.size == 1 and value[:unit]
24
+ super(value[:unit])
25
+ else
26
+ super
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -18,24 +18,12 @@
18
18
  module Arrow
19
19
  class Time32ArrayBuilder
20
20
  class << self
21
- def build(unit_or_data_type, values)
22
- builder = new(unit_or_data_type)
21
+ def build(data_type, values)
22
+ builder = new(data_type)
23
23
  builder.build(values)
24
24
  end
25
25
  end
26
26
 
27
- alias_method :initialize_raw, :initialize
28
- def initialize(unit_or_data_type)
29
- case unit_or_data_type
30
- when DataType
31
- data_type = unit_or_data_type
32
- else
33
- unit = unit_or_data_type
34
- data_type = Time32DataType.new(unit)
35
- end
36
- initialize_raw(data_type)
37
- end
38
-
39
27
  def unit
40
28
  @unit ||= value_data_type.unit
41
29
  end
@@ -17,45 +17,16 @@
17
17
 
18
18
  module Arrow
19
19
  class Time32DataType
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
-
23
- # Creates a new {Arrow::Time32DataType}.
24
- #
25
- # @overload initialize(unit)
26
- #
27
- # @param unit [Arrow::TimeUnit, Symbol] The unit of the
28
- # time32 data type.
29
- #
30
- # The unit must be second or millisecond.
31
- #
32
- # @example Create a time32 data type with Arrow::TimeUnit
33
- # Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
34
- #
35
- # @example Create a time32 data type with Symbol
36
- # Arrow::Time32DataType.new(:milli)
37
- #
38
- # @overload initialize(description)
39
- #
40
- # @param description [Hash] The description of the time32 data
41
- # type. It must have `:unit` value.
42
- #
43
- # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
44
- # the time32 data type.
45
- #
46
- # The unit must be second or millisecond.
47
- #
48
- # @example Create a time32 data type with Arrow::TimeUnit
49
- # Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
50
- #
51
- # @example Create a time32 data type with Symbol
52
- # Arrow::Time32DataType.new(unit: :milli)
53
- def initialize(unit)
54
- if unit.is_a?(Hash)
55
- description = unit
56
- unit = description[:unit]
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Symbol, Arrow::TimeUnit
25
+ new(value)
26
+ else
27
+ super
28
+ end
57
29
  end
58
- initialize_raw(unit)
59
30
  end
60
31
  end
61
32
  end
@@ -18,24 +18,12 @@
18
18
  module Arrow
19
19
  class Time64ArrayBuilder
20
20
  class << self
21
- def build(unit_or_data_type, values)
22
- builder = new(unit_or_data_type)
21
+ def build(data_type, values)
22
+ builder = new(data_type)
23
23
  builder.build(values)
24
24
  end
25
25
  end
26
26
 
27
- alias_method :initialize_raw, :initialize
28
- def initialize(unit_or_data_type)
29
- case unit_or_data_type
30
- when DataType
31
- data_type = unit_or_data_type
32
- else
33
- unit = unit_or_data_type
34
- data_type = Time64DataType.new(unit)
35
- end
36
- initialize_raw(data_type)
37
- end
38
-
39
27
  def unit
40
28
  @unit ||= value_data_type.unit
41
29
  end
@@ -17,45 +17,16 @@
17
17
 
18
18
  module Arrow
19
19
  class Time64DataType
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
-
23
- # Creates a new {Arrow::Time64DataType}.
24
- #
25
- # @overload initialize(unit)
26
- #
27
- # @param unit [Arrow::TimeUnit, Symbol] The unit of the
28
- # time64 data type.
29
- #
30
- # The unit must be microsecond or nanosecond.
31
- #
32
- # @example Create a time64 data type with Arrow::TimeUnit
33
- # Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
34
- #
35
- # @example Create a time64 data type with Symbol
36
- # Arrow::Time64DataType.new(:nano)
37
- #
38
- # @overload initialize(description)
39
- #
40
- # @param description [Hash] The description of the time64 data
41
- # type. It must have `:unit` value.
42
- #
43
- # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
44
- # the time64 data type.
45
- #
46
- # The unit must be microsecond or nanosecond.
47
- #
48
- # @example Create a time64 data type with Arrow::TimeUnit
49
- # Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
50
- #
51
- # @example Create a time64 data type with Symbol
52
- # Arrow::Time64DataType.new(unit: :nano)
53
- def initialize(unit)
54
- if unit.is_a?(Hash)
55
- description = unit
56
- unit = description[:unit]
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Symbol, Arrow::TimeUnit
25
+ new(value)
26
+ else
27
+ super
28
+ end
57
29
  end
58
- initialize_raw(unit)
59
30
  end
60
31
  end
61
32
  end
@@ -18,24 +18,12 @@
18
18
  module Arrow
19
19
  class TimestampArrayBuilder
20
20
  class << self
21
- def build(unit_or_data_type, values)
22
- builder = new(unit_or_data_type)
21
+ def build(data_type, values)
22
+ builder = new(data_type)
23
23
  builder.build(values)
24
24
  end
25
25
  end
26
26
 
27
- alias_method :initialize_raw, :initialize
28
- def initialize(unit_or_data_type)
29
- case unit_or_data_type
30
- when DataType
31
- data_type = unit_or_data_type
32
- else
33
- unit = unit_or_data_type
34
- data_type = TimestampDataType.new(unit)
35
- end
36
- initialize_raw(data_type)
37
- end
38
-
39
27
  private
40
28
  def unit_id
41
29
  @unit_id ||= value_data_type.unit.nick.to_sym
@@ -17,41 +17,16 @@
17
17
 
18
18
  module Arrow
19
19
  class TimestampDataType
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
-
23
- # Creates a new {Arrow::TimestampDataType}.
24
- #
25
- # @overload initialize(unit)
26
- #
27
- # @param unit [Arrow::TimeUnit, Symbol] The unit of the
28
- # timestamp data type.
29
- #
30
- # @example Create a timestamp data type with Arrow::TimeUnit
31
- # Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
32
- #
33
- # @example Create a timestamp data type with Symbol
34
- # Arrow::TimestampDataType.new(:milli)
35
- #
36
- # @overload initialize(description)
37
- #
38
- # @param description [Hash] The description of the timestamp data
39
- # type. It must have `:unit` value.
40
- #
41
- # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
42
- # the timestamp data type.
43
- #
44
- # @example Create a timestamp data type with Arrow::TimeUnit
45
- # Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
46
- #
47
- # @example Create a timestamp data type with Symbol
48
- # Arrow::TimestampDataType.new(unit: :milli)
49
- def initialize(unit)
50
- if unit.is_a?(Hash)
51
- description = unit
52
- unit = description[:unit]
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Symbol, Arrow::TimeUnit
25
+ new(value)
26
+ else
27
+ super
28
+ end
53
29
  end
54
- initialize_raw(unit)
55
30
  end
56
31
  end
57
32
  end
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "8.0.0"
19
+ VERSION = "10.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -46,7 +46,7 @@ Gem::Specification.new do |spec|
46
46
  spec.test_files += Dir.glob("test/**/*")
47
47
  spec.extensions = ["ext/arrow/extconf.rb"]
48
48
 
49
- spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
49
+ spec.add_runtime_dependency("bigdecimal", ">= 3.1.0")
50
50
  spec.add_runtime_dependency("extpp", ">= 0.0.7")
51
51
  spec.add_runtime_dependency("gio2", ">= 3.5.0")
52
52
  spec.add_runtime_dependency("native-package-installer")