red-arrow 0.14.1 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +34 -0
  3. data/ext/arrow/converters.cpp +42 -0
  4. data/ext/arrow/converters.hpp +626 -0
  5. data/ext/arrow/raw-records.cpp +6 -625
  6. data/ext/arrow/red-arrow.hpp +37 -3
  7. data/ext/arrow/values.cpp +154 -0
  8. data/lib/arrow/array-builder.rb +24 -1
  9. data/lib/arrow/array.rb +9 -0
  10. data/lib/arrow/chunked-array.rb +5 -0
  11. data/lib/arrow/column-containable.rb +48 -0
  12. data/lib/arrow/column.rb +36 -10
  13. data/lib/arrow/csv-loader.rb +2 -2
  14. data/lib/arrow/data-type.rb +22 -5
  15. data/lib/arrow/date64-array-builder.rb +2 -2
  16. data/lib/arrow/date64-array.rb +1 -1
  17. data/lib/arrow/decimal128-array.rb +24 -0
  18. data/lib/arrow/field-containable.rb +3 -0
  19. data/lib/arrow/group.rb +10 -13
  20. data/lib/arrow/loader.rb +20 -1
  21. data/lib/arrow/record-batch.rb +6 -4
  22. data/lib/arrow/record-containable.rb +0 -35
  23. data/lib/arrow/record.rb +12 -9
  24. data/lib/arrow/slicer.rb +2 -2
  25. data/lib/arrow/struct-array-builder.rb +1 -7
  26. data/lib/arrow/struct-array.rb +13 -11
  27. data/lib/arrow/table-loader.rb +3 -9
  28. data/lib/arrow/table-table-formatter.rb +2 -2
  29. data/lib/arrow/table.rb +61 -24
  30. data/lib/arrow/time.rb +159 -0
  31. data/lib/arrow/time32-array-builder.rb +49 -0
  32. data/lib/arrow/time32-array.rb +28 -0
  33. data/lib/arrow/time64-array-builder.rb +49 -0
  34. data/lib/arrow/time64-array.rb +28 -0
  35. data/lib/arrow/timestamp-array-builder.rb +20 -1
  36. data/lib/arrow/timestamp-array.rb +10 -22
  37. data/lib/arrow/version.rb +1 -1
  38. data/red-arrow.gemspec +1 -1
  39. data/test/raw-records/test-basic-arrays.rb +16 -8
  40. data/test/raw-records/test-dense-union-array.rb +12 -5
  41. data/test/raw-records/test-list-array.rb +21 -9
  42. data/test/raw-records/test-sparse-union-array.rb +13 -5
  43. data/test/raw-records/test-struct-array.rb +11 -4
  44. data/test/test-column.rb +56 -31
  45. data/test/test-decimal128-array-builder.rb +11 -11
  46. data/test/test-decimal128-array.rb +4 -4
  47. data/test/test-slicer.rb +1 -3
  48. data/test/test-struct-array-builder.rb +4 -4
  49. data/test/test-struct-array.rb +4 -4
  50. data/test/test-table.rb +17 -8
  51. data/test/test-time.rb +288 -0
  52. data/test/test-time32-array.rb +81 -0
  53. data/test/test-time64-array.rb +81 -0
  54. data/test/values/test-basic-arrays.rb +284 -0
  55. data/test/values/test-dense-union-array.rb +487 -0
  56. data/test/values/test-list-array.rb +497 -0
  57. data/test/values/test-sparse-union-array.rb +477 -0
  58. data/test/values/test-struct-array.rb +452 -0
  59. metadata +78 -54
  60. data/lib/arrow/struct.rb +0 -79
  61. data/test/test-struct.rb +0 -81
@@ -0,0 +1,159 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Time
20
+ attr_reader :unit
21
+ attr_reader :value
22
+ def initialize(unit, value)
23
+ @unit = unit
24
+ @value = value
25
+ @unconstructed = false
26
+ end
27
+
28
+ def ==(other)
29
+ other.is_a?(self.class) and
30
+ positive? == other.positive? and
31
+ hour == other.hour and
32
+ minute == other.minute and
33
+ second == other.second and
34
+ nano_second == other.nano_second
35
+ end
36
+
37
+ def cast(target_unit)
38
+ return self.class.new(@unit, @value) if @unit == target_unit
39
+
40
+ target_value = (hour * 60 * 60) + (minute * 60) + second
41
+ case target_unit
42
+ when TimeUnit::MILLI
43
+ target_value *= 1000
44
+ target_value += nano_second / 1000 / 1000
45
+ when TimeUnit::MICRO
46
+ target_value *= 1000 * 1000
47
+ target_value += nano_second / 1000
48
+ when TimeUnit::NANO
49
+ target_value *= 1000 * 1000 * 1000
50
+ target_value += nano_second
51
+ end
52
+ target_value = -target_value if negative?
53
+ self.class.new(target_unit, target_value)
54
+ end
55
+
56
+ def to_f
57
+ case @unit
58
+ when TimeUnit::SECOND
59
+ @value.to_f
60
+ when TimeUnit::MILLI
61
+ @value.to_f / 1000.0
62
+ when TimeUnit::MICRO
63
+ @value.to_f / 1000.0 / 1000.0
64
+ when TimeUnit::NANO
65
+ @value.to_f / 1000.0 / 1000.0 / 1000.0
66
+ end
67
+ end
68
+
69
+ def positive?
70
+ @value.positive?
71
+ end
72
+
73
+ def negative?
74
+ @value.negative?
75
+ end
76
+
77
+ def hour
78
+ unconstruct
79
+ @hour
80
+ end
81
+
82
+ def minute
83
+ unconstruct
84
+ @minute
85
+ end
86
+ alias_method :min, :minute
87
+
88
+ def second
89
+ unconstruct
90
+ @second
91
+ end
92
+ alias_method :sec, :second
93
+
94
+ def nano_second
95
+ unconstruct
96
+ @nano_second
97
+ end
98
+ alias_method :nsec, :nano_second
99
+
100
+ def to_s
101
+ unconstruct
102
+ if @nano_second.zero?
103
+ nano_second_string = ""
104
+ else
105
+ nano_second_string = (".%09d" % @nano_second).gsub(/0+\z/, "")
106
+ end
107
+ "%s%02d:%02d:%02d%s" % [
108
+ @value.negative? ? "-" : "",
109
+ @hour,
110
+ @minute,
111
+ @second,
112
+ nano_second_string,
113
+ ]
114
+ end
115
+
116
+ private
117
+ def unconstruct
118
+ return if @unconstructed
119
+ abs_value = @value.abs
120
+ case unit
121
+ when TimeUnit::SECOND
122
+ unconstruct_second(abs_value)
123
+ @nano_second = 0
124
+ when TimeUnit::MILLI
125
+ unconstruct_second(abs_value / 1000)
126
+ @nano_second = (abs_value % 1000) * 1000 * 1000
127
+ when TimeUnit::MICRO
128
+ unconstruct_second(abs_value / 1000 / 1000)
129
+ @nano_second = (abs_value % (1000 * 1000)) * 1000
130
+ when TimeUnit::NANO
131
+ unconstruct_second(abs_value / 1000 / 1000 / 1000)
132
+ @nano_second = abs_value % (1000 * 1000 * 1000)
133
+ else
134
+ raise ArgumentError, "invalid unit: #{@unit.inspect}"
135
+ end
136
+ @unconstructed = true
137
+ end
138
+
139
+ def unconstruct_second(abs_value_in_second)
140
+ if abs_value_in_second < 60
141
+ hour = 0
142
+ minute = 0
143
+ second = abs_value_in_second
144
+ elsif abs_value_in_second < (60 * 60)
145
+ hour = 0
146
+ minute = abs_value_in_second / 60
147
+ second = abs_value_in_second % 60
148
+ else
149
+ in_minute = abs_value_in_second / 60
150
+ hour = in_minute / 60
151
+ minute = in_minute % 60
152
+ second = abs_value_in_second % 60
153
+ end
154
+ @hour = hour
155
+ @minute = minute
156
+ @second = second
157
+ end
158
+ end
159
+ end
@@ -0,0 +1,49 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Time32ArrayBuilder
20
+ class << self
21
+ def build(unit_or_data_type, values)
22
+ builder = new(unit_or_data_type)
23
+ builder.build(values)
24
+ end
25
+ end
26
+
27
+ alias_method :initialize_raw, :initialize
28
+ def initialize(unit_or_data_type)
29
+ case unit_or_data_type
30
+ when DataType
31
+ data_type = unit_or_data_type
32
+ else
33
+ unit = unit_or_data_type
34
+ data_type = Time32DataType.new(unit)
35
+ end
36
+ initialize_raw(data_type)
37
+ end
38
+
39
+ def unit
40
+ @unit ||= value_data_type.unit
41
+ end
42
+
43
+ private
44
+ def convert_to_arrow_value(value)
45
+ return value unless value.is_a?(Time)
46
+ value.cast(unit).value
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Time32Array
20
+ def get_value(i)
21
+ Time.new(unit, get_raw_value(i))
22
+ end
23
+
24
+ def unit
25
+ @unit ||= value_data_type.unit
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,49 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Time64ArrayBuilder
20
+ class << self
21
+ def build(unit_or_data_type, values)
22
+ builder = new(unit_or_data_type)
23
+ builder.build(values)
24
+ end
25
+ end
26
+
27
+ alias_method :initialize_raw, :initialize
28
+ def initialize(unit_or_data_type)
29
+ case unit_or_data_type
30
+ when DataType
31
+ data_type = unit_or_data_type
32
+ else
33
+ unit = unit_or_data_type
34
+ data_type = Time64DataType.new(unit)
35
+ end
36
+ initialize_raw(data_type)
37
+ end
38
+
39
+ def unit
40
+ @unit ||= value_data_type.unit
41
+ end
42
+
43
+ private
44
+ def convert_to_arrow_value(value)
45
+ return value unless value.is_a?(Time)
46
+ value.cast(unit).value
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Time64Array
20
+ def get_value(i)
21
+ Time.new(unit, get_raw_value(i))
22
+ end
23
+
24
+ def unit
25
+ @unit ||= value_data_type.unit
26
+ end
27
+ end
28
+ end
@@ -17,6 +17,25 @@
17
17
 
18
18
  module Arrow
19
19
  class TimestampArrayBuilder
20
+ class << self
21
+ def build(unit_or_data_type, values)
22
+ builder = new(unit_or_data_type)
23
+ builder.build(values)
24
+ end
25
+ end
26
+
27
+ alias_method :initialize_raw, :initialize
28
+ def initialize(unit_or_data_type)
29
+ case unit_or_data_type
30
+ when DataType
31
+ data_type = unit_or_data_type
32
+ else
33
+ unit = unit_or_data_type
34
+ data_type = TimestampDataType.new(unit)
35
+ end
36
+ initialize_raw(data_type)
37
+ end
38
+
20
39
  private
21
40
  def unit_id
22
41
  @unit_id ||= value_data_type.unit.nick.to_sym
@@ -27,7 +46,7 @@ module Arrow
27
46
  value = value.to_time
28
47
  end
29
48
 
30
- if value.is_a?(Time)
49
+ if value.is_a?(::Time)
31
50
  case unit_id
32
51
  when :second
33
52
  value.to_i
@@ -17,16 +17,8 @@
17
17
 
18
18
  module Arrow
19
19
  class TimestampArray
20
- class << self
21
- def new(unit, values)
22
- data_type = TimestampDataType.new(unit)
23
- builder = TimestampArrayBuilder.new(data_type)
24
- builder.build(values)
25
- end
26
- end
27
-
28
20
  def get_value(i)
29
- to_time(get_raw_value(i))
21
+ cast_to_time(get_raw_value(i))
30
22
  end
31
23
 
32
24
  def unit
@@ -34,20 +26,16 @@ module Arrow
34
26
  end
35
27
 
36
28
  private
37
- def unit_id
38
- @unit_id ||= unit.nick.to_sym
39
- end
40
-
41
- def to_time(raw_value)
42
- case unit_id
43
- when :second
44
- Time.at(raw_value)
45
- when :milli
46
- Time.at(*raw_value.divmod(1_000))
47
- when :micro
48
- Time.at(*raw_value.divmod(1_000_000))
29
+ def cast_to_time(raw_value)
30
+ case unit
31
+ when TimeUnit::SECOND
32
+ ::Time.at(raw_value)
33
+ when TimeUnit::MILLI
34
+ ::Time.at(*raw_value.divmod(1_000))
35
+ when TimeUnit::MICRO
36
+ ::Time.at(*raw_value.divmod(1_000_000))
49
37
  else
50
- Time.at(raw_value / 1_000_000_000.0)
38
+ ::Time.at(raw_value / 1_000_000_000.0)
51
39
  end
52
40
  end
53
41
  end
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "0.14.1"
19
+ VERSION = "0.15.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -47,7 +47,7 @@ Gem::Specification.new do |spec|
47
47
  spec.extensions = ["ext/arrow/extconf.rb"]
48
48
 
49
49
  spec.add_runtime_dependency("extpp", ">= 0.0.7")
50
- spec.add_runtime_dependency("gio2", ">= 3.3.6")
50
+ spec.add_runtime_dependency("gio2", "= 3.3.7")
51
51
  spec.add_runtime_dependency("native-package-installer")
52
52
  spec.add_runtime_dependency("pkg-config")
53
53
 
@@ -242,10 +242,11 @@ module RawRecordsBasicArraysTests
242
242
  end
243
243
 
244
244
  def test_time32_second
245
+ unit = Arrow::TimeUnit::SECOND
245
246
  records = [
246
- [60 * 10], # 00:10:00
247
+ [Arrow::Time.new(unit, 60 * 10)], # 00:10:00
247
248
  [nil],
248
- [60 * 60 * 2 + 9], # 02:00:09
249
+ [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09
249
250
  ]
250
251
  target = build({
251
252
  column: {
@@ -258,10 +259,11 @@ module RawRecordsBasicArraysTests
258
259
  end
259
260
 
260
261
  def test_time32_milli
262
+ unit = Arrow::TimeUnit::MILLI
261
263
  records = [
262
- [(60 * 10) * 1000 + 123], # 00:10:00.123
264
+ [Arrow::Time.new(unit, (60 * 10) * 1000 + 123)], # 00:10:00.123
263
265
  [nil],
264
- [(60 * 60 * 2 + 9) * 1000 + 987], # 02:00:09.987
266
+ [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987
265
267
  ]
266
268
  target = build({
267
269
  column: {
@@ -274,10 +276,13 @@ module RawRecordsBasicArraysTests
274
276
  end
275
277
 
276
278
  def test_time64_micro
279
+ unit = Arrow::TimeUnit::MICRO
277
280
  records = [
278
- [(60 * 10) * 1_000_000 + 123_456], # 00:10:00.123456
281
+ # 00:10:00.123456
282
+ [Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)],
279
283
  [nil],
280
- [(60 * 60 * 2 + 9) * 1_000_000 + 987_654], # 02:00:09.987654
284
+ # 02:00:09.987654
285
+ [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)],
281
286
  ]
282
287
  target = build({
283
288
  column: {
@@ -290,10 +295,13 @@ module RawRecordsBasicArraysTests
290
295
  end
291
296
 
292
297
  def test_time64_nano
298
+ unit = Arrow::TimeUnit::NANO
293
299
  records = [
294
- [(60 * 10) * 1_000_000_000 + 123_456_789], # 00:10:00.123456789
300
+ # 00:10:00.123456789
301
+ [Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)],
295
302
  [nil],
296
- [(60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321], # 02:00:09.987654321
303
+ # 02:00:09.987654321
304
+ [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)],
297
305
  ]
298
306
  target = build({
299
307
  column: {