red-arrow 0.14.1 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +34 -0
  3. data/ext/arrow/converters.cpp +42 -0
  4. data/ext/arrow/converters.hpp +626 -0
  5. data/ext/arrow/raw-records.cpp +6 -625
  6. data/ext/arrow/red-arrow.hpp +37 -3
  7. data/ext/arrow/values.cpp +154 -0
  8. data/lib/arrow/array-builder.rb +24 -1
  9. data/lib/arrow/array.rb +9 -0
  10. data/lib/arrow/chunked-array.rb +5 -0
  11. data/lib/arrow/column-containable.rb +48 -0
  12. data/lib/arrow/column.rb +36 -10
  13. data/lib/arrow/csv-loader.rb +2 -2
  14. data/lib/arrow/data-type.rb +22 -5
  15. data/lib/arrow/date64-array-builder.rb +2 -2
  16. data/lib/arrow/date64-array.rb +1 -1
  17. data/lib/arrow/decimal128-array.rb +24 -0
  18. data/lib/arrow/field-containable.rb +3 -0
  19. data/lib/arrow/group.rb +10 -13
  20. data/lib/arrow/loader.rb +20 -1
  21. data/lib/arrow/record-batch.rb +6 -4
  22. data/lib/arrow/record-containable.rb +0 -35
  23. data/lib/arrow/record.rb +12 -9
  24. data/lib/arrow/slicer.rb +2 -2
  25. data/lib/arrow/struct-array-builder.rb +1 -7
  26. data/lib/arrow/struct-array.rb +13 -11
  27. data/lib/arrow/table-loader.rb +3 -9
  28. data/lib/arrow/table-table-formatter.rb +2 -2
  29. data/lib/arrow/table.rb +61 -24
  30. data/lib/arrow/time.rb +159 -0
  31. data/lib/arrow/time32-array-builder.rb +49 -0
  32. data/lib/arrow/time32-array.rb +28 -0
  33. data/lib/arrow/time64-array-builder.rb +49 -0
  34. data/lib/arrow/time64-array.rb +28 -0
  35. data/lib/arrow/timestamp-array-builder.rb +20 -1
  36. data/lib/arrow/timestamp-array.rb +10 -22
  37. data/lib/arrow/version.rb +1 -1
  38. data/red-arrow.gemspec +1 -1
  39. data/test/raw-records/test-basic-arrays.rb +16 -8
  40. data/test/raw-records/test-dense-union-array.rb +12 -5
  41. data/test/raw-records/test-list-array.rb +21 -9
  42. data/test/raw-records/test-sparse-union-array.rb +13 -5
  43. data/test/raw-records/test-struct-array.rb +11 -4
  44. data/test/test-column.rb +56 -31
  45. data/test/test-decimal128-array-builder.rb +11 -11
  46. data/test/test-decimal128-array.rb +4 -4
  47. data/test/test-slicer.rb +1 -3
  48. data/test/test-struct-array-builder.rb +4 -4
  49. data/test/test-struct-array.rb +4 -4
  50. data/test/test-table.rb +17 -8
  51. data/test/test-time.rb +288 -0
  52. data/test/test-time32-array.rb +81 -0
  53. data/test/test-time64-array.rb +81 -0
  54. data/test/values/test-basic-arrays.rb +284 -0
  55. data/test/values/test-dense-union-array.rb +487 -0
  56. data/test/values/test-list-array.rb +497 -0
  57. data/test/values/test-sparse-union-array.rb +477 -0
  58. data/test/values/test-struct-array.rb +452 -0
  59. metadata +78 -54
  60. data/lib/arrow/struct.rb +0 -79
  61. data/test/test-struct.rb +0 -81
@@ -0,0 +1,159 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # A signed time value stored as an integer count of +unit+ ticks.
  #
  # The magnitude of +value+ is lazily split into hour, minute, second and
  # nanosecond components the first time one of them is requested; the sign
  # is reported separately by #positive? and #negative?. Hours are not
  # wrapped at 24.
  class Time
    # The unit (one of the TimeUnit constants) that +value+ is counted in.
    attr_reader :unit
    # The raw signed count of +unit+ ticks.
    attr_reader :value

    # unit::  unit of +value+; expected to be a TimeUnit constant.
    # value:: signed count of +unit+ ticks (assumed to be an Integer; the
    #         decomposition relies on integer division).
    def initialize(unit, value)
      @unit = unit
      @value = value
      # Set to true once the hour/minute/second/nano_second cache is filled.
      @unconstructed = false
    end

    # Two times are equal when they have the same sign and the same
    # hour/minute/second/nanosecond components, regardless of unit.
    #
    # Raises ArgumentError if either side has an unknown unit.
    def ==(other)
      other.is_a?(self.class) &&
        positive? == other.positive? &&
        hour == other.hour &&
        minute == other.minute &&
        second == other.second &&
        nano_second == other.nano_second
    end

    # Hash-key equality follows #== so that equal times collapse to a single
    # key in a Hash and are deduplicated by Array#uniq.
    def eql?(other)
      self == other
    end

    # Built from exactly the fields #== compares, so that a == b implies
    # a.hash == b.hash.
    #
    # Raises ArgumentError if the unit is unknown.
    def hash
      [positive?, hour, minute, second, nano_second].hash
    end

    # Returns a new Time that expresses this value in +target_unit+.
    #
    # Sub-unit precision is truncated toward zero when casting to a coarser
    # unit, because the conversion works on the magnitude and re-applies the
    # sign afterwards.
    #
    # Raises ArgumentError if +target_unit+ (or the current unit) is not one
    # of the known TimeUnit values.
    def cast(target_unit)
      return self.class.new(@unit, @value) if @unit == target_unit

      whole_seconds = (hour * 60 * 60) + (minute * 60) + second
      magnitude =
        case target_unit
        when TimeUnit::SECOND
          whole_seconds
        when TimeUnit::MILLI
          whole_seconds * 1_000 + nano_second / 1_000_000
        when TimeUnit::MICRO
          whole_seconds * 1_000_000 + nano_second / 1_000
        when TimeUnit::NANO
          whole_seconds * 1_000_000_000 + nano_second
        else
          # Without this branch an unknown unit would yield a value counted
          # in seconds but labelled with the bogus unit.
          raise ArgumentError, "invalid unit: #{target_unit.inspect}"
        end
      self.class.new(target_unit, negative? ? -magnitude : magnitude)
    end

    # The value in seconds as a Float. Returns nil when the unit is not one
    # of the four known TimeUnit values.
    def to_f
      case @unit
      when TimeUnit::SECOND
        @value.to_f
      when TimeUnit::MILLI
        @value.to_f / 1000.0
      when TimeUnit::MICRO
        @value.to_f / 1000.0 / 1000.0
      when TimeUnit::NANO
        @value.to_f / 1000.0 / 1000.0 / 1000.0
      end
    end

    # True when the raw value is greater than zero.
    def positive?
      @value.positive?
    end

    # True when the raw value is less than zero.
    def negative?
      @value.negative?
    end

    # Hour component of the magnitude (not wrapped at 24).
    def hour
      unconstruct
      @hour
    end

    # Minute component of the magnitude.
    def minute
      unconstruct
      @minute
    end
    alias_method :min, :minute

    # Second component of the magnitude.
    def second
      unconstruct
      @second
    end
    alias_method :sec, :second

    # Sub-second part of the magnitude, in nanoseconds.
    def nano_second
      unconstruct
      @nano_second
    end
    alias_method :nsec, :nano_second

    # Formats as "[-]HH:MM:SS[.fraction]". The fraction is printed with
    # trailing zeros removed and omitted entirely when it is zero.
    def to_s
      unconstruct
      if @nano_second.zero?
        nano_second_string = ""
      else
        nano_second_string = (".%09d" % @nano_second).gsub(/0+\z/, "")
      end
      "%s%02d:%02d:%02d%s" % [
        @value.negative? ? "-" : "",
        @hour,
        @minute,
        @second,
        nano_second_string,
      ]
    end

    private

    # Fills the @hour/@minute/@second/@nano_second cache from the magnitude
    # of @value. Does nothing after the first successful call.
    #
    # Raises ArgumentError when @unit is not a known TimeUnit.
    def unconstruct
      return if @unconstructed

      abs_value = @value.abs
      case unit
      when TimeUnit::SECOND
        unconstruct_second(abs_value)
        @nano_second = 0
      when TimeUnit::MILLI
        unconstruct_second(abs_value / 1_000)
        @nano_second = (abs_value % 1_000) * 1_000_000
      when TimeUnit::MICRO
        unconstruct_second(abs_value / 1_000_000)
        @nano_second = (abs_value % 1_000_000) * 1_000
      when TimeUnit::NANO
        unconstruct_second(abs_value / 1_000_000_000)
        @nano_second = abs_value % 1_000_000_000
      else
        raise ArgumentError, "invalid unit: #{@unit.inspect}"
      end
      @unconstructed = true
    end

    # Splits a non-negative whole number of seconds into @hour, @minute and
    # @second.
    def unconstruct_second(abs_value_in_second)
      in_minute, @second = abs_value_in_second.divmod(60)
      @hour, @minute = in_minute.divmod(60)
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # Convenience layer for building Time32 arrays: the builder can be
  # created from either a time unit or a data type, and Arrow::Time inputs
  # are converted to raw values in the builder's unit.
  class Time32ArrayBuilder
    # Creates a builder for +unit_or_data_type+ and returns the result of
    # calling its instance-level #build with +values+.
    def self.build(unit_or_data_type, values)
      new(unit_or_data_type).build(values)
    end

    # Keep whatever constructor was defined before this file was loaded
    # reachable under a second name (presumably the binding-generated one
    # that takes a data type; verify against the loader).
    alias_method :initialize_raw, :initialize

    # unit_or_data_type:: a DataType, used as is, or anything else, which
    #                     is passed to Time32DataType.new as the unit.
    def initialize(unit_or_data_type)
      resolved_type =
        case unit_or_data_type
        when DataType
          unit_or_data_type
        else
          Time32DataType.new(unit_or_data_type)
        end
      initialize_raw(resolved_type)
    end

    # The unit of the builder's value data type, memoized after first use.
    def unit
      @unit ||= value_data_type.unit
    end

    private

    # Arrow::Time inputs are cast to this builder's unit and reduced to
    # their raw value; every other input is returned untouched.
    # NOTE(review): presumably called by the append path defined
    # elsewhere; confirm.
    def convert_to_arrow_value(value)
      if value.is_a?(Time)
        value.cast(unit).value
      else
        value
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # Value access for Time32 arrays: elements are returned as Arrow::Time
  # objects instead of raw integers.
  class Time32Array
    # Wraps the raw value at index +i+ in an Arrow::Time that carries this
    # array's unit.
    def get_value(i)
      # Resolve the unit before reading the raw value, as the original
      # argument order did.
      time_unit = unit
      raw_value = get_raw_value(i)
      Time.new(time_unit, raw_value)
    end

    # The unit of the array's value data type, memoized after first use.
    def unit
      @unit ||= value_data_type.unit
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # Convenience layer for building Time64 arrays: the builder can be
  # created from either a time unit or a data type, and Arrow::Time inputs
  # are converted to raw values in the builder's unit.
  class Time64ArrayBuilder
    # Creates a builder for +unit_or_data_type+ and returns the result of
    # calling its instance-level #build with +values+.
    def self.build(unit_or_data_type, values)
      new(unit_or_data_type).build(values)
    end

    # Keep whatever constructor was defined before this file was loaded
    # reachable under a second name (presumably the binding-generated one
    # that takes a data type; verify against the loader).
    alias_method :initialize_raw, :initialize

    # unit_or_data_type:: a DataType, used as is, or anything else, which
    #                     is passed to Time64DataType.new as the unit.
    def initialize(unit_or_data_type)
      resolved_type =
        case unit_or_data_type
        when DataType
          unit_or_data_type
        else
          Time64DataType.new(unit_or_data_type)
        end
      initialize_raw(resolved_type)
    end

    # The unit of the builder's value data type, memoized after first use.
    def unit
      @unit ||= value_data_type.unit
    end

    private

    # Arrow::Time inputs are cast to this builder's unit and reduced to
    # their raw value; every other input is returned untouched.
    # NOTE(review): presumably called by the append path defined
    # elsewhere; confirm.
    def convert_to_arrow_value(value)
      if value.is_a?(Time)
        value.cast(unit).value
      else
        value
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # Value access for Time64 arrays: elements are returned as Arrow::Time
  # objects instead of raw integers.
  class Time64Array
    # Wraps the raw value at index +i+ in an Arrow::Time that carries this
    # array's unit.
    def get_value(i)
      # Resolve the unit before reading the raw value, as the original
      # argument order did.
      time_unit = unit
      raw_value = get_raw_value(i)
      Time.new(time_unit, raw_value)
    end

    # The unit of the array's value data type, memoized after first use.
    def unit
      @unit ||= value_data_type.unit
    end
  end
end
@@ -17,6 +17,25 @@
17
17
 
18
18
  module Arrow
19
19
  class TimestampArrayBuilder
20
+ class << self
21
+ def build(unit_or_data_type, values)
22
+ builder = new(unit_or_data_type)
23
+ builder.build(values)
24
+ end
25
+ end
26
+
27
+ alias_method :initialize_raw, :initialize
28
+ def initialize(unit_or_data_type)
29
+ case unit_or_data_type
30
+ when DataType
31
+ data_type = unit_or_data_type
32
+ else
33
+ unit = unit_or_data_type
34
+ data_type = TimestampDataType.new(unit)
35
+ end
36
+ initialize_raw(data_type)
37
+ end
38
+
20
39
  private
21
40
  def unit_id
22
41
  @unit_id ||= value_data_type.unit.nick.to_sym
@@ -27,7 +46,7 @@ module Arrow
27
46
  value = value.to_time
28
47
  end
29
48
 
30
- if value.is_a?(Time)
49
+ if value.is_a?(::Time)
31
50
  case unit_id
32
51
  when :second
33
52
  value.to_i
@@ -17,16 +17,8 @@
17
17
 
18
18
  module Arrow
19
19
  class TimestampArray
20
- class << self
21
- def new(unit, values)
22
- data_type = TimestampDataType.new(unit)
23
- builder = TimestampArrayBuilder.new(data_type)
24
- builder.build(values)
25
- end
26
- end
27
-
28
20
  def get_value(i)
29
- to_time(get_raw_value(i))
21
+ cast_to_time(get_raw_value(i))
30
22
  end
31
23
 
32
24
  def unit
@@ -34,20 +26,16 @@ module Arrow
34
26
  end
35
27
 
36
28
  private
37
- def unit_id
38
- @unit_id ||= unit.nick.to_sym
39
- end
40
-
41
- def to_time(raw_value)
42
- case unit_id
43
- when :second
44
- Time.at(raw_value)
45
- when :milli
46
- Time.at(*raw_value.divmod(1_000))
47
- when :micro
48
- Time.at(*raw_value.divmod(1_000_000))
29
+ def cast_to_time(raw_value)
30
+ case unit
31
+ when TimeUnit::SECOND
32
+ ::Time.at(raw_value)
33
+ when TimeUnit::MILLI
34
+ ::Time.at(*raw_value.divmod(1_000))
35
+ when TimeUnit::MICRO
36
+ ::Time.at(*raw_value.divmod(1_000_000))
49
37
  else
50
- Time.at(raw_value / 1_000_000_000.0)
38
+ ::Time.at(raw_value / 1_000_000_000.0)
51
39
  end
52
40
  end
53
41
  end
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "0.14.1"
19
+ VERSION = "0.15.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -47,7 +47,7 @@ Gem::Specification.new do |spec|
47
47
  spec.extensions = ["ext/arrow/extconf.rb"]
48
48
 
49
49
  spec.add_runtime_dependency("extpp", ">= 0.0.7")
50
- spec.add_runtime_dependency("gio2", ">= 3.3.6")
50
+ spec.add_runtime_dependency("gio2", "= 3.3.7")
51
51
  spec.add_runtime_dependency("native-package-installer")
52
52
  spec.add_runtime_dependency("pkg-config")
53
53
 
@@ -242,10 +242,11 @@ module RawRecordsBasicArraysTests
242
242
  end
243
243
 
244
244
  def test_time32_second
245
+ unit = Arrow::TimeUnit::SECOND
245
246
  records = [
246
- [60 * 10], # 00:10:00
247
+ [Arrow::Time.new(unit, 60 * 10)], # 00:10:00
247
248
  [nil],
248
- [60 * 60 * 2 + 9], # 02:00:09
249
+ [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09
249
250
  ]
250
251
  target = build({
251
252
  column: {
@@ -258,10 +259,11 @@ module RawRecordsBasicArraysTests
258
259
  end
259
260
 
260
261
  def test_time32_milli
262
+ unit = Arrow::TimeUnit::MILLI
261
263
  records = [
262
- [(60 * 10) * 1000 + 123], # 00:10:00.123
264
+ [Arrow::Time.new(unit, (60 * 10) * 1000 + 123)], # 00:10:00.123
263
265
  [nil],
264
- [(60 * 60 * 2 + 9) * 1000 + 987], # 02:00:09.987
266
+ [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987
265
267
  ]
266
268
  target = build({
267
269
  column: {
@@ -274,10 +276,13 @@ module RawRecordsBasicArraysTests
274
276
  end
275
277
 
276
278
  def test_time64_micro
279
+ unit = Arrow::TimeUnit::MICRO
277
280
  records = [
278
- [(60 * 10) * 1_000_000 + 123_456], # 00:10:00.123456
281
+ # 00:10:00.123456
282
+ [Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)],
279
283
  [nil],
280
- [(60 * 60 * 2 + 9) * 1_000_000 + 987_654], # 02:00:09.987654
284
+ # 02:00:09.987654
285
+ [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)],
281
286
  ]
282
287
  target = build({
283
288
  column: {
@@ -290,10 +295,13 @@ module RawRecordsBasicArraysTests
290
295
  end
291
296
 
292
297
  def test_time64_nano
298
+ unit = Arrow::TimeUnit::NANO
293
299
  records = [
294
- [(60 * 10) * 1_000_000_000 + 123_456_789], # 00:10:00.123456789
300
+ # 00:10:00.123456789
301
+ [Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)],
295
302
  [nil],
296
- [(60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321], # 02:00:09.987654321
303
+ # 02:00:09.987654321
304
+ [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)],
297
305
  ]
298
306
  target = build({
299
307
  column: {