red-arrow 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +49 -4
  3. data/ext/arrow/arrow.cpp +43 -0
  4. data/ext/arrow/extconf.rb +52 -0
  5. data/ext/arrow/record-batch.cpp +756 -0
  6. data/ext/arrow/red-arrow.hpp +60 -0
  7. data/lib/arrow.rb +2 -1
  8. data/lib/arrow/array-builder.rb +4 -0
  9. data/lib/arrow/array.rb +11 -1
  10. data/lib/arrow/bigdecimal-extension.rb +24 -0
  11. data/lib/arrow/binary-array-builder.rb +36 -0
  12. data/lib/arrow/block-closable.rb +5 -1
  13. data/lib/arrow/csv-loader.rb +28 -6
  14. data/lib/arrow/data-type.rb +8 -4
  15. data/lib/arrow/decimal128-array-builder.rb +2 -2
  16. data/lib/arrow/decimal128.rb +42 -0
  17. data/lib/arrow/list-array-builder.rb +1 -1
  18. data/lib/arrow/loader.rb +8 -0
  19. data/lib/arrow/null-array-builder.rb +26 -0
  20. data/lib/arrow/record-batch-builder.rb +8 -9
  21. data/lib/arrow/struct-array-builder.rb +3 -3
  22. data/lib/arrow/struct-array.rb +15 -7
  23. data/lib/arrow/struct.rb +11 -0
  24. data/lib/arrow/table-loader.rb +14 -14
  25. data/lib/arrow/version.rb +1 -1
  26. data/red-arrow.gemspec +8 -4
  27. data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
  28. data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
  29. data/test/raw-records/record-batch/test-list-array.rb +498 -0
  30. data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
  31. data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
  32. data/test/raw-records/record-batch/test-struct-array.rb +426 -0
  33. data/test/run-test.rb +25 -2
  34. data/test/test-array.rb +38 -9
  35. data/test/test-bigdecimal.rb +23 -0
  36. data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
  37. data/test/test-chunked-array.rb +22 -0
  38. data/test/test-column.rb +24 -0
  39. data/test/test-csv-loader.rb +30 -0
  40. data/test/test-data-type.rb +25 -0
  41. data/test/test-decimal128.rb +64 -0
  42. data/test/test-field.rb +20 -0
  43. data/test/test-group.rb +2 -2
  44. data/test/test-record-batch-builder.rb +9 -0
  45. data/test/test-record-batch.rb +14 -0
  46. data/test/test-schema.rb +14 -0
  47. data/test/test-struct-array.rb +16 -3
  48. data/test/test-table.rb +14 -0
  49. data/test/test-tensor.rb +56 -0
  50. metadata +117 -47
@@ -71,17 +71,17 @@ module Arrow
71
71
  when ::Array
72
72
  append_value_raw
73
73
  value.each_with_index do |sub_value, i|
74
- self[i].append_value(sub_value)
74
+ self[i].append(sub_value)
75
75
  end
76
76
  when Arrow::Struct
77
77
  append_value_raw
78
78
  value.values.each_with_index do |sub_value, i|
79
- self[i].append_value(sub_value)
79
+ self[i].append(sub_value)
80
80
  end
81
81
  when Hash
82
82
  append_value_raw
83
83
  value.each do |name, sub_value|
84
- self[name].append_value(sub_value)
84
+ self[name].append(sub_value)
85
85
  end
86
86
  else
87
87
  message =
@@ -19,17 +19,25 @@ require "arrow/struct"
19
19
 
20
20
  module Arrow
21
21
  class StructArray
22
- def [](i)
23
- warn("Use #{self.class}\#find_field instead. " +
24
- "This will returns Arrow::Struct instead of Arrow::Array " +
25
- "since 0.13.0.")
26
- get_field(i)
27
- end
28
-
22
+ # @param i [Integer]
23
+ # The index of the value to get. You must specify the value index.
24
+ #
25
+ # You can use {Arrow::Array#[]} for convenient value access.
26
+ #
27
+ # @return [Arrow::Struct] The `i`-th value.
29
28
  def get_value(i)
30
29
  Struct.new(self, i)
31
30
  end
32
31
 
32
+ # @overload find_field(index)
33
+ # @param index [Integer] The index of the field to be found.
34
+ # @return [Arrow::Array, nil]
35
+ # The `index`-th field or `nil` for out of range.
36
+ #
37
+ # @overload find_field(name)
38
+ # @param name [String, Symbol] The name of the field to be found.
39
+ # @return [Arrow::Array, nil]
40
+ # The field that has `name` or `nil` for nonexistent name.
33
41
  def find_field(index_or_name)
34
42
  case index_or_name
35
43
  when String, Symbol
@@ -64,5 +64,16 @@ module Arrow
64
64
  end
65
65
  super
66
66
  end
67
+
68
+ def ==(other)
69
+ other.is_a?(self.class) and
70
+ @array == other.array and
71
+ @index == other.index
72
+ end
73
+
74
+ protected
75
+ def array
76
+ @array
77
+ end
67
78
  end
68
79
  end
@@ -18,14 +18,14 @@
18
18
  module Arrow
19
19
  class TableLoader
20
20
  class << self
21
- def load(output, options={})
22
- new(output, options).load
21
+ def load(input, options={})
22
+ new(input, options).load
23
23
  end
24
24
  end
25
25
 
26
- def initialize(output, options={})
27
- output = output.to_path if output.respond_to?(:to_path)
28
- @output = output
26
+ def initialize(input, options={})
27
+ input = input.to_path if input.respond_to?(:to_path)
28
+ @input = input
29
29
  @options = options
30
30
  fill_options
31
31
  end
@@ -50,7 +50,7 @@ module Arrow
50
50
  __send__(custom_load_method)
51
51
  else
52
52
  # For backward compatibility.
53
- __send__(custom_load_method, @output)
53
+ __send__(custom_load_method, @input)
54
54
  end
55
55
  end
56
56
 
@@ -60,10 +60,10 @@ module Arrow
60
60
  return
61
61
  end
62
62
 
63
- if @output.is_a?(Buffer)
63
+ if @input.is_a?(Buffer)
64
64
  info = {}
65
65
  else
66
- extension = PathExtension.new(@output)
66
+ extension = PathExtension.new(@input)
67
67
  info = extension.extract
68
68
  end
69
69
  format = info[:format]
@@ -79,10 +79,10 @@ module Arrow
79
79
  end
80
80
 
81
81
  def open_input_stream
82
- if @output.is_a?(Buffer)
83
- BufferInputStream.new(@output)
82
+ if @input.is_a?(Buffer)
83
+ BufferInputStream.new(@input)
84
84
  else
85
- MemoryMappedInputStream.new(@output)
85
+ MemoryMappedInputStream.new(@input)
86
86
  end
87
87
  end
88
88
 
@@ -152,10 +152,10 @@ module Arrow
152
152
  def load_as_csv
153
153
  options = @options.dup
154
154
  options.delete(:format)
155
- if @output.is_a?(Buffer)
156
- CSVLoader.load(@output.data.to_s, options)
155
+ if @input.is_a?(Buffer)
156
+ CSVLoader.load(@input.data.to_s, options)
157
157
  else
158
- CSVLoader.load(Pathname.new(@output), options)
158
+ CSVLoader.load(Pathname.new(@input), options)
159
159
  end
160
160
  end
161
161
 
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "0.12.0"
19
+ VERSION = "0.13.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -39,21 +39,25 @@ Gem::Specification.new do |spec|
39
39
  spec.license = "Apache-2.0"
40
40
  spec.files = ["README.md", "Rakefile", "Gemfile", "#{spec.name}.gemspec"]
41
41
  spec.files += ["LICENSE.txt", "NOTICE.txt"]
42
+ spec.files += Dir.glob("ext/**/*.{cpp,hpp,rb}")
42
43
  spec.files += Dir.glob("lib/**/*.rb")
43
44
  spec.files += Dir.glob("image/*.*")
44
45
  spec.files += Dir.glob("doc/text/*")
45
46
  spec.test_files += Dir.glob("test/**/*")
46
- spec.extensions = ["dependency-check/Rakefile"]
47
+ spec.extensions = ["ext/arrow/extconf.rb"]
47
48
 
48
- spec.add_runtime_dependency("gobject-introspection", ">= 3.3.1")
49
- spec.add_runtime_dependency("pkg-config")
49
+ spec.add_runtime_dependency("extpp", ">= 0.0.7")
50
+ spec.add_runtime_dependency("gio2", ">= 3.3.6")
50
51
  spec.add_runtime_dependency("native-package-installer")
52
+ spec.add_runtime_dependency("pkg-config")
51
53
 
54
+ spec.add_development_dependency("benchmark-driver")
52
55
  spec.add_development_dependency("bundler")
56
+ spec.add_development_dependency("faker")
53
57
  spec.add_development_dependency("rake")
54
58
  spec.add_development_dependency("redcarpet")
55
59
  spec.add_development_dependency("test-unit")
56
60
  spec.add_development_dependency("yard")
57
61
 
58
- spec.metadata["msys2_mingw_dependencies"] = "apache-arrow"
62
+ spec.metadata["msys2_mingw_dependencies"] = "arrow"
59
63
  end
@@ -0,0 +1,349 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase
19
+ test("NullArray") do
20
+ records = [
21
+ [nil],
22
+ [nil],
23
+ [nil],
24
+ [nil],
25
+ ]
26
+ array = Arrow::NullArray.new(records.size)
27
+ schema = Arrow::Schema.new(column: :null)
28
+ record_batch = Arrow::RecordBatch.new(schema,
29
+ records.size,
30
+ [array])
31
+ assert_equal(records, record_batch.raw_records)
32
+ end
33
+
34
+ test("BooleanArray") do
35
+ records = [
36
+ [true],
37
+ [nil],
38
+ [false],
39
+ ]
40
+ record_batch = Arrow::RecordBatch.new({column: :boolean},
41
+ records)
42
+ assert_equal(records, record_batch.raw_records)
43
+ end
44
+
45
+ test("Int8Array") do
46
+ records = [
47
+ [-(2 ** 7)],
48
+ [nil],
49
+ [(2 ** 7) - 1],
50
+ ]
51
+ record_batch = Arrow::RecordBatch.new({column: :int8},
52
+ records)
53
+ assert_equal(records, record_batch.raw_records)
54
+ end
55
+
56
+ test("UInt8Array") do
57
+ records = [
58
+ [0],
59
+ [nil],
60
+ [(2 ** 8) - 1],
61
+ ]
62
+ record_batch = Arrow::RecordBatch.new({column: :uint8},
63
+ records)
64
+ assert_equal(records, record_batch.raw_records)
65
+ end
66
+
67
+ test("Int16Array") do
68
+ records = [
69
+ [-(2 ** 15)],
70
+ [nil],
71
+ [(2 ** 15) - 1],
72
+ ]
73
+ record_batch = Arrow::RecordBatch.new({column: :int16},
74
+ records)
75
+ assert_equal(records, record_batch.raw_records)
76
+ end
77
+
78
+ test("UInt16Array") do
79
+ records = [
80
+ [0],
81
+ [nil],
82
+ [(2 ** 16) - 1],
83
+ ]
84
+ record_batch = Arrow::RecordBatch.new({column: :uint16},
85
+ records)
86
+ assert_equal(records, record_batch.raw_records)
87
+ end
88
+
89
+ test("Int32Array") do
90
+ records = [
91
+ [-(2 ** 31)],
92
+ [nil],
93
+ [(2 ** 31) - 1],
94
+ ]
95
+ record_batch = Arrow::RecordBatch.new({column: :int32},
96
+ records)
97
+ assert_equal(records, record_batch.raw_records)
98
+ end
99
+
100
+ test("UInt32Array") do
101
+ records = [
102
+ [0],
103
+ [nil],
104
+ [(2 ** 32) - 1],
105
+ ]
106
+ record_batch = Arrow::RecordBatch.new({column: :uint32},
107
+ records)
108
+ assert_equal(records, record_batch.raw_records)
109
+ end
110
+
111
+ test("Int64Array") do
112
+ records = [
113
+ [-(2 ** 63)],
114
+ [nil],
115
+ [(2 ** 63) - 1],
116
+ ]
117
+ record_batch = Arrow::RecordBatch.new({column: :int64},
118
+ records)
119
+ assert_equal(records, record_batch.raw_records)
120
+ end
121
+
122
+ test("UInt64Array") do
123
+ records = [
124
+ [0],
125
+ [nil],
126
+ [(2 ** 64) - 1],
127
+ ]
128
+ record_batch = Arrow::RecordBatch.new({column: :uint64},
129
+ records)
130
+ assert_equal(records, record_batch.raw_records)
131
+ end
132
+
133
+ test("FloatArray") do
134
+ records = [
135
+ [-1.0],
136
+ [nil],
137
+ [1.0],
138
+ ]
139
+ record_batch = Arrow::RecordBatch.new({column: :float},
140
+ records)
141
+ assert_equal(records, record_batch.raw_records)
142
+ end
143
+
144
+ test("DoubleArray") do
145
+ records = [
146
+ [-1.0],
147
+ [nil],
148
+ [1.0],
149
+ ]
150
+ record_batch = Arrow::RecordBatch.new({column: :double},
151
+ records)
152
+ assert_equal(records, record_batch.raw_records)
153
+ end
154
+
155
+ test("BinaryArray") do
156
+ records = [
157
+ ["\x00".b],
158
+ [nil],
159
+ ["\xff".b],
160
+ ]
161
+ record_batch = Arrow::RecordBatch.new({column: :binary},
162
+ records)
163
+ assert_equal(records, record_batch.raw_records)
164
+ end
165
+
166
+ test("StringArray") do
167
+ records = [
168
+ ["Ruby"],
169
+ [nil],
170
+ ["\u3042"], # U+3042 HIRAGANA LETTER A
171
+ ]
172
+ record_batch = Arrow::RecordBatch.new({column: :string},
173
+ records)
174
+ assert_equal(records, record_batch.raw_records)
175
+ end
176
+
177
+ test("Date32Array") do
178
+ records = [
179
+ [Date.new(1960, 1, 1)],
180
+ [nil],
181
+ [Date.new(2017, 8, 23)],
182
+ ]
183
+ record_batch = Arrow::RecordBatch.new({column: :date32},
184
+ records)
185
+ assert_equal(records, record_batch.raw_records)
186
+ end
187
+
188
+ test("Date64Array") do
189
+ records = [
190
+ [DateTime.new(1960, 1, 1, 2, 9, 30)],
191
+ [nil],
192
+ [DateTime.new(2017, 8, 23, 14, 57, 2)],
193
+ ]
194
+ record_batch = Arrow::RecordBatch.new({column: :date64},
195
+ records)
196
+ assert_equal(records, record_batch.raw_records)
197
+ end
198
+
199
+ sub_test_case("TimestampArray") do
200
+ test("second") do
201
+ records = [
202
+ [Time.parse("1960-01-01T02:09:30Z")],
203
+ [nil],
204
+ [Time.parse("2017-08-23T14:57:02Z")],
205
+ ]
206
+ record_batch = Arrow::RecordBatch.new({
207
+ column: {
208
+ type: :timestamp,
209
+ unit: :second,
210
+ }
211
+ },
212
+ records)
213
+ assert_equal(records, record_batch.raw_records)
214
+ end
215
+
216
+ test("milli") do
217
+ records = [
218
+ [Time.parse("1960-01-01T02:09:30.123Z")],
219
+ [nil],
220
+ [Time.parse("2017-08-23T14:57:02.987Z")],
221
+ ]
222
+ record_batch = Arrow::RecordBatch.new({
223
+ column: {
224
+ type: :timestamp,
225
+ unit: :milli,
226
+ }
227
+ },
228
+ records)
229
+ assert_equal(records, record_batch.raw_records)
230
+ end
231
+
232
+ test("micro") do
233
+ records = [
234
+ [Time.parse("1960-01-01T02:09:30.123456Z")],
235
+ [nil],
236
+ [Time.parse("2017-08-23T14:57:02.987654Z")],
237
+ ]
238
+ record_batch = Arrow::RecordBatch.new({
239
+ column: {
240
+ type: :timestamp,
241
+ unit: :micro,
242
+ }
243
+ },
244
+ records)
245
+ assert_equal(records, record_batch.raw_records)
246
+ end
247
+
248
+ test("nano") do
249
+ records = [
250
+ [Time.parse("1960-01-01T02:09:30.123456789Z")],
251
+ [nil],
252
+ [Time.parse("2017-08-23T14:57:02.987654321Z")],
253
+ ]
254
+ record_batch = Arrow::RecordBatch.new({
255
+ column: {
256
+ type: :timestamp,
257
+ unit: :nano,
258
+ }
259
+ },
260
+ records)
261
+ assert_equal(records, record_batch.raw_records)
262
+ end
263
+ end
264
+
265
+ sub_test_case("Time32Array") do
266
+ test("second") do
267
+ records = [
268
+ [60 * 10], # 00:10:00
269
+ [nil],
270
+ [60 * 60 * 2 + 9], # 02:00:09
271
+ ]
272
+ record_batch = Arrow::RecordBatch.new({
273
+ column: {
274
+ type: :time32,
275
+ unit: :second,
276
+ }
277
+ },
278
+ records)
279
+ assert_equal(records, record_batch.raw_records)
280
+ end
281
+
282
+ test("milli") do
283
+ records = [
284
+ [(60 * 10) * 1000 + 123], # 00:10:00.123
285
+ [nil],
286
+ [(60 * 60 * 2 + 9) * 1000 + 987], # 02:00:09.987
287
+ ]
288
+ record_batch = Arrow::RecordBatch.new({
289
+ column: {
290
+ type: :time32,
291
+ unit: :milli,
292
+ }
293
+ },
294
+ records)
295
+ assert_equal(records, record_batch.raw_records)
296
+ end
297
+ end
298
+
299
+ sub_test_case("Time64Array") do
300
+ test("micro") do
301
+ records = [
302
+ [(60 * 10) * 1_000_000 + 123_456], # 00:10:00.123456
303
+ [nil],
304
+ [(60 * 60 * 2 + 9) * 1_000_000 + 987_654], # 02:00:09.987654
305
+ ]
306
+ record_batch = Arrow::RecordBatch.new({
307
+ column: {
308
+ type: :time64,
309
+ unit: :micro,
310
+ }
311
+ },
312
+ records)
313
+ assert_equal(records, record_batch.raw_records)
314
+ end
315
+
316
+ test("nano") do
317
+ records = [
318
+ [(60 * 10) * 1_000_000_000 + 123_456_789], # 00:10:00.123456789
319
+ [nil],
320
+ [(60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321], # 02:00:09.987654321
321
+ ]
322
+ record_batch = Arrow::RecordBatch.new({
323
+ column: {
324
+ type: :time64,
325
+ unit: :nano,
326
+ }
327
+ },
328
+ records)
329
+ assert_equal(records, record_batch.raw_records)
330
+ end
331
+ end
332
+
333
+ test("Decimal128Array") do
334
+ records = [
335
+ [BigDecimal("92.92")],
336
+ [nil],
337
+ [BigDecimal("29.29")],
338
+ ]
339
+ record_batch = Arrow::RecordBatch.new({
340
+ column: {
341
+ type: :decimal128,
342
+ precision: 8,
343
+ scale: 2,
344
+ }
345
+ },
346
+ records)
347
+ assert_equal(records, record_batch.raw_records)
348
+ end
349
+ end