red-arrow 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +49 -4
  3. data/ext/arrow/arrow.cpp +43 -0
  4. data/ext/arrow/extconf.rb +52 -0
  5. data/ext/arrow/record-batch.cpp +756 -0
  6. data/ext/arrow/red-arrow.hpp +60 -0
  7. data/lib/arrow.rb +2 -1
  8. data/lib/arrow/array-builder.rb +4 -0
  9. data/lib/arrow/array.rb +11 -1
  10. data/lib/arrow/bigdecimal-extension.rb +24 -0
  11. data/lib/arrow/binary-array-builder.rb +36 -0
  12. data/lib/arrow/block-closable.rb +5 -1
  13. data/lib/arrow/csv-loader.rb +28 -6
  14. data/lib/arrow/data-type.rb +8 -4
  15. data/lib/arrow/decimal128-array-builder.rb +2 -2
  16. data/lib/arrow/decimal128.rb +42 -0
  17. data/lib/arrow/list-array-builder.rb +1 -1
  18. data/lib/arrow/loader.rb +8 -0
  19. data/lib/arrow/null-array-builder.rb +26 -0
  20. data/lib/arrow/record-batch-builder.rb +8 -9
  21. data/lib/arrow/struct-array-builder.rb +3 -3
  22. data/lib/arrow/struct-array.rb +15 -7
  23. data/lib/arrow/struct.rb +11 -0
  24. data/lib/arrow/table-loader.rb +14 -14
  25. data/lib/arrow/version.rb +1 -1
  26. data/red-arrow.gemspec +8 -4
  27. data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
  28. data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
  29. data/test/raw-records/record-batch/test-list-array.rb +498 -0
  30. data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
  31. data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
  32. data/test/raw-records/record-batch/test-struct-array.rb +426 -0
  33. data/test/run-test.rb +25 -2
  34. data/test/test-array.rb +38 -9
  35. data/test/test-bigdecimal.rb +23 -0
  36. data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
  37. data/test/test-chunked-array.rb +22 -0
  38. data/test/test-column.rb +24 -0
  39. data/test/test-csv-loader.rb +30 -0
  40. data/test/test-data-type.rb +25 -0
  41. data/test/test-decimal128.rb +64 -0
  42. data/test/test-field.rb +20 -0
  43. data/test/test-group.rb +2 -2
  44. data/test/test-record-batch-builder.rb +9 -0
  45. data/test/test-record-batch.rb +14 -0
  46. data/test/test-schema.rb +14 -0
  47. data/test/test-struct-array.rb +16 -3
  48. data/test/test-table.rb +14 -0
  49. data/test/test-tensor.rb +56 -0
  50. metadata +117 -47
@@ -71,17 +71,17 @@ module Arrow
71
71
  when ::Array
72
72
  append_value_raw
73
73
  value.each_with_index do |sub_value, i|
74
- self[i].append_value(sub_value)
74
+ self[i].append(sub_value)
75
75
  end
76
76
  when Arrow::Struct
77
77
  append_value_raw
78
78
  value.values.each_with_index do |sub_value, i|
79
- self[i].append_value(sub_value)
79
+ self[i].append(sub_value)
80
80
  end
81
81
  when Hash
82
82
  append_value_raw
83
83
  value.each do |name, sub_value|
84
- self[name].append_value(sub_value)
84
+ self[name].append(sub_value)
85
85
  end
86
86
  else
87
87
  message =
@@ -19,17 +19,25 @@ require "arrow/struct"
19
19
 
20
20
  module Arrow
21
21
  class StructArray
22
- def [](i)
23
- warn("Use #{self.class}\#find_field instead. " +
24
- "This will returns Arrow::Struct instead of Arrow::Array " +
25
- "since 0.13.0.")
26
- get_field(i)
27
- end
28
-
22
+ # @param i [Integer]
23
+ # The index of the value to get. You must specify the value index.
24
+ #
25
+ # You can use {Arrow::Array#[]} for convenient value access.
26
+ #
27
+ # @return [Arrow::Struct] The `i`-th value.
29
28
  def get_value(i)
30
29
  Struct.new(self, i)
31
30
  end
32
31
 
32
+ # @overload find_field(index)
33
+ # @param index [Integer] The index of the field to be found.
34
+ # @return [Arrow::Array, nil]
35
+ # The `index`-th field or `nil` for out of range.
36
+ #
37
+ # @overload find_field(name)
38
+ # @param name [String, Symbol] The name of the field to be found.
39
+ # @return [Arrow::Array, nil]
40
+ # The field that has `name` or `nil` for nonexistent name.
33
41
  def find_field(index_or_name)
34
42
  case index_or_name
35
43
  when String, Symbol
@@ -64,5 +64,16 @@ module Arrow
64
64
  end
65
65
  super
66
66
  end
67
+
68
+ def ==(other)
69
+ other.is_a?(self.class) and
70
+ @array == other.array and
71
+ @index == other.index
72
+ end
73
+
74
+ protected
75
+ def array
76
+ @array
77
+ end
67
78
  end
68
79
  end
@@ -18,14 +18,14 @@
18
18
  module Arrow
19
19
  class TableLoader
20
20
  class << self
21
- def load(output, options={})
22
- new(output, options).load
21
+ def load(input, options={})
22
+ new(input, options).load
23
23
  end
24
24
  end
25
25
 
26
- def initialize(output, options={})
27
- output = output.to_path if output.respond_to?(:to_path)
28
- @output = output
26
+ def initialize(input, options={})
27
+ input = input.to_path if input.respond_to?(:to_path)
28
+ @input = input
29
29
  @options = options
30
30
  fill_options
31
31
  end
@@ -50,7 +50,7 @@ module Arrow
50
50
  __send__(custom_load_method)
51
51
  else
52
52
  # For backward compatibility.
53
- __send__(custom_load_method, @output)
53
+ __send__(custom_load_method, @input)
54
54
  end
55
55
  end
56
56
 
@@ -60,10 +60,10 @@ module Arrow
60
60
  return
61
61
  end
62
62
 
63
- if @output.is_a?(Buffer)
63
+ if @input.is_a?(Buffer)
64
64
  info = {}
65
65
  else
66
- extension = PathExtension.new(@output)
66
+ extension = PathExtension.new(@input)
67
67
  info = extension.extract
68
68
  end
69
69
  format = info[:format]
@@ -79,10 +79,10 @@ module Arrow
79
79
  end
80
80
 
81
81
  def open_input_stream
82
- if @output.is_a?(Buffer)
83
- BufferInputStream.new(@output)
82
+ if @input.is_a?(Buffer)
83
+ BufferInputStream.new(@input)
84
84
  else
85
- MemoryMappedInputStream.new(@output)
85
+ MemoryMappedInputStream.new(@input)
86
86
  end
87
87
  end
88
88
 
@@ -152,10 +152,10 @@ module Arrow
152
152
  def load_as_csv
153
153
  options = @options.dup
154
154
  options.delete(:format)
155
- if @output.is_a?(Buffer)
156
- CSVLoader.load(@output.data.to_s, options)
155
+ if @input.is_a?(Buffer)
156
+ CSVLoader.load(@input.data.to_s, options)
157
157
  else
158
- CSVLoader.load(Pathname.new(@output), options)
158
+ CSVLoader.load(Pathname.new(@input), options)
159
159
  end
160
160
  end
161
161
 
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "0.12.0"
19
+ VERSION = "0.13.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -39,21 +39,25 @@ Gem::Specification.new do |spec|
39
39
  spec.license = "Apache-2.0"
40
40
  spec.files = ["README.md", "Rakefile", "Gemfile", "#{spec.name}.gemspec"]
41
41
  spec.files += ["LICENSE.txt", "NOTICE.txt"]
42
+ spec.files += Dir.glob("ext/**/*.{cpp,hpp,rb}")
42
43
  spec.files += Dir.glob("lib/**/*.rb")
43
44
  spec.files += Dir.glob("image/*.*")
44
45
  spec.files += Dir.glob("doc/text/*")
45
46
  spec.test_files += Dir.glob("test/**/*")
46
- spec.extensions = ["dependency-check/Rakefile"]
47
+ spec.extensions = ["ext/arrow/extconf.rb"]
47
48
 
48
- spec.add_runtime_dependency("gobject-introspection", ">= 3.3.1")
49
- spec.add_runtime_dependency("pkg-config")
49
+ spec.add_runtime_dependency("extpp", ">= 0.0.7")
50
+ spec.add_runtime_dependency("gio2", ">= 3.3.6")
50
51
  spec.add_runtime_dependency("native-package-installer")
52
+ spec.add_runtime_dependency("pkg-config")
51
53
 
54
+ spec.add_development_dependency("benchmark-driver")
52
55
  spec.add_development_dependency("bundler")
56
+ spec.add_development_dependency("faker")
53
57
  spec.add_development_dependency("rake")
54
58
  spec.add_development_dependency("redcarpet")
55
59
  spec.add_development_dependency("test-unit")
56
60
  spec.add_development_dependency("yard")
57
61
 
58
- spec.metadata["msys2_mingw_dependencies"] = "apache-arrow"
62
+ spec.metadata["msys2_mingw_dependencies"] = "arrow"
59
63
  end
@@ -0,0 +1,349 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase
19
+ test("NullArray") do
20
+ records = [
21
+ [nil],
22
+ [nil],
23
+ [nil],
24
+ [nil],
25
+ ]
26
+ array = Arrow::NullArray.new(records.size)
27
+ schema = Arrow::Schema.new(column: :null)
28
+ record_batch = Arrow::RecordBatch.new(schema,
29
+ records.size,
30
+ [array])
31
+ assert_equal(records, record_batch.raw_records)
32
+ end
33
+
34
+ test("BooleanArray") do
35
+ records = [
36
+ [true],
37
+ [nil],
38
+ [false],
39
+ ]
40
+ record_batch = Arrow::RecordBatch.new({column: :boolean},
41
+ records)
42
+ assert_equal(records, record_batch.raw_records)
43
+ end
44
+
45
+ test("Int8Array") do
46
+ records = [
47
+ [-(2 ** 7)],
48
+ [nil],
49
+ [(2 ** 7) - 1],
50
+ ]
51
+ record_batch = Arrow::RecordBatch.new({column: :int8},
52
+ records)
53
+ assert_equal(records, record_batch.raw_records)
54
+ end
55
+
56
+ test("UInt8Array") do
57
+ records = [
58
+ [0],
59
+ [nil],
60
+ [(2 ** 8) - 1],
61
+ ]
62
+ record_batch = Arrow::RecordBatch.new({column: :uint8},
63
+ records)
64
+ assert_equal(records, record_batch.raw_records)
65
+ end
66
+
67
+ test("Int16Array") do
68
+ records = [
69
+ [-(2 ** 15)],
70
+ [nil],
71
+ [(2 ** 15) - 1],
72
+ ]
73
+ record_batch = Arrow::RecordBatch.new({column: :int16},
74
+ records)
75
+ assert_equal(records, record_batch.raw_records)
76
+ end
77
+
78
+ test("UInt16Array") do
79
+ records = [
80
+ [0],
81
+ [nil],
82
+ [(2 ** 16) - 1],
83
+ ]
84
+ record_batch = Arrow::RecordBatch.new({column: :uint16},
85
+ records)
86
+ assert_equal(records, record_batch.raw_records)
87
+ end
88
+
89
+ test("Int32Array") do
90
+ records = [
91
+ [-(2 ** 31)],
92
+ [nil],
93
+ [(2 ** 31) - 1],
94
+ ]
95
+ record_batch = Arrow::RecordBatch.new({column: :int32},
96
+ records)
97
+ assert_equal(records, record_batch.raw_records)
98
+ end
99
+
100
+ test("UInt32Array") do
101
+ records = [
102
+ [0],
103
+ [nil],
104
+ [(2 ** 32) - 1],
105
+ ]
106
+ record_batch = Arrow::RecordBatch.new({column: :uint32},
107
+ records)
108
+ assert_equal(records, record_batch.raw_records)
109
+ end
110
+
111
+ test("Int64Array") do
112
+ records = [
113
+ [-(2 ** 63)],
114
+ [nil],
115
+ [(2 ** 63) - 1],
116
+ ]
117
+ record_batch = Arrow::RecordBatch.new({column: :int64},
118
+ records)
119
+ assert_equal(records, record_batch.raw_records)
120
+ end
121
+
122
+ test("UInt64Array") do
123
+ records = [
124
+ [0],
125
+ [nil],
126
+ [(2 ** 64) - 1],
127
+ ]
128
+ record_batch = Arrow::RecordBatch.new({column: :uint64},
129
+ records)
130
+ assert_equal(records, record_batch.raw_records)
131
+ end
132
+
133
+ test("FloatArray") do
134
+ records = [
135
+ [-1.0],
136
+ [nil],
137
+ [1.0],
138
+ ]
139
+ record_batch = Arrow::RecordBatch.new({column: :float},
140
+ records)
141
+ assert_equal(records, record_batch.raw_records)
142
+ end
143
+
144
+ test("DoubleArray") do
145
+ records = [
146
+ [-1.0],
147
+ [nil],
148
+ [1.0],
149
+ ]
150
+ record_batch = Arrow::RecordBatch.new({column: :double},
151
+ records)
152
+ assert_equal(records, record_batch.raw_records)
153
+ end
154
+
155
+ test("BinaryArray") do
156
+ records = [
157
+ ["\x00".b],
158
+ [nil],
159
+ ["\xff".b],
160
+ ]
161
+ record_batch = Arrow::RecordBatch.new({column: :binary},
162
+ records)
163
+ assert_equal(records, record_batch.raw_records)
164
+ end
165
+
166
+ test("StringArray") do
167
+ records = [
168
+ ["Ruby"],
169
+ [nil],
170
+ ["\u3042"], # U+3042 HIRAGANA LETTER A
171
+ ]
172
+ record_batch = Arrow::RecordBatch.new({column: :string},
173
+ records)
174
+ assert_equal(records, record_batch.raw_records)
175
+ end
176
+
177
+ test("Date32Array") do
178
+ records = [
179
+ [Date.new(1960, 1, 1)],
180
+ [nil],
181
+ [Date.new(2017, 8, 23)],
182
+ ]
183
+ record_batch = Arrow::RecordBatch.new({column: :date32},
184
+ records)
185
+ assert_equal(records, record_batch.raw_records)
186
+ end
187
+
188
+ test("Date64Array") do
189
+ records = [
190
+ [DateTime.new(1960, 1, 1, 2, 9, 30)],
191
+ [nil],
192
+ [DateTime.new(2017, 8, 23, 14, 57, 2)],
193
+ ]
194
+ record_batch = Arrow::RecordBatch.new({column: :date64},
195
+ records)
196
+ assert_equal(records, record_batch.raw_records)
197
+ end
198
+
199
+ sub_test_case("TimestampArray") do
200
+ test("second") do
201
+ records = [
202
+ [Time.parse("1960-01-01T02:09:30Z")],
203
+ [nil],
204
+ [Time.parse("2017-08-23T14:57:02Z")],
205
+ ]
206
+ record_batch = Arrow::RecordBatch.new({
207
+ column: {
208
+ type: :timestamp,
209
+ unit: :second,
210
+ }
211
+ },
212
+ records)
213
+ assert_equal(records, record_batch.raw_records)
214
+ end
215
+
216
+ test("milli") do
217
+ records = [
218
+ [Time.parse("1960-01-01T02:09:30.123Z")],
219
+ [nil],
220
+ [Time.parse("2017-08-23T14:57:02.987Z")],
221
+ ]
222
+ record_batch = Arrow::RecordBatch.new({
223
+ column: {
224
+ type: :timestamp,
225
+ unit: :milli,
226
+ }
227
+ },
228
+ records)
229
+ assert_equal(records, record_batch.raw_records)
230
+ end
231
+
232
+ test("micro") do
233
+ records = [
234
+ [Time.parse("1960-01-01T02:09:30.123456Z")],
235
+ [nil],
236
+ [Time.parse("2017-08-23T14:57:02.987654Z")],
237
+ ]
238
+ record_batch = Arrow::RecordBatch.new({
239
+ column: {
240
+ type: :timestamp,
241
+ unit: :micro,
242
+ }
243
+ },
244
+ records)
245
+ assert_equal(records, record_batch.raw_records)
246
+ end
247
+
248
+ test("nano") do
249
+ records = [
250
+ [Time.parse("1960-01-01T02:09:30.123456789Z")],
251
+ [nil],
252
+ [Time.parse("2017-08-23T14:57:02.987654321Z")],
253
+ ]
254
+ record_batch = Arrow::RecordBatch.new({
255
+ column: {
256
+ type: :timestamp,
257
+ unit: :nano,
258
+ }
259
+ },
260
+ records)
261
+ assert_equal(records, record_batch.raw_records)
262
+ end
263
+ end
264
+
265
+ sub_test_case("Time32Array") do
266
+ test("second") do
267
+ records = [
268
+ [60 * 10], # 00:10:00
269
+ [nil],
270
+ [60 * 60 * 2 + 9], # 02:00:09
271
+ ]
272
+ record_batch = Arrow::RecordBatch.new({
273
+ column: {
274
+ type: :time32,
275
+ unit: :second,
276
+ }
277
+ },
278
+ records)
279
+ assert_equal(records, record_batch.raw_records)
280
+ end
281
+
282
+ test("milli") do
283
+ records = [
284
+ [(60 * 10) * 1000 + 123], # 00:10:00.123
285
+ [nil],
286
+ [(60 * 60 * 2 + 9) * 1000 + 987], # 02:00:09.987
287
+ ]
288
+ record_batch = Arrow::RecordBatch.new({
289
+ column: {
290
+ type: :time32,
291
+ unit: :milli,
292
+ }
293
+ },
294
+ records)
295
+ assert_equal(records, record_batch.raw_records)
296
+ end
297
+ end
298
+
299
+ sub_test_case("Time64Array") do
300
+ test("micro") do
301
+ records = [
302
+ [(60 * 10) * 1_000_000 + 123_456], # 00:10:00.123456
303
+ [nil],
304
+ [(60 * 60 * 2 + 9) * 1_000_000 + 987_654], # 02:00:09.987654
305
+ ]
306
+ record_batch = Arrow::RecordBatch.new({
307
+ column: {
308
+ type: :time64,
309
+ unit: :micro,
310
+ }
311
+ },
312
+ records)
313
+ assert_equal(records, record_batch.raw_records)
314
+ end
315
+
316
+ test("nano") do
317
+ records = [
318
+ [(60 * 10) * 1_000_000_000 + 123_456_789], # 00:10:00.123456789
319
+ [nil],
320
+ [(60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321], # 02:00:09.987654321
321
+ ]
322
+ record_batch = Arrow::RecordBatch.new({
323
+ column: {
324
+ type: :time64,
325
+ unit: :nano,
326
+ }
327
+ },
328
+ records)
329
+ assert_equal(records, record_batch.raw_records)
330
+ end
331
+ end
332
+
333
+ test("Decimal128Array") do
334
+ records = [
335
+ [BigDecimal("92.92")],
336
+ [nil],
337
+ [BigDecimal("29.29")],
338
+ ]
339
+ record_batch = Arrow::RecordBatch.new({
340
+ column: {
341
+ type: :decimal128,
342
+ precision: 8,
343
+ scale: 2,
344
+ }
345
+ },
346
+ records)
347
+ assert_equal(records, record_batch.raw_records)
348
+ end
349
+ end