red-arrow 0.16.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +60 -30
  4. data/ext/arrow/extconf.rb +14 -3
  5. data/ext/arrow/raw-records.cpp +2 -1
  6. data/ext/arrow/values.cpp +2 -1
  7. data/lib/arrow/buffer.rb +28 -0
  8. data/lib/arrow/dictionary-array.rb +24 -0
  9. data/lib/arrow/generic-filterable.rb +7 -7
  10. data/lib/arrow/generic-takeable.rb +2 -2
  11. data/lib/arrow/loader.rb +3 -0
  12. data/lib/arrow/null-array-builder.rb +1 -1
  13. data/lib/arrow/raw-table-converter.rb +47 -0
  14. data/lib/arrow/record-batch-iterator.rb +22 -0
  15. data/lib/arrow/record-batch.rb +9 -1
  16. data/lib/arrow/schema.rb +5 -0
  17. data/lib/arrow/struct-array-builder.rb +13 -7
  18. data/lib/arrow/table-saver.rb +8 -4
  19. data/lib/arrow/table.rb +8 -25
  20. data/lib/arrow/version.rb +1 -1
  21. data/test/helper.rb +1 -0
  22. data/test/helper/omittable.rb +36 -0
  23. data/test/raw-records/test-dense-union-array.rb +1 -34
  24. data/test/raw-records/test-sparse-union-array.rb +1 -33
  25. data/test/run-test.rb +14 -3
  26. data/test/test-array.rb +5 -3
  27. data/test/test-buffer.rb +11 -0
  28. data/test/test-chunked-array.rb +5 -3
  29. data/test/test-dense-union-data-type.rb +2 -2
  30. data/test/test-dictionary-array.rb +41 -0
  31. data/test/test-feather.rb +21 -6
  32. data/test/test-record-batch-iterator.rb +37 -0
  33. data/test/test-record-batch.rb +14 -0
  34. data/test/test-schema.rb +16 -0
  35. data/test/test-sparse-union-data-type.rb +2 -2
  36. data/test/test-struct-array-builder.rb +8 -4
  37. data/test/test-table.rb +9 -3
  38. data/test/values/test-dense-union-array.rb +1 -34
  39. data/test/values/test-sparse-union-array.rb +1 -33
  40. metadata +68 -59
@@ -19,7 +19,7 @@ module Arrow
19
19
  class NullArrayBuilder
20
20
  class << self
21
21
  def buildable?(args)
22
- super and args.collect(&:class) != [Integer]
22
+ super and not (args.size == 1 and args[0].is_a?(Integer))
23
23
  end
24
24
  end
25
25
  end
@@ -0,0 +1,47 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class RawTableConverter
20
+ attr_reader :n_rows
21
+ attr_reader :schema
22
+ attr_reader :values
23
+ def initialize(raw_table)
24
+ @raw_table = raw_table
25
+ convert
26
+ end
27
+
28
+ private
29
+ def convert
30
+ if @raw_table.is_a?(::Array) and @raw_table[0].is_a?(Column)
31
+ fields = @raw_table.collect(&:field)
32
+ @schema = Schema.new(fields)
33
+ @values = @raw_table.collect(&:data)
34
+ else
35
+ fields = []
36
+ @values = []
37
+ @raw_table.each do |name, array|
38
+ array = ArrayBuilder.build(array) if array.is_a?(::Array)
39
+ fields << Field.new(name.to_s, array.value_data_type)
40
+ @values << array
41
+ end
42
+ @schema = Schema.new(fields)
43
+ end
44
+ @n_rows = @values[0].length
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,22 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class RecordBatchIterator
20
+ alias_method :to_a, :to_list
21
+ end
22
+ end
@@ -15,6 +15,8 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ require "arrow/raw-table-converter"
19
+
18
20
  module Arrow
19
21
  class RecordBatch
20
22
  include ColumnContainable
@@ -25,13 +27,19 @@ module Arrow
25
27
  def new(*args)
26
28
  n_args = args.size
27
29
  case n_args
30
+ when 1
31
+ raw_table_converter = RawTableConverter.new(args[0])
32
+ n_rows = raw_table_converter.n_rows
33
+ schema = raw_table_converter.schema
34
+ values = raw_table_converter.values
35
+ super(schema, n_rows, values)
28
36
  when 2
29
37
  schema, data = args
30
38
  RecordBatchBuilder.build(schema, data)
31
39
  when 3
32
40
  super
33
41
  else
34
- message = "wrong number of arguments (given #{n_args}, expected 2..3)"
42
+ message = "wrong number of arguments (given #{n_args}, expected 1..3)"
35
43
  raise ArgumentError, message
36
44
  end
37
45
  end
@@ -91,5 +91,10 @@ module Arrow
91
91
  end
92
92
 
93
93
  alias_method :[], :find_field
94
+
95
+ alias_method :to_s_raw, :to_s
96
+ def to_s(show_metadata: false)
97
+ to_string_metadata(show_metadata)
98
+ end
94
99
  end
95
100
  end
@@ -32,7 +32,7 @@ module Arrow
32
32
  case index_or_name
33
33
  when String, Symbol
34
34
  name = index_or_name
35
- (@name_to_builder ||= build_name_to_builder)[name.to_s]
35
+ cached_name_to_builder[name.to_s]
36
36
  else
37
37
  index = index_or_name
38
38
  cached_field_builders[index]
@@ -70,13 +70,18 @@ module Arrow
70
70
  append_null
71
71
  when ::Array
72
72
  append_value_raw
73
- value.each_with_index do |sub_value, i|
74
- self[i].append(sub_value)
73
+ cached_field_builders.zip(value) do |builder, sub_value|
74
+ builder.append(sub_value)
75
75
  end
76
76
  when Hash
77
77
  append_value_raw
78
+ local_name_to_builder = cached_name_to_builder.dup
78
79
  value.each do |name, sub_value|
79
- self[name].append(sub_value)
80
+ builder = local_name_to_builder.delete(name.to_s)
81
+ builder.append(sub_value)
82
+ end
83
+ local_name_to_builder.each do |_, builder|
84
+ builder.append_null
80
85
  end
81
86
  else
82
87
  message =
@@ -108,9 +113,6 @@ module Arrow
108
113
  alias_method :append_null_raw, :append_null
109
114
  def append_null
110
115
  append_null_raw
111
- cached_field_builders.each do |builder|
112
- builder.append_null
113
- end
114
116
  end
115
117
 
116
118
  # @since 0.12.0
@@ -136,5 +138,9 @@ module Arrow
136
138
  end
137
139
  name_to_builder
138
140
  end
141
+
142
+ def cached_name_to_builder
143
+ @name_to_builder ||= build_name_to_builder
144
+ end
139
145
  end
140
146
  end
@@ -155,10 +155,14 @@ module Arrow
155
155
  end
156
156
 
157
157
  def save_as_feather
158
- open_output_stream do |output|
159
- FeatherFileWriter.open(output) do |writer|
160
- writer.write(@table)
161
- end
158
+ properties = FeatherWriteProperties.new
159
+ properties.class.properties.each do |name|
160
+ value = @options[name.to_sym]
161
+ next if value.nil?
162
+ properties.__send__("#{name}=", value)
163
+ end
164
+ open_raw_output_stream do |output|
165
+ @table.write_as_feather(output, properties)
162
166
  end
163
167
  end
164
168
  end
@@ -15,6 +15,8 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ require "arrow/raw-table-converter"
19
+
18
20
  module Arrow
19
21
  class Table
20
22
  include ColumnContainable
@@ -81,14 +83,6 @@ module Arrow
81
83
  # `Array`.
82
84
  #
83
85
  # @example Create a table from column name and values
84
- # count_chunks = [
85
- # Arrow::UInt32Array.new([0, 2]),
86
- # Arrow::UInt32Array.new([nil, 4]),
87
- # ]
88
- # visible_chunks = [
89
- # Arrow::BooleanArray.new([true]),
90
- # Arrow::BooleanArray.new([nil, nil, false]),
91
- # ]
92
86
  # Arrow::Table.new("count" => [0, 2, nil, 4],
93
87
  # "visible" => [true, nil, nil, false])
94
88
  #
@@ -169,22 +163,9 @@ module Arrow
169
163
  n_args = args.size
170
164
  case n_args
171
165
  when 1
172
- if args[0][0].is_a?(Column)
173
- columns = args[0]
174
- fields = columns.collect(&:field)
175
- values = columns.collect(&:data)
176
- schema = Schema.new(fields)
177
- else
178
- raw_table = args[0]
179
- fields = []
180
- values = []
181
- raw_table.each do |name, array|
182
- array = ArrayBuilder.build(array) if array.is_a?(::Array)
183
- fields << Field.new(name.to_s, array.value_data_type)
184
- values << array
185
- end
186
- schema = Schema.new(fields)
187
- end
166
+ raw_table_converter = RawTableConverter.new(args[0])
167
+ schema = raw_table_converter.schema
168
+ values = raw_table_converter.values
188
169
  when 2
189
170
  schema = args[0]
190
171
  schema = Schema.new(schema) unless schema.is_a?(Schema)
@@ -304,6 +285,8 @@ module Arrow
304
285
  end
305
286
  end
306
287
 
288
+ filter_options = Arrow::FilterOptions.new
289
+ filter_options.null_selection_behavior = :emit_null
307
290
  sliced_tables = []
308
291
  slicers.each do |slicer|
309
292
  slicer = slicer.evaluate if slicer.respond_to?(:evaluate)
@@ -325,7 +308,7 @@ module Arrow
325
308
  to += n_rows if to < 0
326
309
  sliced_tables << slice_by_range(from, to)
327
310
  when ::Array, BooleanArray, ChunkedArray
328
- sliced_tables << filter(slicer)
311
+ sliced_tables << filter(slicer, filter_options)
329
312
  else
330
313
  message = "slicer must be Integer, Range, (from, to), " +
331
314
  "Arrow::ChunkedArray of Arrow::BooleanArray, " +
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "0.16.0"
19
+ VERSION = "2.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -24,3 +24,4 @@ require "zlib"
24
24
  require "test-unit"
25
25
 
26
26
  require_relative "helper/fixture"
27
+ require_relative "helper/omittable"
@@ -0,0 +1,36 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Helper
19
+ module Omittable
20
+ def require_gi_bindings(major, minor, micro)
21
+ return if GLib.check_binding_version?(major, minor, micro)
22
+ message =
23
+ "Require gobject-introspection #{major}.#{minor}.#{micro} or later: " +
24
+ GLib::BINDING_VERSION.join(".")
25
+ omit(message)
26
+ end
27
+
28
+ def require_gi(major, minor, micro)
29
+ return if GObjectIntrospection::Version.or_later?(major, minor, micro)
30
+ message =
31
+ "Require GObject Introspection #{major}.#{minor}.#{micro} or later: " +
32
+ GObjectIntrospection::Version::STRING
33
+ omit(message)
34
+ end
35
+ end
36
+ end
@@ -56,10 +56,7 @@ module RawRecordsDenseUnionArrayTests
56
56
  end
57
57
  records.each do |record|
58
58
  column = record[0]
59
- if column.nil?
60
- type_ids << nil
61
- offsets << 0
62
- elsif column.key?("0")
59
+ if column.key?("0")
63
60
  type_id = type_codes[0]
64
61
  type_ids << type_id
65
62
  offsets << (type_ids.count(type_id) - 1)
@@ -82,7 +79,6 @@ module RawRecordsDenseUnionArrayTests
82
79
  def test_null
83
80
  records = [
84
81
  [{"0" => nil}],
85
- [nil],
86
82
  ]
87
83
  target = build(:null, records)
88
84
  assert_equal(records, target.raw_records)
@@ -91,7 +87,6 @@ module RawRecordsDenseUnionArrayTests
91
87
  def test_boolean
92
88
  records = [
93
89
  [{"0" => true}],
94
- [nil],
95
90
  [{"1" => nil}],
96
91
  ]
97
92
  target = build(:boolean, records)
@@ -101,7 +96,6 @@ module RawRecordsDenseUnionArrayTests
101
96
  def test_int8
102
97
  records = [
103
98
  [{"0" => -(2 ** 7)}],
104
- [nil],
105
99
  [{"1" => nil}],
106
100
  ]
107
101
  target = build(:int8, records)
@@ -111,7 +105,6 @@ module RawRecordsDenseUnionArrayTests
111
105
  def test_uint8
112
106
  records = [
113
107
  [{"0" => (2 ** 8) - 1}],
114
- [nil],
115
108
  [{"1" => nil}],
116
109
  ]
117
110
  target = build(:uint8, records)
@@ -121,7 +114,6 @@ module RawRecordsDenseUnionArrayTests
121
114
  def test_int16
122
115
  records = [
123
116
  [{"0" => -(2 ** 15)}],
124
- [nil],
125
117
  [{"1" => nil}],
126
118
  ]
127
119
  target = build(:int16, records)
@@ -131,7 +123,6 @@ module RawRecordsDenseUnionArrayTests
131
123
  def test_uint16
132
124
  records = [
133
125
  [{"0" => (2 ** 16) - 1}],
134
- [nil],
135
126
  [{"1" => nil}],
136
127
  ]
137
128
  target = build(:uint16, records)
@@ -141,7 +132,6 @@ module RawRecordsDenseUnionArrayTests
141
132
  def test_int32
142
133
  records = [
143
134
  [{"0" => -(2 ** 31)}],
144
- [nil],
145
135
  [{"1" => nil}],
146
136
  ]
147
137
  target = build(:int32, records)
@@ -151,7 +141,6 @@ module RawRecordsDenseUnionArrayTests
151
141
  def test_uint32
152
142
  records = [
153
143
  [{"0" => (2 ** 32) - 1}],
154
- [nil],
155
144
  [{"1" => nil}],
156
145
  ]
157
146
  target = build(:uint32, records)
@@ -161,7 +150,6 @@ module RawRecordsDenseUnionArrayTests
161
150
  def test_int64
162
151
  records = [
163
152
  [{"0" => -(2 ** 63)}],
164
- [nil],
165
153
  [{"1" => nil}],
166
154
  ]
167
155
  target = build(:int64, records)
@@ -171,7 +159,6 @@ module RawRecordsDenseUnionArrayTests
171
159
  def test_uint64
172
160
  records = [
173
161
  [{"0" => (2 ** 64) - 1}],
174
- [nil],
175
162
  [{"1" => nil}],
176
163
  ]
177
164
  target = build(:uint64, records)
@@ -181,7 +168,6 @@ module RawRecordsDenseUnionArrayTests
181
168
  def test_float
182
169
  records = [
183
170
  [{"0" => -1.0}],
184
- [nil],
185
171
  [{"1" => nil}],
186
172
  ]
187
173
  target = build(:float, records)
@@ -191,7 +177,6 @@ module RawRecordsDenseUnionArrayTests
191
177
  def test_double
192
178
  records = [
193
179
  [{"0" => -1.0}],
194
- [nil],
195
180
  [{"1" => nil}],
196
181
  ]
197
182
  target = build(:double, records)
@@ -201,7 +186,6 @@ module RawRecordsDenseUnionArrayTests
201
186
  def test_binary
202
187
  records = [
203
188
  [{"0" => "\xff".b}],
204
- [nil],
205
189
  [{"1" => nil}],
206
190
  ]
207
191
  target = build(:binary, records)
@@ -211,7 +195,6 @@ module RawRecordsDenseUnionArrayTests
211
195
  def test_string
212
196
  records = [
213
197
  [{"0" => "Ruby"}],
214
- [nil],
215
198
  [{"1" => nil}],
216
199
  ]
217
200
  target = build(:string, records)
@@ -221,7 +204,6 @@ module RawRecordsDenseUnionArrayTests
221
204
  def test_date32
222
205
  records = [
223
206
  [{"0" => Date.new(1960, 1, 1)}],
224
- [nil],
225
207
  [{"1" => nil}],
226
208
  ]
227
209
  target = build(:date32, records)
@@ -231,7 +213,6 @@ module RawRecordsDenseUnionArrayTests
231
213
  def test_date64
232
214
  records = [
233
215
  [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
234
- [nil],
235
216
  [{"1" => nil}],
236
217
  ]
237
218
  target = build(:date64, records)
@@ -241,7 +222,6 @@ module RawRecordsDenseUnionArrayTests
241
222
  def test_timestamp_second
242
223
  records = [
243
224
  [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
244
- [nil],
245
225
  [{"1" => nil}],
246
226
  ]
247
227
  target = build({
@@ -255,7 +235,6 @@ module RawRecordsDenseUnionArrayTests
255
235
  def test_timestamp_milli
256
236
  records = [
257
237
  [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
258
- [nil],
259
238
  [{"1" => nil}],
260
239
  ]
261
240
  target = build({
@@ -269,7 +248,6 @@ module RawRecordsDenseUnionArrayTests
269
248
  def test_timestamp_micro
270
249
  records = [
271
250
  [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
272
- [nil],
273
251
  [{"1" => nil}],
274
252
  ]
275
253
  target = build({
@@ -283,7 +261,6 @@ module RawRecordsDenseUnionArrayTests
283
261
  def test_timestamp_nano
284
262
  records = [
285
263
  [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
286
- [nil],
287
264
  [{"1" => nil}],
288
265
  ]
289
266
  target = build({
@@ -299,7 +276,6 @@ module RawRecordsDenseUnionArrayTests
299
276
  records = [
300
277
  # 00:10:00
301
278
  [{"0" => Arrow::Time.new(unit, 60 * 10)}],
302
- [nil],
303
279
  [{"1" => nil}],
304
280
  ]
305
281
  target = build({
@@ -315,7 +291,6 @@ module RawRecordsDenseUnionArrayTests
315
291
  records = [
316
292
  # 00:10:00.123
317
293
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
318
- [nil],
319
294
  [{"1" => nil}],
320
295
  ]
321
296
  target = build({
@@ -331,7 +306,6 @@ module RawRecordsDenseUnionArrayTests
331
306
  records = [
332
307
  # 00:10:00.123456
333
308
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
334
- [nil],
335
309
  [{"1" => nil}],
336
310
  ]
337
311
  target = build({
@@ -347,7 +321,6 @@ module RawRecordsDenseUnionArrayTests
347
321
  records = [
348
322
  # 00:10:00.123456789
349
323
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
350
- [nil],
351
324
  [{"1" => nil}],
352
325
  ]
353
326
  target = build({
@@ -361,7 +334,6 @@ module RawRecordsDenseUnionArrayTests
361
334
  def test_decimal128
362
335
  records = [
363
336
  [{"0" => BigDecimal("92.92")}],
364
- [nil],
365
337
  [{"1" => nil}],
366
338
  ]
367
339
  target = build({
@@ -376,7 +348,6 @@ module RawRecordsDenseUnionArrayTests
376
348
  def test_list
377
349
  records = [
378
350
  [{"0" => [true, nil, false]}],
379
- [nil],
380
351
  [{"1" => nil}],
381
352
  ]
382
353
  target = build({
@@ -393,7 +364,6 @@ module RawRecordsDenseUnionArrayTests
393
364
  def test_struct
394
365
  records = [
395
366
  [{"0" => {"sub_field" => true}}],
396
- [nil],
397
367
  [{"1" => nil}],
398
368
  [{"0" => {"sub_field" => nil}}],
399
369
  ]
@@ -414,7 +384,6 @@ module RawRecordsDenseUnionArrayTests
414
384
  omit("Need to add support for SparseUnionArrayBuilder")
415
385
  records = [
416
386
  [{"0" => {"field1" => true}}],
417
- [nil],
418
387
  [{"1" => nil}],
419
388
  [{"0" => {"field2" => nil}}],
420
389
  ]
@@ -440,7 +409,6 @@ module RawRecordsDenseUnionArrayTests
440
409
  omit("Need to add support for DenseUnionArrayBuilder")
441
410
  records = [
442
411
  [{"0" => {"field1" => true}}],
443
- [nil],
444
412
  [{"1" => nil}],
445
413
  [{"0" => {"field2" => nil}}],
446
414
  ]
@@ -466,7 +434,6 @@ module RawRecordsDenseUnionArrayTests
466
434
  omit("Need to add support for DictionaryArrayBuilder")
467
435
  records = [
468
436
  [{"0" => "Ruby"}],
469
- [nil],
470
437
  [{"1" => nil}],
471
438
  [{"0" => "GLib"}],
472
439
  ]