red-arrow 0.15.1 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/arrow/converters.hpp +6 -6
- data/lib/arrow/array-builder.rb +101 -52
- data/lib/arrow/array.rb +28 -10
- data/lib/arrow/chunked-array.rb +2 -0
- data/lib/arrow/csv-loader.rb +5 -0
- data/lib/arrow/csv-read-options.rb +18 -0
- data/lib/arrow/data-type.rb +35 -2
- data/lib/arrow/decimal128-array-builder.rb +0 -2
- data/lib/arrow/field.rb +1 -1
- data/lib/arrow/generic-filterable.rb +43 -0
- data/lib/arrow/generic-takeable.rb +38 -0
- data/lib/arrow/list-data-type.rb +58 -8
- data/lib/arrow/loader.rb +9 -1
- data/lib/arrow/{binary-array-builder.rb → null-array.rb} +3 -15
- data/lib/arrow/record-batch.rb +0 -3
- data/lib/arrow/schema.rb +0 -2
- data/lib/arrow/struct-data-type.rb +0 -2
- data/lib/arrow/table-loader.rb +29 -6
- data/lib/arrow/table-saver.rb +29 -9
- data/lib/arrow/table.rb +14 -50
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +3 -1
- data/test/test-array-builder.rb +17 -0
- data/test/test-array.rb +102 -0
- data/test/test-chunked-array.rb +94 -0
- data/test/test-csv-loader.rb +2 -2
- data/test/test-data-type.rb +11 -0
- data/test/test-list-data-type.rb +27 -1
- data/test/test-null-array.rb +23 -0
- data/test/test-slicer.rb +74 -30
- data/test/test-table.rb +147 -14
- data/test/test-timestamp-array.rb +19 -0
- metadata +60 -55
data/lib/arrow/list-data-type.rb
CHANGED
@@ -53,16 +53,66 @@ module Arrow
|
|
53
53
|
#
|
54
54
|
# @example Create a list data type with field description
|
55
55
|
# Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
56
|
+
#
|
57
|
+
# @overload initialize(data_type)
|
58
|
+
#
|
59
|
+
# @param data_type [Arrow::DataType, String, Symbol,
|
60
|
+
# ::Array<String>, ::Array<Symbol>, Hash] The element data
|
61
|
+
# type of the list data type. A field is created with the
|
62
|
+
# default name `"item"` from the data type automatically.
|
63
|
+
#
|
64
|
+
# See {Arrow::DataType.resolve} for how to specify a data type.
|
65
|
+
#
|
66
|
+
# @example Create a list data type with {Arrow::DataType}
|
67
|
+
# Arrow::ListDataType.new(Arrow::BooleanDataType.new)
|
68
|
+
#
|
69
|
+
# @example Create a list data type with data type name as String
|
70
|
+
# Arrow::ListDataType.new("boolean")
|
71
|
+
#
|
72
|
+
# @example Create a list data type with data type name as Symbol
|
73
|
+
# Arrow::ListDataType.new(:boolean)
|
74
|
+
#
|
75
|
+
# @example Create a list data type with data type as Array
|
76
|
+
# Arrow::ListDataType.new([:time32, :milli])
|
77
|
+
def initialize(arg)
|
78
|
+
data_type = resolve_data_type(arg)
|
79
|
+
if data_type
|
80
|
+
field = Field.new(default_field_name, data_type)
|
81
|
+
else
|
82
|
+
field = resolve_field(arg)
|
64
83
|
end
|
65
84
|
initialize_raw(field)
|
66
85
|
end
|
86
|
+
|
87
|
+
private
|
88
|
+
def resolve_data_type(arg)
|
89
|
+
case arg
|
90
|
+
when DataType, String, Symbol, ::Array
|
91
|
+
DataType.resolve(arg)
|
92
|
+
when Hash
|
93
|
+
return nil if arg[:name]
|
94
|
+
return nil unless arg[:type]
|
95
|
+
DataType.resolve(arg)
|
96
|
+
else
|
97
|
+
nil
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def default_field_name
|
102
|
+
"item"
|
103
|
+
end
|
104
|
+
|
105
|
+
def resolve_field(arg)
|
106
|
+
if arg.is_a?(Hash) and arg.key?(:field)
|
107
|
+
description = arg
|
108
|
+
arg = description[:field]
|
109
|
+
end
|
110
|
+
if arg.is_a?(Hash)
|
111
|
+
field_description = arg
|
112
|
+
Field.new(field_description)
|
113
|
+
else
|
114
|
+
arg
|
115
|
+
end
|
116
|
+
end
|
67
117
|
end
|
68
118
|
end
|
data/lib/arrow/loader.rb
CHANGED
@@ -32,9 +32,15 @@ module Arrow
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def require_libraries
|
35
|
+
require "arrow/column-containable"
|
36
|
+
require "arrow/field-containable"
|
37
|
+
require "arrow/generic-filterable"
|
38
|
+
require "arrow/generic-takeable"
|
39
|
+
require "arrow/record-containable"
|
40
|
+
|
35
41
|
require "arrow/array"
|
36
42
|
require "arrow/array-builder"
|
37
|
-
require "arrow/
|
43
|
+
require "arrow/bigdecimal-extension"
|
38
44
|
require "arrow/chunked-array"
|
39
45
|
require "arrow/column"
|
40
46
|
require "arrow/compression-type"
|
@@ -53,8 +59,10 @@ module Arrow
|
|
53
59
|
require "arrow/dictionary-data-type"
|
54
60
|
require "arrow/field"
|
55
61
|
require "arrow/file-output-stream"
|
62
|
+
require "arrow/group"
|
56
63
|
require "arrow/list-array-builder"
|
57
64
|
require "arrow/list-data-type"
|
65
|
+
require "arrow/null-array"
|
58
66
|
require "arrow/null-array-builder"
|
59
67
|
require "arrow/path-extension"
|
60
68
|
require "arrow/record"
|
@@ -16,21 +16,9 @@
|
|
16
16
|
# under the License.
|
17
17
|
|
18
18
|
module Arrow
|
19
|
-
class
|
20
|
-
def
|
21
|
-
|
22
|
-
is_valids.each_with_index do |is_valid, i|
|
23
|
-
if is_valid
|
24
|
-
append_value(values[i])
|
25
|
-
else
|
26
|
-
append_null
|
27
|
-
end
|
28
|
-
end
|
29
|
-
else
|
30
|
-
values.each do |value|
|
31
|
-
append_value(value)
|
32
|
-
end
|
33
|
-
end
|
19
|
+
class NullArray
|
20
|
+
def get_value(i)
|
21
|
+
nil
|
34
22
|
end
|
35
23
|
end
|
36
24
|
end
|
data/lib/arrow/record-batch.rb
CHANGED
data/lib/arrow/schema.rb
CHANGED
data/lib/arrow/table-loader.rb
CHANGED
@@ -41,6 +41,8 @@ module Arrow
|
|
41
41
|
available_formats << match_data.post_match
|
42
42
|
end
|
43
43
|
end
|
44
|
+
deprecated_formats = ["batch", "stream"]
|
45
|
+
available_formats -= deprecated_formats
|
44
46
|
message = "Arrow::Table load format must be one of ["
|
45
47
|
message << available_formats.join(", ")
|
46
48
|
message << "]: #{format.inspect}"
|
@@ -119,18 +121,30 @@ module Arrow
|
|
119
121
|
load_raw(input, reader)
|
120
122
|
end
|
121
123
|
|
122
|
-
|
124
|
+
# @since 1.0.0
|
125
|
+
def load_as_arrow_file
|
123
126
|
input = open_input_stream
|
124
127
|
reader = RecordBatchFileReader.new(input)
|
125
128
|
load_raw(input, reader)
|
126
129
|
end
|
127
130
|
|
128
|
-
|
131
|
+
# @deprecated Use `format: :arrow_file` instead.
|
132
|
+
def load_as_batch
|
133
|
+
load_as_arrow_file
|
134
|
+
end
|
135
|
+
|
136
|
+
# @since 1.0.0
|
137
|
+
def load_as_arrow_streaming
|
129
138
|
input = open_input_stream
|
130
139
|
reader = RecordBatchStreamReader.new(input)
|
131
140
|
load_raw(input, reader)
|
132
141
|
end
|
133
142
|
|
143
|
+
# @deprecated Use `format: :arrow_streaming` instead.
|
144
|
+
def load_as_stream
|
145
|
+
load_as_arrow_streaming
|
146
|
+
end
|
147
|
+
|
134
148
|
if Arrow.const_defined?(:ORCFileReader)
|
135
149
|
def load_as_orc
|
136
150
|
input = open_input_stream
|
@@ -143,16 +157,25 @@ module Arrow
|
|
143
157
|
end
|
144
158
|
end
|
145
159
|
|
146
|
-
def
|
147
|
-
options = @options.dup
|
160
|
+
def csv_load(options)
|
148
161
|
options.delete(:format)
|
149
162
|
if @input.is_a?(Buffer)
|
150
|
-
CSVLoader.load(@input.data.to_s, options)
|
163
|
+
CSVLoader.load(@input.data.to_s, **options)
|
151
164
|
else
|
152
|
-
CSVLoader.load(Pathname.new(@input), options)
|
165
|
+
CSVLoader.load(Pathname.new(@input), **options)
|
153
166
|
end
|
154
167
|
end
|
155
168
|
|
169
|
+
def load_as_csv
|
170
|
+
csv_load(@options.dup)
|
171
|
+
end
|
172
|
+
|
173
|
+
def load_as_tsv
|
174
|
+
options = @options.dup
|
175
|
+
options[:delimiter] = "\t"
|
176
|
+
csv_load(options.dup)
|
177
|
+
end
|
178
|
+
|
156
179
|
def load_as_feather
|
157
180
|
input = open_input_stream
|
158
181
|
reader = FeatherFileReader.new(input)
|
data/lib/arrow/table-saver.rb
CHANGED
@@ -42,6 +42,8 @@ module Arrow
|
|
42
42
|
available_formats << match_data.post_match
|
43
43
|
end
|
44
44
|
end
|
45
|
+
deprecated_formats = ["batch", "stream"]
|
46
|
+
available_formats -= deprecated_formats
|
45
47
|
message = "Arrow::Table save format must be one of ["
|
46
48
|
message << available_formats.join(", ")
|
47
49
|
message << "]: #{format.inspect}"
|
@@ -110,30 +112,48 @@ module Arrow
|
|
110
112
|
end
|
111
113
|
|
112
114
|
def save_as_arrow
|
113
|
-
|
115
|
+
save_as_arrow_file
|
114
116
|
end
|
115
117
|
|
116
|
-
|
118
|
+
# @since 1.0.0
|
119
|
+
def save_as_arrow_file
|
117
120
|
save_raw(RecordBatchFileWriter)
|
118
121
|
end
|
119
122
|
|
120
|
-
|
123
|
+
# @deprecated Use `format: :arrow_file` instead.
|
124
|
+
def save_as_batch
|
125
|
+
save_as_arrow_file
|
126
|
+
end
|
127
|
+
|
128
|
+
# @since 1.0.0
|
129
|
+
def save_as_arrow_streaming
|
121
130
|
save_raw(RecordBatchStreamWriter)
|
122
131
|
end
|
123
132
|
|
124
|
-
|
133
|
+
# @deprecated Use `format: :arrow_streaming` instead.
|
134
|
+
def save_as_stream
|
135
|
+
save_as_arrow_streaming
|
136
|
+
end
|
137
|
+
|
138
|
+
def csv_save(**options)
|
125
139
|
open_output_stream do |output|
|
126
|
-
csv = CSV.new(output)
|
140
|
+
csv = CSV.new(output, **options)
|
127
141
|
names = @table.schema.fields.collect(&:name)
|
128
142
|
csv << names
|
129
|
-
@table.
|
130
|
-
csv <<
|
131
|
-
record[name]
|
132
|
-
end
|
143
|
+
@table.raw_records.each do |record|
|
144
|
+
csv << record
|
133
145
|
end
|
134
146
|
end
|
135
147
|
end
|
136
148
|
|
149
|
+
def save_as_csv
|
150
|
+
csv_save
|
151
|
+
end
|
152
|
+
|
153
|
+
def save_as_tsv
|
154
|
+
csv_save(col_sep: "\t")
|
155
|
+
end
|
156
|
+
|
137
157
|
def save_as_feather
|
138
158
|
open_output_stream do |output|
|
139
159
|
FeatherFileWriter.open(output) do |writer|
|
data/lib/arrow/table.rb
CHANGED
@@ -15,13 +15,11 @@
|
|
15
15
|
# specific language governing permissions and limitations
|
16
16
|
# under the License.
|
17
17
|
|
18
|
-
require "arrow/column-containable"
|
19
|
-
require "arrow/group"
|
20
|
-
require "arrow/record-containable"
|
21
|
-
|
22
18
|
module Arrow
|
23
19
|
class Table
|
24
20
|
include ColumnContainable
|
21
|
+
include GenericFilterable
|
22
|
+
include GenericTakeable
|
25
23
|
include RecordContainable
|
26
24
|
|
27
25
|
class << self
|
@@ -306,13 +304,13 @@ module Arrow
|
|
306
304
|
end
|
307
305
|
end
|
308
306
|
|
309
|
-
|
307
|
+
sliced_tables = []
|
310
308
|
slicers.each do |slicer|
|
311
309
|
slicer = slicer.evaluate if slicer.respond_to?(:evaluate)
|
312
310
|
case slicer
|
313
311
|
when Integer
|
314
312
|
slicer += n_rows if slicer < 0
|
315
|
-
|
313
|
+
sliced_tables << slice_by_range(slicer, n_rows - 1)
|
316
314
|
when Range
|
317
315
|
original_from = from = slicer.first
|
318
316
|
to = slicer.last
|
@@ -325,17 +323,9 @@ module Arrow
|
|
325
323
|
raise ArgumentError, message
|
326
324
|
end
|
327
325
|
to += n_rows if to < 0
|
328
|
-
|
329
|
-
when ::Array
|
330
|
-
|
331
|
-
when ChunkedArray
|
332
|
-
offset = 0
|
333
|
-
slicer.each_chunk do |array|
|
334
|
-
boolean_array_to_slice_ranges(array, offset, ranges)
|
335
|
-
offset += array.length
|
336
|
-
end
|
337
|
-
when BooleanArray
|
338
|
-
boolean_array_to_slice_ranges(slicer, 0, ranges)
|
326
|
+
sliced_tables << slice_by_range(from, to)
|
327
|
+
when ::Array, BooleanArray, ChunkedArray
|
328
|
+
sliced_tables << filter(slicer)
|
339
329
|
else
|
340
330
|
message = "slicer must be Integer, Range, (from, to), " +
|
341
331
|
"Arrow::ChunkedArray of Arrow::BooleanArray, " +
|
@@ -343,7 +333,11 @@ module Arrow
|
|
343
333
|
raise ArgumentError, message
|
344
334
|
end
|
345
335
|
end
|
346
|
-
|
336
|
+
if sliced_tables.size > 1
|
337
|
+
sliced_tables[0].concatenate(sliced_tables[1..-1])
|
338
|
+
else
|
339
|
+
sliced_tables[0]
|
340
|
+
end
|
347
341
|
end
|
348
342
|
|
349
343
|
# TODO
|
@@ -514,38 +508,8 @@ module Arrow
|
|
514
508
|
end
|
515
509
|
|
516
510
|
private
|
517
|
-
def
|
518
|
-
|
519
|
-
target_start = nil
|
520
|
-
array.each_with_index do |is_target, i|
|
521
|
-
if is_target
|
522
|
-
unless in_target
|
523
|
-
target_start = offset + i
|
524
|
-
in_target = true
|
525
|
-
end
|
526
|
-
else
|
527
|
-
if in_target
|
528
|
-
ranges << [target_start, offset + i - 1]
|
529
|
-
target_start = nil
|
530
|
-
in_target = false
|
531
|
-
end
|
532
|
-
end
|
533
|
-
end
|
534
|
-
if in_target
|
535
|
-
ranges << [target_start, offset + array.length - 1]
|
536
|
-
end
|
537
|
-
end
|
538
|
-
|
539
|
-
def slice_by_ranges(ranges)
|
540
|
-
sliced_table = []
|
541
|
-
ranges.each do |from, to|
|
542
|
-
sliced_table << slice_raw(from, to - from + 1)
|
543
|
-
end
|
544
|
-
if sliced_table.size > 1
|
545
|
-
sliced_table[0].concatenate(sliced_table[1..-1])
|
546
|
-
else
|
547
|
-
sliced_table[0]
|
548
|
-
end
|
511
|
+
def slice_by_range(from, to)
|
512
|
+
slice_raw(from, to - from + 1)
|
549
513
|
end
|
550
514
|
|
551
515
|
def ensure_raw_column(name, data)
|
data/lib/arrow/version.rb
CHANGED
data/red-arrow.gemspec
CHANGED
@@ -59,5 +59,7 @@ Gem::Specification.new do |spec|
|
|
59
59
|
spec.add_development_dependency("test-unit")
|
60
60
|
spec.add_development_dependency("yard")
|
61
61
|
|
62
|
-
|
62
|
+
required_msys2_package_version = version_components[0, 3].join(".")
|
63
|
+
spec.metadata["msys2_mingw_dependencies"] =
|
64
|
+
"arrow>=#{required_msys2_package_version}"
|
63
65
|
end
|
data/test/test-array-builder.rb
CHANGED
@@ -60,6 +60,23 @@ class ArrayBuilderTest < Test::Unit::TestCase
|
|
60
60
|
DateTime.new(2018, 1, 5, 0, 23, 21),
|
61
61
|
])
|
62
62
|
end
|
63
|
+
|
64
|
+
test("list<boolean>s") do
|
65
|
+
assert_build(Arrow::ArrayBuilder,
|
66
|
+
[
|
67
|
+
[nil, true, false],
|
68
|
+
nil,
|
69
|
+
[false],
|
70
|
+
])
|
71
|
+
end
|
72
|
+
|
73
|
+
test("list<string>s") do
|
74
|
+
assert_build(Arrow::ArrayBuilder,
|
75
|
+
[
|
76
|
+
["Hello", "World"],
|
77
|
+
["Apache Arrow"],
|
78
|
+
])
|
79
|
+
end
|
63
80
|
end
|
64
81
|
|
65
82
|
sub_test_case("specific builder") do
|