red-arrow 6.0.0 → 8.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +10 -0
- data/ext/arrow/arrow.cpp +12 -0
- data/ext/arrow/converters.hpp +46 -10
- data/ext/arrow/extconf.rb +1 -1
- data/ext/arrow/raw-records.cpp +3 -2
- data/ext/arrow/red-arrow.hpp +7 -0
- data/ext/arrow/values.cpp +3 -2
- data/lib/arrow/datum.rb +2 -0
- data/lib/arrow/day-time-interval-array-builder.rb +29 -0
- data/lib/arrow/function.rb +52 -0
- data/lib/arrow/loader.rb +16 -0
- data/lib/arrow/month-day-nano-interval-array-builder.rb +29 -0
- data/lib/arrow/s3-global-options.rb +38 -0
- data/lib/arrow/sort-key.rb +61 -55
- data/lib/arrow/sort-options.rb +8 -8
- data/lib/arrow/table-loader.rb +99 -62
- data/lib/arrow/table-saver.rb +7 -2
- data/lib/arrow/table.rb +78 -0
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -10
- data/test/helper.rb +2 -0
- data/test/raw-records/test-basic-arrays.rb +30 -0
- data/test/raw-records/test-dense-union-array.rb +27 -0
- data/test/raw-records/test-list-array.rb +39 -0
- data/test/raw-records/test-map-array.rb +37 -0
- data/test/raw-records/test-sparse-union-array.rb +27 -0
- data/test/raw-records/test-struct-array.rb +30 -0
- data/test/test-function.rb +48 -14
- data/test/test-table.rb +204 -6
- data/test/values/test-basic-arrays.rb +30 -0
- data/test/values/test-dense-union-array.rb +27 -0
- data/test/values/test-dictionary-array.rb +295 -0
- data/test/values/test-list-array.rb +39 -0
- data/test/values/test-map-array.rb +33 -0
- data/test/values/test-sparse-union-array.rb +27 -0
- data/test/values/test-struct-array.rb +30 -0
- metadata +88 -194
data/test/test-function.rb
CHANGED
@@ -53,6 +53,14 @@ class FunctionTest < Test::Unit::TestCase
|
|
53
53
|
or_function.execute(args).value.to_a)
|
54
54
|
end
|
55
55
|
|
56
|
+
test("Arrow::Column") do
|
57
|
+
or_function = Arrow::Function.find("or")
|
58
|
+
table = Arrow::Table.new(a: [true, false, false],
|
59
|
+
b: [true, false, true])
|
60
|
+
assert_equal([true, false, true],
|
61
|
+
or_function.execute([table.a, table.b]).value.to_a)
|
62
|
+
end
|
63
|
+
|
56
64
|
test("Arrow::Scalar") do
|
57
65
|
add_function = Arrow::Function.find("add")
|
58
66
|
args = [
|
@@ -116,12 +124,13 @@ class FunctionTest < Test::Unit::TestCase
|
|
116
124
|
cast_function = Arrow::Function.find("cast")
|
117
125
|
date = Date.new(2021, 6, 12)
|
118
126
|
args = [date]
|
119
|
-
options =
|
120
|
-
|
127
|
+
options = {
|
128
|
+
to_data_type: Arrow::TimestampDataType.new(:second),
|
129
|
+
}
|
121
130
|
time = Time.utc(date.year,
|
122
131
|
date.month,
|
123
132
|
date.day)
|
124
|
-
assert_equal(Arrow::TimestampScalar.new(options
|
133
|
+
assert_equal(Arrow::TimestampScalar.new(options[:to_data_type],
|
125
134
|
time.to_i),
|
126
135
|
cast_function.execute(args, options).value)
|
127
136
|
end
|
@@ -132,9 +141,10 @@ class FunctionTest < Test::Unit::TestCase
|
|
132
141
|
# 00:10:00
|
133
142
|
60 * 10)
|
134
143
|
args = [arrow_time]
|
135
|
-
options =
|
136
|
-
|
137
|
-
|
144
|
+
options = {
|
145
|
+
to_data_type: Arrow::Time64DataType.new(:micro),
|
146
|
+
}
|
147
|
+
assert_equal(Arrow::Time64Scalar.new(options[:to_data_type],
|
138
148
|
# 00:10:00.000000
|
139
149
|
60 * 10 * 1000 * 1000),
|
140
150
|
cast_function.execute(args, options).value)
|
@@ -146,10 +156,11 @@ class FunctionTest < Test::Unit::TestCase
|
|
146
156
|
# 00:10:00.000000
|
147
157
|
60 * 10 * 1000 * 1000)
|
148
158
|
args = [arrow_time]
|
149
|
-
options =
|
150
|
-
|
151
|
-
|
152
|
-
|
159
|
+
options = {
|
160
|
+
to_data_type: Arrow::Time32DataType.new(:second),
|
161
|
+
allow_time_truncate: true,
|
162
|
+
}
|
163
|
+
assert_equal(Arrow::Time32Scalar.new(options[:to_data_type],
|
153
164
|
# 00:10:00
|
154
165
|
60 * 10),
|
155
166
|
cast_function.execute(args, options).value)
|
@@ -159,18 +170,41 @@ class FunctionTest < Test::Unit::TestCase
|
|
159
170
|
cast_function = Arrow::Function.find("cast")
|
160
171
|
time = Time.utc(2021, 6, 12, 1, 2, 3, 1)
|
161
172
|
args = [time]
|
162
|
-
options =
|
163
|
-
|
164
|
-
|
173
|
+
options = {
|
174
|
+
to_data_type: Arrow::TimestampDataType.new(:second),
|
175
|
+
allow_time_truncate: true,
|
176
|
+
}
|
165
177
|
time = Time.utc(time.year,
|
166
178
|
time.month,
|
167
179
|
time.day,
|
168
180
|
time.hour,
|
169
181
|
time.min,
|
170
182
|
time.sec)
|
171
|
-
assert_equal(Arrow::TimestampScalar.new(options
|
183
|
+
assert_equal(Arrow::TimestampScalar.new(options[:to_data_type],
|
172
184
|
time.to_i),
|
173
185
|
cast_function.execute(args, options).value)
|
174
186
|
end
|
187
|
+
|
188
|
+
test("SetLookupOptions") do
|
189
|
+
is_in_function = Arrow::Function.find("is_in")
|
190
|
+
args = [
|
191
|
+
Arrow::Int16Array.new([1, 0, 1, 2]),
|
192
|
+
]
|
193
|
+
options = {
|
194
|
+
value_set: Arrow::Int16Array.new([2, 0]),
|
195
|
+
}
|
196
|
+
assert_equal(Arrow::BooleanArray.new([false, true, false, true]),
|
197
|
+
is_in_function.execute(args, options).value)
|
198
|
+
end
|
199
|
+
end
|
200
|
+
|
201
|
+
def test_call
|
202
|
+
or_function = Arrow::Function.find("or")
|
203
|
+
args = [
|
204
|
+
Arrow::BooleanArray.new([true, false, false]),
|
205
|
+
Arrow::BooleanArray.new([true, false, true]),
|
206
|
+
]
|
207
|
+
assert_equal([true, false, true],
|
208
|
+
or_function.call(args).value.to_a)
|
175
209
|
end
|
176
210
|
end
|
data/test/test-table.rb
CHANGED
@@ -186,7 +186,12 @@ class TableTest < Test::Unit::TestCase
|
|
186
186
|
end
|
187
187
|
|
188
188
|
test("{key: Range}: beginless include end") do
|
189
|
-
|
189
|
+
begin
|
190
|
+
range = eval("..8")
|
191
|
+
rescue SyntaxError
|
192
|
+
omit("beginless range isn't supported")
|
193
|
+
end
|
194
|
+
assert_equal(<<-TABLE, @table.slice(count: range).to_s)
|
190
195
|
count visible
|
191
196
|
0 1 true
|
192
197
|
1 2 false
|
@@ -196,7 +201,12 @@ class TableTest < Test::Unit::TestCase
|
|
196
201
|
end
|
197
202
|
|
198
203
|
test("{key: Range}: beginless exclude end") do
|
199
|
-
|
204
|
+
begin
|
205
|
+
range = eval("...8")
|
206
|
+
rescue SyntaxError
|
207
|
+
omit("beginless range isn't supported")
|
208
|
+
end
|
209
|
+
assert_equal(<<-TABLE, @table.slice(count: range).to_s)
|
200
210
|
count visible
|
201
211
|
0 1 true
|
202
212
|
1 2 false
|
@@ -205,7 +215,12 @@ class TableTest < Test::Unit::TestCase
|
|
205
215
|
end
|
206
216
|
|
207
217
|
test("{key: Range}: endless") do
|
208
|
-
|
218
|
+
begin
|
219
|
+
range = eval("16..")
|
220
|
+
rescue SyntaxError
|
221
|
+
omit("endless range isn't supported")
|
222
|
+
end
|
223
|
+
assert_equal(<<-TABLE, @table.slice(count: range).to_s)
|
209
224
|
count visible
|
210
225
|
0 16 true
|
211
226
|
1 32 false
|
@@ -573,14 +588,20 @@ class TableTest < Test::Unit::TestCase
|
|
573
588
|
assert_equal(@table, Arrow::Table.load(output, format: :batch))
|
574
589
|
end
|
575
590
|
|
591
|
+
def test_arrows
|
592
|
+
output = create_output(".arrows")
|
593
|
+
@table.save(output, format: :arrows)
|
594
|
+
assert_equal(@table, Arrow::Table.load(output, format: :arrows))
|
595
|
+
end
|
596
|
+
|
576
597
|
def test_arrow_streaming
|
577
|
-
output = create_output(".
|
598
|
+
output = create_output(".arrows")
|
578
599
|
@table.save(output, format: :arrow_streaming)
|
579
600
|
assert_equal(@table, Arrow::Table.load(output, format: :arrow_streaming))
|
580
601
|
end
|
581
602
|
|
582
603
|
def test_stream
|
583
|
-
output = create_output(".
|
604
|
+
output = create_output(".arrows")
|
584
605
|
@table.save(output, format: :stream)
|
585
606
|
assert_equal(@table, Arrow::Table.load(output, format: :stream))
|
586
607
|
end
|
@@ -626,6 +647,24 @@ class TableTest < Test::Unit::TestCase
|
|
626
647
|
end
|
627
648
|
|
628
649
|
sub_test_case("save: auto detect") do
|
650
|
+
test("arrow") do
|
651
|
+
output = create_output(".arrow")
|
652
|
+
@table.save(output)
|
653
|
+
assert_equal(@table,
|
654
|
+
Arrow::Table.load(output,
|
655
|
+
format: :arrow,
|
656
|
+
schema: @table.schema))
|
657
|
+
end
|
658
|
+
|
659
|
+
test("arrows") do
|
660
|
+
output = create_output(".arrows")
|
661
|
+
@table.save(output)
|
662
|
+
assert_equal(@table,
|
663
|
+
Arrow::Table.load(output,
|
664
|
+
format: :arrows,
|
665
|
+
schema: @table.schema))
|
666
|
+
end
|
667
|
+
|
629
668
|
test("csv") do
|
630
669
|
output = create_output(".csv")
|
631
670
|
@table.save(output)
|
@@ -664,7 +703,13 @@ class TableTest < Test::Unit::TestCase
|
|
664
703
|
|
665
704
|
test("arrow: streaming") do
|
666
705
|
output = create_output(".arrow")
|
667
|
-
@table.save(output, format: :
|
706
|
+
@table.save(output, format: :arrows)
|
707
|
+
assert_equal(@table, Arrow::Table.load(output))
|
708
|
+
end
|
709
|
+
|
710
|
+
test("arrows") do
|
711
|
+
output = create_output(".arrows")
|
712
|
+
@table.save(output, format: :arrows)
|
668
713
|
assert_equal(@table, Arrow::Table.load(output))
|
669
714
|
end
|
670
715
|
|
@@ -728,6 +773,47 @@ chris\t-1
|
|
728
773
|
end
|
729
774
|
end
|
730
775
|
end
|
776
|
+
|
777
|
+
sub_test_case("URI") do
|
778
|
+
def start_web_server(path, data, content_type)
|
779
|
+
http_server = WEBrick::HTTPServer.new(:Port => 0)
|
780
|
+
http_server.mount_proc(path) do |request, response|
|
781
|
+
response.body = data
|
782
|
+
response.content_type = content_type
|
783
|
+
end
|
784
|
+
http_server_thread = Thread.new do
|
785
|
+
http_server.start
|
786
|
+
end
|
787
|
+
begin
|
788
|
+
Timeout.timeout(1) do
|
789
|
+
yield(http_server[:Port])
|
790
|
+
end
|
791
|
+
ensure
|
792
|
+
http_server.shutdown
|
793
|
+
http_server_thread.join
|
794
|
+
end
|
795
|
+
end
|
796
|
+
|
797
|
+
data("Arrow File",
|
798
|
+
["arrow", "application/vnd.apache.arrow.file"])
|
799
|
+
data("Arrow Stream",
|
800
|
+
["arrows", "application/vnd.apache.arrow.stream"])
|
801
|
+
data("CSV",
|
802
|
+
["csv", "text/csv"])
|
803
|
+
def test_http(data)
|
804
|
+
extension, content_type = data
|
805
|
+
output = Arrow::ResizableBuffer.new(1024)
|
806
|
+
@table.save(output, format: extension.to_sym)
|
807
|
+
path = "/data.#{extension}"
|
808
|
+
start_web_server(path,
|
809
|
+
output.data.to_s,
|
810
|
+
content_type) do |port|
|
811
|
+
input = URI("http://127.0.0.1:#{port}#{path}")
|
812
|
+
loaded_table = Arrow::Table.load(input)
|
813
|
+
assert_equal(@table.to_s, loaded_table.to_s)
|
814
|
+
end
|
815
|
+
end
|
816
|
+
end
|
731
817
|
end
|
732
818
|
|
733
819
|
test("#pack") do
|
@@ -922,4 +1008,116 @@ visible: false
|
|
922
1008
|
TABLE
|
923
1009
|
end
|
924
1010
|
end
|
1011
|
+
|
1012
|
+
sub_test_case("#join") do
|
1013
|
+
test("keys: String") do
|
1014
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1015
|
+
number: [10, 20, 30])
|
1016
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1017
|
+
string: ["three", "one"])
|
1018
|
+
assert_equal(Arrow::Table.new([
|
1019
|
+
["key", [1, 3]],
|
1020
|
+
["number", [10, 30]],
|
1021
|
+
["key", [1, 3]],
|
1022
|
+
["string", ["one", "three"]],
|
1023
|
+
]),
|
1024
|
+
table1.join(table2, "key"))
|
1025
|
+
end
|
1026
|
+
|
1027
|
+
test("keys: Symbol") do
|
1028
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1029
|
+
number: [10, 20, 30])
|
1030
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1031
|
+
string: ["three", "one"])
|
1032
|
+
assert_equal(Arrow::Table.new([
|
1033
|
+
["key", [1, 3]],
|
1034
|
+
["number", [10, 30]],
|
1035
|
+
["key", [1, 3]],
|
1036
|
+
["string", ["one", "three"]],
|
1037
|
+
]),
|
1038
|
+
table1.join(table2, :key))
|
1039
|
+
end
|
1040
|
+
|
1041
|
+
test("keys: [String, Symbol]") do
|
1042
|
+
table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
|
1043
|
+
key2: [10, 100, 20, 200],
|
1044
|
+
number: [1010, 1100, 2020, 2200])
|
1045
|
+
table2 = Arrow::Table.new(key1: [1, 2, 2],
|
1046
|
+
key2: [100, 20, 50],
|
1047
|
+
string: ["1-100", "2-20", "2-50"])
|
1048
|
+
assert_equal(Arrow::Table.new([
|
1049
|
+
["key1", [1, 2]],
|
1050
|
+
["key2", [100, 20]],
|
1051
|
+
["number", [1100, 2020]],
|
1052
|
+
["key1", [1, 2]],
|
1053
|
+
["key2", [100, 20]],
|
1054
|
+
["string", ["1-100", "2-20"]],
|
1055
|
+
]),
|
1056
|
+
table1.join(table2, ["key1", :key2]))
|
1057
|
+
end
|
1058
|
+
|
1059
|
+
test("keys: {left: String, right: Symbol}") do
|
1060
|
+
table1 = Arrow::Table.new(left_key: [1, 2, 3],
|
1061
|
+
number: [10, 20, 30])
|
1062
|
+
table2 = Arrow::Table.new(right_key: [3, 1],
|
1063
|
+
string: ["three", "one"])
|
1064
|
+
assert_equal(Arrow::Table.new([
|
1065
|
+
["left_key", [1, 3]],
|
1066
|
+
["number", [10, 30]],
|
1067
|
+
["right_key", [1, 3]],
|
1068
|
+
["string", ["one", "three"]],
|
1069
|
+
]),
|
1070
|
+
table1.join(table2, {left: "left_key", right: :right_key}))
|
1071
|
+
end
|
1072
|
+
|
1073
|
+
test("keys: {left: [String, Symbol], right: [Symbol, String]}") do
|
1074
|
+
table1 = Arrow::Table.new(left_key1: [1, 1, 2, 2],
|
1075
|
+
left_key2: [10, 100, 20, 200],
|
1076
|
+
number: [1010, 1100, 2020, 2200])
|
1077
|
+
table2 = Arrow::Table.new(right_key1: [1, 2, 2],
|
1078
|
+
right_key2: [100, 20, 50],
|
1079
|
+
string: ["1-100", "2-20", "2-50"])
|
1080
|
+
assert_equal(Arrow::Table.new([
|
1081
|
+
["left_key1", [1, 2]],
|
1082
|
+
["left_key2", [100, 20]],
|
1083
|
+
["number", [1100, 2020]],
|
1084
|
+
["right_key1", [1, 2]],
|
1085
|
+
["right_key2", [100, 20]],
|
1086
|
+
["string", ["1-100", "2-20"]],
|
1087
|
+
]),
|
1088
|
+
table1.join(table2,
|
1089
|
+
{
|
1090
|
+
left: ["left_key1", :left_key2],
|
1091
|
+
right: [:right_key1, "right_key2"],
|
1092
|
+
}))
|
1093
|
+
end
|
1094
|
+
|
1095
|
+
test("type:") do
|
1096
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1097
|
+
number: [10, 20, 30])
|
1098
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1099
|
+
string: ["three", "one"])
|
1100
|
+
assert_equal(Arrow::Table.new([
|
1101
|
+
["key", [1, 3, 2]],
|
1102
|
+
["number", [10, 30, 20]],
|
1103
|
+
["key", [1, 3, nil]],
|
1104
|
+
["string", ["one", "three", nil]],
|
1105
|
+
]),
|
1106
|
+
table1.join(table2, "key", type: :left_outer))
|
1107
|
+
end
|
1108
|
+
|
1109
|
+
test("left_outputs: & right_outputs:") do
|
1110
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1111
|
+
number: [10, 20, 30])
|
1112
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1113
|
+
string: ["three", "one"])
|
1114
|
+
assert_equal(Arrow::Table.new(key: [1, 3],
|
1115
|
+
number: [10, 30],
|
1116
|
+
string: ["one", "three"]),
|
1117
|
+
table1.join(table2,
|
1118
|
+
"key",
|
1119
|
+
left_outputs: ["key", "number"],
|
1120
|
+
right_outputs: ["string"]))
|
1121
|
+
end
|
1122
|
+
end
|
925
1123
|
end
|
@@ -276,6 +276,36 @@ module ValuesBasicArraysTests
|
|
276
276
|
target = build(Arrow::Decimal256Array.new(data_type, values))
|
277
277
|
assert_equal(values, target.values)
|
278
278
|
end
|
279
|
+
|
280
|
+
def test_month_interval
|
281
|
+
values = [
|
282
|
+
1,
|
283
|
+
nil,
|
284
|
+
12,
|
285
|
+
]
|
286
|
+
target = build(Arrow::MonthIntervalArray.new(values))
|
287
|
+
assert_equal(values, target.values)
|
288
|
+
end
|
289
|
+
|
290
|
+
def test_day_time_interval
|
291
|
+
values = [
|
292
|
+
{day: 1, millisecond: 100},
|
293
|
+
nil,
|
294
|
+
{day: 2, millisecond: 300},
|
295
|
+
]
|
296
|
+
target = build(Arrow::DayTimeIntervalArray.new(values))
|
297
|
+
assert_equal(values, target.values)
|
298
|
+
end
|
299
|
+
|
300
|
+
def test_month_day_nano_interval
|
301
|
+
values = [
|
302
|
+
{month: 1, day: 1, nanosecond: 100},
|
303
|
+
nil,
|
304
|
+
{month: 2, day: 3, nanosecond: 400},
|
305
|
+
]
|
306
|
+
target = build(Arrow::MonthDayNanoIntervalArray.new(values))
|
307
|
+
assert_equal(values, target.values)
|
308
|
+
end
|
279
309
|
end
|
280
310
|
|
281
311
|
class ValuesArrayBasicArraysTest < Test::Unit::TestCase
|
@@ -347,6 +347,33 @@ module ValuesDenseUnionArrayTests
|
|
347
347
|
assert_equal(values, target.values)
|
348
348
|
end
|
349
349
|
|
350
|
+
def test_month_interval
|
351
|
+
values = [
|
352
|
+
{"0" => 1},
|
353
|
+
{"1" => nil},
|
354
|
+
]
|
355
|
+
target = build(:month_interval, values)
|
356
|
+
assert_equal(values, target.values)
|
357
|
+
end
|
358
|
+
|
359
|
+
def test_day_time_interval
|
360
|
+
values = [
|
361
|
+
{"0" => {day: 1, millisecond: 100}},
|
362
|
+
{"1" => nil},
|
363
|
+
]
|
364
|
+
target = build(:day_time_interval, values)
|
365
|
+
assert_equal(values, target.values)
|
366
|
+
end
|
367
|
+
|
368
|
+
def test_month_day_nano_interval
|
369
|
+
values = [
|
370
|
+
{"0" => {month: 1, day: 1, nanosecond: 100}},
|
371
|
+
{"1" => nil},
|
372
|
+
]
|
373
|
+
target = build(:month_day_nano_interval, values)
|
374
|
+
assert_equal(values, target.values)
|
375
|
+
end
|
376
|
+
|
350
377
|
def test_list
|
351
378
|
values = [
|
352
379
|
{"0" => [true, nil, false]},
|