red-arrow 6.0.0 → 8.0.0
This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +10 -0
- data/ext/arrow/arrow.cpp +12 -0
- data/ext/arrow/converters.hpp +46 -10
- data/ext/arrow/extconf.rb +1 -1
- data/ext/arrow/raw-records.cpp +3 -2
- data/ext/arrow/red-arrow.hpp +7 -0
- data/ext/arrow/values.cpp +3 -2
- data/lib/arrow/datum.rb +2 -0
- data/lib/arrow/day-time-interval-array-builder.rb +29 -0
- data/lib/arrow/function.rb +52 -0
- data/lib/arrow/loader.rb +16 -0
- data/lib/arrow/month-day-nano-interval-array-builder.rb +29 -0
- data/lib/arrow/s3-global-options.rb +38 -0
- data/lib/arrow/sort-key.rb +61 -55
- data/lib/arrow/sort-options.rb +8 -8
- data/lib/arrow/table-loader.rb +99 -62
- data/lib/arrow/table-saver.rb +7 -2
- data/lib/arrow/table.rb +78 -0
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -10
- data/test/helper.rb +2 -0
- data/test/raw-records/test-basic-arrays.rb +30 -0
- data/test/raw-records/test-dense-union-array.rb +27 -0
- data/test/raw-records/test-list-array.rb +39 -0
- data/test/raw-records/test-map-array.rb +37 -0
- data/test/raw-records/test-sparse-union-array.rb +27 -0
- data/test/raw-records/test-struct-array.rb +30 -0
- data/test/test-function.rb +48 -14
- data/test/test-table.rb +204 -6
- data/test/values/test-basic-arrays.rb +30 -0
- data/test/values/test-dense-union-array.rb +27 -0
- data/test/values/test-dictionary-array.rb +295 -0
- data/test/values/test-list-array.rb +39 -0
- data/test/values/test-map-array.rb +33 -0
- data/test/values/test-sparse-union-array.rb +27 -0
- data/test/values/test-struct-array.rb +30 -0
- metadata +88 -194
data/test/test-function.rb
CHANGED
@@ -53,6 +53,14 @@ class FunctionTest < Test::Unit::TestCase
|
|
53
53
|
or_function.execute(args).value.to_a)
|
54
54
|
end
|
55
55
|
|
56
|
+
test("Arrow::Column") do
|
57
|
+
or_function = Arrow::Function.find("or")
|
58
|
+
table = Arrow::Table.new(a: [true, false, false],
|
59
|
+
b: [true, false, true])
|
60
|
+
assert_equal([true, false, true],
|
61
|
+
or_function.execute([table.a, table.b]).value.to_a)
|
62
|
+
end
|
63
|
+
|
56
64
|
test("Arrow::Scalar") do
|
57
65
|
add_function = Arrow::Function.find("add")
|
58
66
|
args = [
|
@@ -116,12 +124,13 @@ class FunctionTest < Test::Unit::TestCase
|
|
116
124
|
cast_function = Arrow::Function.find("cast")
|
117
125
|
date = Date.new(2021, 6, 12)
|
118
126
|
args = [date]
|
119
|
-
options =
|
120
|
-
|
127
|
+
options = {
|
128
|
+
to_data_type: Arrow::TimestampDataType.new(:second),
|
129
|
+
}
|
121
130
|
time = Time.utc(date.year,
|
122
131
|
date.month,
|
123
132
|
date.day)
|
124
|
-
assert_equal(Arrow::TimestampScalar.new(options
|
133
|
+
assert_equal(Arrow::TimestampScalar.new(options[:to_data_type],
|
125
134
|
time.to_i),
|
126
135
|
cast_function.execute(args, options).value)
|
127
136
|
end
|
@@ -132,9 +141,10 @@ class FunctionTest < Test::Unit::TestCase
|
|
132
141
|
# 00:10:00
|
133
142
|
60 * 10)
|
134
143
|
args = [arrow_time]
|
135
|
-
options =
|
136
|
-
|
137
|
-
|
144
|
+
options = {
|
145
|
+
to_data_type: Arrow::Time64DataType.new(:micro),
|
146
|
+
}
|
147
|
+
assert_equal(Arrow::Time64Scalar.new(options[:to_data_type],
|
138
148
|
# 00:10:00.000000
|
139
149
|
60 * 10 * 1000 * 1000),
|
140
150
|
cast_function.execute(args, options).value)
|
@@ -146,10 +156,11 @@ class FunctionTest < Test::Unit::TestCase
|
|
146
156
|
# 00:10:00.000000
|
147
157
|
60 * 10 * 1000 * 1000)
|
148
158
|
args = [arrow_time]
|
149
|
-
options =
|
150
|
-
|
151
|
-
|
152
|
-
|
159
|
+
options = {
|
160
|
+
to_data_type: Arrow::Time32DataType.new(:second),
|
161
|
+
allow_time_truncate: true,
|
162
|
+
}
|
163
|
+
assert_equal(Arrow::Time32Scalar.new(options[:to_data_type],
|
153
164
|
# 00:10:00
|
154
165
|
60 * 10),
|
155
166
|
cast_function.execute(args, options).value)
|
@@ -159,18 +170,41 @@ class FunctionTest < Test::Unit::TestCase
|
|
159
170
|
cast_function = Arrow::Function.find("cast")
|
160
171
|
time = Time.utc(2021, 6, 12, 1, 2, 3, 1)
|
161
172
|
args = [time]
|
162
|
-
options =
|
163
|
-
|
164
|
-
|
173
|
+
options = {
|
174
|
+
to_data_type: Arrow::TimestampDataType.new(:second),
|
175
|
+
allow_time_truncate: true,
|
176
|
+
}
|
165
177
|
time = Time.utc(time.year,
|
166
178
|
time.month,
|
167
179
|
time.day,
|
168
180
|
time.hour,
|
169
181
|
time.min,
|
170
182
|
time.sec)
|
171
|
-
assert_equal(Arrow::TimestampScalar.new(options
|
183
|
+
assert_equal(Arrow::TimestampScalar.new(options[:to_data_type],
|
172
184
|
time.to_i),
|
173
185
|
cast_function.execute(args, options).value)
|
174
186
|
end
|
187
|
+
|
188
|
+
test("SetLookupOptions") do
|
189
|
+
is_in_function = Arrow::Function.find("is_in")
|
190
|
+
args = [
|
191
|
+
Arrow::Int16Array.new([1, 0, 1, 2]),
|
192
|
+
]
|
193
|
+
options = {
|
194
|
+
value_set: Arrow::Int16Array.new([2, 0]),
|
195
|
+
}
|
196
|
+
assert_equal(Arrow::BooleanArray.new([false, true, false, true]),
|
197
|
+
is_in_function.execute(args, options).value)
|
198
|
+
end
|
199
|
+
end
|
200
|
+
|
201
|
+
def test_call
|
202
|
+
or_function = Arrow::Function.find("or")
|
203
|
+
args = [
|
204
|
+
Arrow::BooleanArray.new([true, false, false]),
|
205
|
+
Arrow::BooleanArray.new([true, false, true]),
|
206
|
+
]
|
207
|
+
assert_equal([true, false, true],
|
208
|
+
or_function.call(args).value.to_a)
|
175
209
|
end
|
176
210
|
end
|
data/test/test-table.rb
CHANGED
@@ -186,7 +186,12 @@ class TableTest < Test::Unit::TestCase
|
|
186
186
|
end
|
187
187
|
|
188
188
|
test("{key: Range}: beginless include end") do
|
189
|
-
|
189
|
+
begin
|
190
|
+
range = eval("..8")
|
191
|
+
rescue SyntaxError
|
192
|
+
omit("beginless range isn't supported")
|
193
|
+
end
|
194
|
+
assert_equal(<<-TABLE, @table.slice(count: range).to_s)
|
190
195
|
count visible
|
191
196
|
0 1 true
|
192
197
|
1 2 false
|
@@ -196,7 +201,12 @@ class TableTest < Test::Unit::TestCase
|
|
196
201
|
end
|
197
202
|
|
198
203
|
test("{key: Range}: beginless exclude end") do
|
199
|
-
|
204
|
+
begin
|
205
|
+
range = eval("...8")
|
206
|
+
rescue SyntaxError
|
207
|
+
omit("beginless range isn't supported")
|
208
|
+
end
|
209
|
+
assert_equal(<<-TABLE, @table.slice(count: range).to_s)
|
200
210
|
count visible
|
201
211
|
0 1 true
|
202
212
|
1 2 false
|
@@ -205,7 +215,12 @@ class TableTest < Test::Unit::TestCase
|
|
205
215
|
end
|
206
216
|
|
207
217
|
test("{key: Range}: endless") do
|
208
|
-
|
218
|
+
begin
|
219
|
+
range = eval("16..")
|
220
|
+
rescue SyntaxError
|
221
|
+
omit("endless range isn't supported")
|
222
|
+
end
|
223
|
+
assert_equal(<<-TABLE, @table.slice(count: range).to_s)
|
209
224
|
count visible
|
210
225
|
0 16 true
|
211
226
|
1 32 false
|
@@ -573,14 +588,20 @@ class TableTest < Test::Unit::TestCase
|
|
573
588
|
assert_equal(@table, Arrow::Table.load(output, format: :batch))
|
574
589
|
end
|
575
590
|
|
591
|
+
def test_arrows
|
592
|
+
output = create_output(".arrows")
|
593
|
+
@table.save(output, format: :arrows)
|
594
|
+
assert_equal(@table, Arrow::Table.load(output, format: :arrows))
|
595
|
+
end
|
596
|
+
|
576
597
|
def test_arrow_streaming
|
577
|
-
output = create_output(".
|
598
|
+
output = create_output(".arrows")
|
578
599
|
@table.save(output, format: :arrow_streaming)
|
579
600
|
assert_equal(@table, Arrow::Table.load(output, format: :arrow_streaming))
|
580
601
|
end
|
581
602
|
|
582
603
|
def test_stream
|
583
|
-
output = create_output(".
|
604
|
+
output = create_output(".arrows")
|
584
605
|
@table.save(output, format: :stream)
|
585
606
|
assert_equal(@table, Arrow::Table.load(output, format: :stream))
|
586
607
|
end
|
@@ -626,6 +647,24 @@ class TableTest < Test::Unit::TestCase
|
|
626
647
|
end
|
627
648
|
|
628
649
|
sub_test_case("save: auto detect") do
|
650
|
+
test("arrow") do
|
651
|
+
output = create_output(".arrow")
|
652
|
+
@table.save(output)
|
653
|
+
assert_equal(@table,
|
654
|
+
Arrow::Table.load(output,
|
655
|
+
format: :arrow,
|
656
|
+
schema: @table.schema))
|
657
|
+
end
|
658
|
+
|
659
|
+
test("arrows") do
|
660
|
+
output = create_output(".arrows")
|
661
|
+
@table.save(output)
|
662
|
+
assert_equal(@table,
|
663
|
+
Arrow::Table.load(output,
|
664
|
+
format: :arrows,
|
665
|
+
schema: @table.schema))
|
666
|
+
end
|
667
|
+
|
629
668
|
test("csv") do
|
630
669
|
output = create_output(".csv")
|
631
670
|
@table.save(output)
|
@@ -664,7 +703,13 @@ class TableTest < Test::Unit::TestCase
|
|
664
703
|
|
665
704
|
test("arrow: streaming") do
|
666
705
|
output = create_output(".arrow")
|
667
|
-
@table.save(output, format: :
|
706
|
+
@table.save(output, format: :arrows)
|
707
|
+
assert_equal(@table, Arrow::Table.load(output))
|
708
|
+
end
|
709
|
+
|
710
|
+
test("arrows") do
|
711
|
+
output = create_output(".arrows")
|
712
|
+
@table.save(output, format: :arrows)
|
668
713
|
assert_equal(@table, Arrow::Table.load(output))
|
669
714
|
end
|
670
715
|
|
@@ -728,6 +773,47 @@ chris\t-1
|
|
728
773
|
end
|
729
774
|
end
|
730
775
|
end
|
776
|
+
|
777
|
+
sub_test_case("URI") do
|
778
|
+
def start_web_server(path, data, content_type)
|
779
|
+
http_server = WEBrick::HTTPServer.new(:Port => 0)
|
780
|
+
http_server.mount_proc(path) do |request, response|
|
781
|
+
response.body = data
|
782
|
+
response.content_type = content_type
|
783
|
+
end
|
784
|
+
http_server_thread = Thread.new do
|
785
|
+
http_server.start
|
786
|
+
end
|
787
|
+
begin
|
788
|
+
Timeout.timeout(1) do
|
789
|
+
yield(http_server[:Port])
|
790
|
+
end
|
791
|
+
ensure
|
792
|
+
http_server.shutdown
|
793
|
+
http_server_thread.join
|
794
|
+
end
|
795
|
+
end
|
796
|
+
|
797
|
+
data("Arrow File",
|
798
|
+
["arrow", "application/vnd.apache.arrow.file"])
|
799
|
+
data("Arrow Stream",
|
800
|
+
["arrows", "application/vnd.apache.arrow.stream"])
|
801
|
+
data("CSV",
|
802
|
+
["csv", "text/csv"])
|
803
|
+
def test_http(data)
|
804
|
+
extension, content_type = data
|
805
|
+
output = Arrow::ResizableBuffer.new(1024)
|
806
|
+
@table.save(output, format: extension.to_sym)
|
807
|
+
path = "/data.#{extension}"
|
808
|
+
start_web_server(path,
|
809
|
+
output.data.to_s,
|
810
|
+
content_type) do |port|
|
811
|
+
input = URI("http://127.0.0.1:#{port}#{path}")
|
812
|
+
loaded_table = Arrow::Table.load(input)
|
813
|
+
assert_equal(@table.to_s, loaded_table.to_s)
|
814
|
+
end
|
815
|
+
end
|
816
|
+
end
|
731
817
|
end
|
732
818
|
|
733
819
|
test("#pack") do
|
@@ -922,4 +1008,116 @@ visible: false
|
|
922
1008
|
TABLE
|
923
1009
|
end
|
924
1010
|
end
|
1011
|
+
|
1012
|
+
sub_test_case("#join") do
|
1013
|
+
test("keys: String") do
|
1014
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1015
|
+
number: [10, 20, 30])
|
1016
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1017
|
+
string: ["three", "one"])
|
1018
|
+
assert_equal(Arrow::Table.new([
|
1019
|
+
["key", [1, 3]],
|
1020
|
+
["number", [10, 30]],
|
1021
|
+
["key", [1, 3]],
|
1022
|
+
["string", ["one", "three"]],
|
1023
|
+
]),
|
1024
|
+
table1.join(table2, "key"))
|
1025
|
+
end
|
1026
|
+
|
1027
|
+
test("keys: Symbol") do
|
1028
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1029
|
+
number: [10, 20, 30])
|
1030
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1031
|
+
string: ["three", "one"])
|
1032
|
+
assert_equal(Arrow::Table.new([
|
1033
|
+
["key", [1, 3]],
|
1034
|
+
["number", [10, 30]],
|
1035
|
+
["key", [1, 3]],
|
1036
|
+
["string", ["one", "three"]],
|
1037
|
+
]),
|
1038
|
+
table1.join(table2, :key))
|
1039
|
+
end
|
1040
|
+
|
1041
|
+
test("keys: [String, Symbol]") do
|
1042
|
+
table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
|
1043
|
+
key2: [10, 100, 20, 200],
|
1044
|
+
number: [1010, 1100, 2020, 2200])
|
1045
|
+
table2 = Arrow::Table.new(key1: [1, 2, 2],
|
1046
|
+
key2: [100, 20, 50],
|
1047
|
+
string: ["1-100", "2-20", "2-50"])
|
1048
|
+
assert_equal(Arrow::Table.new([
|
1049
|
+
["key1", [1, 2]],
|
1050
|
+
["key2", [100, 20]],
|
1051
|
+
["number", [1100, 2020]],
|
1052
|
+
["key1", [1, 2]],
|
1053
|
+
["key2", [100, 20]],
|
1054
|
+
["string", ["1-100", "2-20"]],
|
1055
|
+
]),
|
1056
|
+
table1.join(table2, ["key1", :key2]))
|
1057
|
+
end
|
1058
|
+
|
1059
|
+
test("keys: {left: String, right: Symbol}") do
|
1060
|
+
table1 = Arrow::Table.new(left_key: [1, 2, 3],
|
1061
|
+
number: [10, 20, 30])
|
1062
|
+
table2 = Arrow::Table.new(right_key: [3, 1],
|
1063
|
+
string: ["three", "one"])
|
1064
|
+
assert_equal(Arrow::Table.new([
|
1065
|
+
["left_key", [1, 3]],
|
1066
|
+
["number", [10, 30]],
|
1067
|
+
["right_key", [1, 3]],
|
1068
|
+
["string", ["one", "three"]],
|
1069
|
+
]),
|
1070
|
+
table1.join(table2, {left: "left_key", right: :right_key}))
|
1071
|
+
end
|
1072
|
+
|
1073
|
+
test("keys: {left: [String, Symbol], right: [Symbol, String]}") do
|
1074
|
+
table1 = Arrow::Table.new(left_key1: [1, 1, 2, 2],
|
1075
|
+
left_key2: [10, 100, 20, 200],
|
1076
|
+
number: [1010, 1100, 2020, 2200])
|
1077
|
+
table2 = Arrow::Table.new(right_key1: [1, 2, 2],
|
1078
|
+
right_key2: [100, 20, 50],
|
1079
|
+
string: ["1-100", "2-20", "2-50"])
|
1080
|
+
assert_equal(Arrow::Table.new([
|
1081
|
+
["left_key1", [1, 2]],
|
1082
|
+
["left_key2", [100, 20]],
|
1083
|
+
["number", [1100, 2020]],
|
1084
|
+
["right_key1", [1, 2]],
|
1085
|
+
["right_key2", [100, 20]],
|
1086
|
+
["string", ["1-100", "2-20"]],
|
1087
|
+
]),
|
1088
|
+
table1.join(table2,
|
1089
|
+
{
|
1090
|
+
left: ["left_key1", :left_key2],
|
1091
|
+
right: [:right_key1, "right_key2"],
|
1092
|
+
}))
|
1093
|
+
end
|
1094
|
+
|
1095
|
+
test("type:") do
|
1096
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1097
|
+
number: [10, 20, 30])
|
1098
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1099
|
+
string: ["three", "one"])
|
1100
|
+
assert_equal(Arrow::Table.new([
|
1101
|
+
["key", [1, 3, 2]],
|
1102
|
+
["number", [10, 30, 20]],
|
1103
|
+
["key", [1, 3, nil]],
|
1104
|
+
["string", ["one", "three", nil]],
|
1105
|
+
]),
|
1106
|
+
table1.join(table2, "key", type: :left_outer))
|
1107
|
+
end
|
1108
|
+
|
1109
|
+
test("left_outputs: & right_outputs:") do
|
1110
|
+
table1 = Arrow::Table.new(key: [1, 2, 3],
|
1111
|
+
number: [10, 20, 30])
|
1112
|
+
table2 = Arrow::Table.new(key: [3, 1],
|
1113
|
+
string: ["three", "one"])
|
1114
|
+
assert_equal(Arrow::Table.new(key: [1, 3],
|
1115
|
+
number: [10, 30],
|
1116
|
+
string: ["one", "three"]),
|
1117
|
+
table1.join(table2,
|
1118
|
+
"key",
|
1119
|
+
left_outputs: ["key", "number"],
|
1120
|
+
right_outputs: ["string"]))
|
1121
|
+
end
|
1122
|
+
end
|
925
1123
|
end
|
data/test/values/test-basic-arrays.rb
CHANGED
@@ -276,6 +276,36 @@ module ValuesBasicArraysTests
|
|
276
276
|
target = build(Arrow::Decimal256Array.new(data_type, values))
|
277
277
|
assert_equal(values, target.values)
|
278
278
|
end
|
279
|
+
|
280
|
+
def test_month_interval
|
281
|
+
values = [
|
282
|
+
1,
|
283
|
+
nil,
|
284
|
+
12,
|
285
|
+
]
|
286
|
+
target = build(Arrow::MonthIntervalArray.new(values))
|
287
|
+
assert_equal(values, target.values)
|
288
|
+
end
|
289
|
+
|
290
|
+
def test_day_time_interval
|
291
|
+
values = [
|
292
|
+
{day: 1, millisecond: 100},
|
293
|
+
nil,
|
294
|
+
{day: 2, millisecond: 300},
|
295
|
+
]
|
296
|
+
target = build(Arrow::DayTimeIntervalArray.new(values))
|
297
|
+
assert_equal(values, target.values)
|
298
|
+
end
|
299
|
+
|
300
|
+
def test_month_day_nano_interval
|
301
|
+
values = [
|
302
|
+
{month: 1, day: 1, nanosecond: 100},
|
303
|
+
nil,
|
304
|
+
{month: 2, day: 3, nanosecond: 400},
|
305
|
+
]
|
306
|
+
target = build(Arrow::MonthDayNanoIntervalArray.new(values))
|
307
|
+
assert_equal(values, target.values)
|
308
|
+
end
|
279
309
|
end
|
280
310
|
|
281
311
|
class ValuesArrayBasicArraysTest < Test::Unit::TestCase
|
data/test/values/test-dense-union-array.rb
CHANGED
@@ -347,6 +347,33 @@ module ValuesDenseUnionArrayTests
|
|
347
347
|
assert_equal(values, target.values)
|
348
348
|
end
|
349
349
|
|
350
|
+
def test_month_interval
|
351
|
+
values = [
|
352
|
+
{"0" => 1},
|
353
|
+
{"1" => nil},
|
354
|
+
]
|
355
|
+
target = build(:month_interval, values)
|
356
|
+
assert_equal(values, target.values)
|
357
|
+
end
|
358
|
+
|
359
|
+
def test_day_time_interval
|
360
|
+
values = [
|
361
|
+
{"0" => {day: 1, millisecond: 100}},
|
362
|
+
{"1" => nil},
|
363
|
+
]
|
364
|
+
target = build(:day_time_interval, values)
|
365
|
+
assert_equal(values, target.values)
|
366
|
+
end
|
367
|
+
|
368
|
+
def test_month_day_nano_interval
|
369
|
+
values = [
|
370
|
+
{"0" => {month: 1, day: 1, nanosecond: 100}},
|
371
|
+
{"1" => nil},
|
372
|
+
]
|
373
|
+
target = build(:month_day_nano_interval, values)
|
374
|
+
assert_equal(values, target.values)
|
375
|
+
end
|
376
|
+
|
350
377
|
def test_list
|
351
378
|
values = [
|
352
379
|
{"0" => [true, nil, false]},
|