red-arrow 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (50)
  1. checksums.yaml +4 -4
  2. data/Rakefile +49 -4
  3. data/ext/arrow/arrow.cpp +43 -0
  4. data/ext/arrow/extconf.rb +52 -0
  5. data/ext/arrow/record-batch.cpp +756 -0
  6. data/ext/arrow/red-arrow.hpp +60 -0
  7. data/lib/arrow.rb +2 -1
  8. data/lib/arrow/array-builder.rb +4 -0
  9. data/lib/arrow/array.rb +11 -1
  10. data/lib/arrow/bigdecimal-extension.rb +24 -0
  11. data/lib/arrow/binary-array-builder.rb +36 -0
  12. data/lib/arrow/block-closable.rb +5 -1
  13. data/lib/arrow/csv-loader.rb +28 -6
  14. data/lib/arrow/data-type.rb +8 -4
  15. data/lib/arrow/decimal128-array-builder.rb +2 -2
  16. data/lib/arrow/decimal128.rb +42 -0
  17. data/lib/arrow/list-array-builder.rb +1 -1
  18. data/lib/arrow/loader.rb +8 -0
  19. data/lib/arrow/null-array-builder.rb +26 -0
  20. data/lib/arrow/record-batch-builder.rb +8 -9
  21. data/lib/arrow/struct-array-builder.rb +3 -3
  22. data/lib/arrow/struct-array.rb +15 -7
  23. data/lib/arrow/struct.rb +11 -0
  24. data/lib/arrow/table-loader.rb +14 -14
  25. data/lib/arrow/version.rb +1 -1
  26. data/red-arrow.gemspec +8 -4
  27. data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
  28. data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
  29. data/test/raw-records/record-batch/test-list-array.rb +498 -0
  30. data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
  31. data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
  32. data/test/raw-records/record-batch/test-struct-array.rb +426 -0
  33. data/test/run-test.rb +25 -2
  34. data/test/test-array.rb +38 -9
  35. data/test/test-bigdecimal.rb +23 -0
  36. data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
  37. data/test/test-chunked-array.rb +22 -0
  38. data/test/test-column.rb +24 -0
  39. data/test/test-csv-loader.rb +30 -0
  40. data/test/test-data-type.rb +25 -0
  41. data/test/test-decimal128.rb +64 -0
  42. data/test/test-field.rb +20 -0
  43. data/test/test-group.rb +2 -2
  44. data/test/test-record-batch-builder.rb +9 -0
  45. data/test/test-record-batch.rb +14 -0
  46. data/test/test-schema.rb +14 -0
  47. data/test/test-struct-array.rb +16 -3
  48. data/test/test-table.rb +14 -0
  49. data/test/test-tensor.rb +56 -0
  50. metadata +117 -47
@@ -24,15 +24,44 @@ class ArrayTest < Test::Unit::TestCase
24
24
  end
25
25
  end
26
26
 
27
- test("#each") do
28
- array = Arrow::BooleanArray.new([true, false, nil, true])
29
- assert_equal([true, false, nil, true],
30
- array.to_a)
31
- end
27
+ sub_test_case("instance methods") do
28
+ def setup
29
+ @values = [true, false, nil, true]
30
+ @array = Arrow::BooleanArray.new(@values)
31
+ end
32
+
33
+ test("#each") do
34
+ assert_equal(@values, @array.to_a)
35
+ end
36
+
37
+ sub_test_case("#[]") do
38
+ test("valid range") do
39
+ assert_equal(@values,
40
+ @array.length.times.collect {|i| @array[i]})
41
+ end
32
42
 
33
- test("#[]") do
34
- array = Arrow::BooleanArray.new([true, false, nil, true])
35
- assert_equal([true, false, nil, true],
36
- [array[0], array[1], array[2], array[3]])
43
+ test("out of range") do
44
+ assert_nil(@array[@array.length])
45
+ end
46
+
47
+ test("negative index") do
48
+ assert_equal(@values.last,
49
+ @array[-1])
50
+ end
51
+ end
52
+
53
+ sub_test_case("#==") do
54
+ test("Arrow::Array") do
55
+ assert do
56
+ @array == @array
57
+ end
58
+ end
59
+
60
+ test("not Arrow::Array") do
61
+ assert do
62
+ not (@array == 29)
63
+ end
64
+ end
65
+ end
37
66
  end
38
67
  end
@@ -0,0 +1,23 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class BigDecimalTest < Test::Unit::TestCase
19
+ test("#to_arrow") do
20
+ assert_equal(Arrow::Decimal128.new("3.14"),
21
+ BigDecimal("3.14").to_arrow)
22
+ end
23
+ end
@@ -1,5 +1,3 @@
1
- # -*- ruby -*-
2
- #
3
1
  # Licensed to the Apache Software Foundation (ASF) under one
4
2
  # or more contributor license agreements. See the NOTICE file
5
3
  # distributed with this work for additional information
@@ -17,26 +15,23 @@
17
15
  # specific language governing permissions and limitations
18
16
  # under the License.
19
17
 
20
- require "pkg-config"
21
- require "native-package-installer"
22
-
23
- case RUBY_PLATFORM
24
- when /mingw|mswin/
25
- task :default => "nothing"
26
- else
27
- task :default => "dependency:check"
28
- end
18
+ class BufferTest < Test::Unit::TestCase
19
+ sub_test_case("instance methods") do
20
+ def setup
21
+ @buffer = Arrow::Buffer.new("Hello")
22
+ end
29
23
 
30
- task :nothing do
31
- end
24
+ sub_test_case("#==") do
25
+ test("Arrow::Buffer") do
26
+ assert do
27
+ @buffer == @buffer
28
+ end
29
+ end
32
30
 
33
- namespace :dependency do
34
- desc "Check dependency"
35
- task :check do
36
- unless PKGConfig.check_version?("arrow-glib", 0, 9, 0)
37
- unless NativePackageInstaller.install(:debian => "libarrow-glib-dev",
38
- :redhat => "arrow-glib-devel")
39
- exit(false)
31
+ test("not Arrow::Buffer") do
32
+ assert do
33
+ not (@buffer == 29)
34
+ end
40
35
  end
41
36
  end
42
37
  end
@@ -62,4 +62,26 @@ class ChunkedArrayTest < Test::Unit::TestCase
62
62
  ])
63
63
  end
64
64
  end
65
+
66
+ sub_test_case("#==") do
67
+ def setup
68
+ arrays = [
69
+ Arrow::BooleanArray.new([true]),
70
+ Arrow::BooleanArray.new([false, true]),
71
+ ]
72
+ @chunked_array = Arrow::ChunkedArray.new(arrays)
73
+ end
74
+
75
+ test("Arrow::ChunkedArray") do
76
+ assert do
77
+ @chunked_array == @chunked_array
78
+ end
79
+ end
80
+
81
+ test("not Arrow::ChunkedArray") do
82
+ assert do
83
+ not (@chunked_array == 29)
84
+ end
85
+ end
86
+ end
65
87
  end
@@ -40,4 +40,28 @@ class ColumnTest < Test::Unit::TestCase
40
40
  assert_equal([1, [true, false, nil, true]],
41
41
  [packed_column.data.n_chunks, packed_column.to_a])
42
42
  end
43
+
44
+ sub_test_case("#==") do
45
+ def setup
46
+ arrays = [
47
+ Arrow::BooleanArray.new([true]),
48
+ Arrow::BooleanArray.new([false, true]),
49
+ ]
50
+ chunked_array = Arrow::ChunkedArray.new(arrays)
51
+ @column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
52
+ chunked_array)
53
+ end
54
+
55
+ test("Arrow::Column") do
56
+ assert do
57
+ @column == @column
58
+ end
59
+ end
60
+
61
+ test("not Arrow::Column") do
62
+ assert do
63
+ not (@column == 29)
64
+ end
65
+ end
66
+ end
43
67
  end
@@ -141,5 +141,35 @@ count
141
141
  4
142
142
  CSV
143
143
  end
144
+
145
+ test(":encoding") do
146
+ messages = [
147
+ "\u3042", # U+3042 HIRAGANA LETTER A
148
+ "\u3044", # U+3044 HIRAGANA LETTER I
149
+ "\u3046", # U+3046 HIRAGANA LETTER U
150
+ ]
151
+ table = Arrow::Table.new(:message => Arrow::StringArray.new(messages))
152
+ encoding = "cp932"
153
+ assert_equal(table,
154
+ load_csv((["message"] + messages).join("\n").encode(encoding),
155
+ schema: table.schema,
156
+ encoding: encoding))
157
+ end
158
+
159
+ test(":encoding and :compression") do
160
+ messages = [
161
+ "\u3042", # U+3042 HIRAGANA LETTER A
162
+ "\u3044", # U+3044 HIRAGANA LETTER I
163
+ "\u3046", # U+3046 HIRAGANA LETTER U
164
+ ]
165
+ table = Arrow::Table.new(:message => Arrow::StringArray.new(messages))
166
+ encoding = "cp932"
167
+ csv = (["message"] + messages).join("\n").encode(encoding)
168
+ assert_equal(table,
169
+ load_csv(Zlib::Deflate.deflate(csv),
170
+ schema: table.schema,
171
+ encoding: encoding,
172
+ compression: :gzip))
173
+ end
144
174
  end
145
175
  end
@@ -43,5 +43,30 @@ class DataTypeTest < Test::Unit::TestCase
43
43
  assert_equal(Arrow::ListDataType.new(field),
44
44
  Arrow::DataType.resolve(type: :list, field: field))
45
45
  end
46
+
47
+ test("_") do
48
+ assert_equal(Arrow::FixedSizeBinaryDataType.new(10),
49
+ Arrow::DataType.resolve([:fixed_size_binary, 10]))
50
+ end
51
+ end
52
+
53
+ sub_test_case("instance methods") do
54
+ def setup
55
+ @data_type = Arrow::StringDataType.new
56
+ end
57
+
58
+ sub_test_case("#==") do
59
+ test("Arrow::DataType") do
60
+ assert do
61
+ @data_type == @data_type
62
+ end
63
+ end
64
+
65
+ test("not Arrow::DataType") do
66
+ assert do
67
+ not (@data_type == 29)
68
+ end
69
+ end
70
+ end
46
71
  end
47
72
  end
@@ -0,0 +1,64 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class Decimal128Test < Test::Unit::TestCase
19
+ sub_test_case("instance methods") do
20
+ def setup
21
+ @decimal128 = Arrow::Decimal128.new("10.1")
22
+ end
23
+
24
+ sub_test_case("#==") do
25
+ test("Arrow::Decimal128") do
26
+ assert do
27
+ @decimal128 == @decimal128
28
+ end
29
+ end
30
+
31
+ test("not Arrow::Decimal128") do
32
+ assert do
33
+ not (@decimal128 == 10.1)
34
+ end
35
+ end
36
+ end
37
+
38
+ sub_test_case("#!=") do
39
+ test("Arrow::Decimal128") do
40
+ assert do
41
+ not (@decimal128 != @decimal128)
42
+ end
43
+ end
44
+
45
+ test("not Arrow::Decimal128") do
46
+ assert do
47
+ @decimal128 != 10.1
48
+ end
49
+ end
50
+ end
51
+
52
+ sub_test_case("#to_s") do
53
+ test("default") do
54
+ assert_equal("101",
55
+ @decimal128.to_s)
56
+ end
57
+
58
+ test("scale") do
59
+ assert_equal("10.1",
60
+ @decimal128.to_s(1))
61
+ end
62
+ end
63
+ end
64
+ end
@@ -68,4 +68,24 @@ class FieldTest < Test::Unit::TestCase
68
68
  Arrow::Field.new(description).to_s)
69
69
  end
70
70
  end
71
+
72
+ sub_test_case("instance methods") do
73
+ def setup
74
+ @field = Arrow::Field.new("count", :uint32)
75
+ end
76
+
77
+ sub_test_case("#==") do
78
+ test("Arrow::Field") do
79
+ assert do
80
+ @field == @field
81
+ end
82
+ end
83
+
84
+ test("not Arrow::Field") do
85
+ assert do
86
+ not (@field == 29)
87
+ end
88
+ end
89
+ end
90
+ end
71
91
  end
@@ -43,8 +43,8 @@ class GroupTest < Test::Unit::TestCase
43
43
  table = Arrow::Table.new(raw_table)
44
44
  assert_equal(<<-TABLE, table.group(:time).count.to_s)
45
45
  time int
46
- 0 2018-01-29T00:00:00+09:00 1
47
- 1 2018-01-30T00:00:00+09:00 1
46
+ 0 #{time_values[0].iso8601} 1
47
+ 1 #{time_values[1].iso8601} 1
48
48
  TABLE
49
49
  end
50
50
  end
@@ -112,5 +112,14 @@ class RecordBatchBuilderTest < Test::Unit::TestCase
112
112
  arrays),
113
113
  @builder.flush)
114
114
  end
115
+
116
+ test("#column_builders") do
117
+ column_builders = [
118
+ @builder.get_column_builder(0),
119
+ @builder.get_column_builder(1),
120
+ ]
121
+ assert_equal(column_builders,
122
+ @builder.column_builders)
123
+ end
115
124
  end
116
125
  end
@@ -108,5 +108,19 @@ class RecordBatchTest < Test::Unit::TestCase
108
108
  assert_equal(Arrow::Table.new(@schema, [@counts]),
109
109
  @record_batch.to_table)
110
110
  end
111
+
112
+ sub_test_case("#==") do
113
+ test("Arrow::RecordBatch") do
114
+ assert do
115
+ @record_batch == @record_batch
116
+ end
117
+ end
118
+
119
+ test("not Arrow::RecordBatch") do
120
+ assert do
121
+ not (@record_batch == 29)
122
+ end
123
+ end
124
+ end
111
125
  end
112
126
  end
@@ -100,5 +100,19 @@ class SchemaTest < Test::Unit::TestCase
100
100
  end
101
101
  end
102
102
  end
103
+
104
+ sub_test_case("#==") do
105
+ test("Arrow::Schema") do
106
+ assert do
107
+ @schema == @schema
108
+ end
109
+ end
110
+
111
+ test("not Arrow::Schema") do
112
+ assert do
113
+ not (@schema == 29)
114
+ end
115
+ end
116
+ end
103
117
  end
104
118
  end
@@ -49,9 +49,22 @@ class StructArrayTest < Test::Unit::TestCase
49
49
  end
50
50
 
51
51
  test("#[]") do
52
- notify("TODO: Returns Arrow::Struct instead.")
53
- assert_equal([[true, false], [1, 2]],
54
- [@array[0].to_a, @array[1].to_a])
52
+ assert_equal([
53
+ Arrow::Struct.new(@array, 0),
54
+ Arrow::Struct.new(@array, 1),
55
+ ],
56
+ @array.to_a)
57
+ end
58
+
59
+ test("#get_value") do
60
+ assert_equal([
61
+ Arrow::Struct.new(@array, 0),
62
+ Arrow::Struct.new(@array, 1),
63
+ ],
64
+ [
65
+ @array.get_value(0),
66
+ @array.get_value(1),
67
+ ])
55
68
  end
56
69
 
57
70
  sub_test_case("#find_field") do
@@ -596,5 +596,19 @@ visible: false
596
596
  end
597
597
  end
598
598
  end
599
+
600
+ sub_test_case("#==") do
601
+ test("Arrow::Table") do
602
+ assert do
603
+ @table == @table
604
+ end
605
+ end
606
+
607
+ test("not Arrow::Table") do
608
+ assert do
609
+ not (@table == 29)
610
+ end
611
+ end
612
+ end
599
613
  end
600
614
  end