red-arrow 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (50)
  1. checksums.yaml +4 -4
  2. data/Rakefile +49 -4
  3. data/ext/arrow/arrow.cpp +43 -0
  4. data/ext/arrow/extconf.rb +52 -0
  5. data/ext/arrow/record-batch.cpp +756 -0
  6. data/ext/arrow/red-arrow.hpp +60 -0
  7. data/lib/arrow.rb +2 -1
  8. data/lib/arrow/array-builder.rb +4 -0
  9. data/lib/arrow/array.rb +11 -1
  10. data/lib/arrow/bigdecimal-extension.rb +24 -0
  11. data/lib/arrow/binary-array-builder.rb +36 -0
  12. data/lib/arrow/block-closable.rb +5 -1
  13. data/lib/arrow/csv-loader.rb +28 -6
  14. data/lib/arrow/data-type.rb +8 -4
  15. data/lib/arrow/decimal128-array-builder.rb +2 -2
  16. data/lib/arrow/decimal128.rb +42 -0
  17. data/lib/arrow/list-array-builder.rb +1 -1
  18. data/lib/arrow/loader.rb +8 -0
  19. data/lib/arrow/null-array-builder.rb +26 -0
  20. data/lib/arrow/record-batch-builder.rb +8 -9
  21. data/lib/arrow/struct-array-builder.rb +3 -3
  22. data/lib/arrow/struct-array.rb +15 -7
  23. data/lib/arrow/struct.rb +11 -0
  24. data/lib/arrow/table-loader.rb +14 -14
  25. data/lib/arrow/version.rb +1 -1
  26. data/red-arrow.gemspec +8 -4
  27. data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
  28. data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
  29. data/test/raw-records/record-batch/test-list-array.rb +498 -0
  30. data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
  31. data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
  32. data/test/raw-records/record-batch/test-struct-array.rb +426 -0
  33. data/test/run-test.rb +25 -2
  34. data/test/test-array.rb +38 -9
  35. data/test/test-bigdecimal.rb +23 -0
  36. data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
  37. data/test/test-chunked-array.rb +22 -0
  38. data/test/test-column.rb +24 -0
  39. data/test/test-csv-loader.rb +30 -0
  40. data/test/test-data-type.rb +25 -0
  41. data/test/test-decimal128.rb +64 -0
  42. data/test/test-field.rb +20 -0
  43. data/test/test-group.rb +2 -2
  44. data/test/test-record-batch-builder.rb +9 -0
  45. data/test/test-record-batch.rb +14 -0
  46. data/test/test-schema.rb +14 -0
  47. data/test/test-struct-array.rb +16 -3
  48. data/test/test-table.rb +14 -0
  49. data/test/test-tensor.rb +56 -0
  50. metadata +117 -47
@@ -24,15 +24,44 @@ class ArrayTest < Test::Unit::TestCase
24
24
  end
25
25
  end
26
26
 
27
- test("#each") do
28
- array = Arrow::BooleanArray.new([true, false, nil, true])
29
- assert_equal([true, false, nil, true],
30
- array.to_a)
31
- end
27
+ sub_test_case("instance methods") do
28
+ def setup
29
+ @values = [true, false, nil, true]
30
+ @array = Arrow::BooleanArray.new(@values)
31
+ end
32
+
33
+ test("#each") do
34
+ assert_equal(@values, @array.to_a)
35
+ end
36
+
37
+ sub_test_case("#[]") do
38
+ test("valid range") do
39
+ assert_equal(@values,
40
+ @array.length.times.collect {|i| @array[i]})
41
+ end
32
42
 
33
- test("#[]") do
34
- array = Arrow::BooleanArray.new([true, false, nil, true])
35
- assert_equal([true, false, nil, true],
36
- [array[0], array[1], array[2], array[3]])
43
+ test("out of range") do
44
+ assert_nil(@array[@array.length])
45
+ end
46
+
47
+ test("negative index") do
48
+ assert_equal(@values.last,
49
+ @array[-1])
50
+ end
51
+ end
52
+
53
+ sub_test_case("#==") do
54
+ test("Arrow::Array") do
55
+ assert do
56
+ @array == @array
57
+ end
58
+ end
59
+
60
+ test("not Arrow::Array") do
61
+ assert do
62
+ not (@array == 29)
63
+ end
64
+ end
65
+ end
37
66
  end
38
67
  end
@@ -0,0 +1,23 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class BigDecimalTest < Test::Unit::TestCase
19
+ test("#to_arrow") do
20
+ assert_equal(Arrow::Decimal128.new("3.14"),
21
+ BigDecimal("3.14").to_arrow)
22
+ end
23
+ end
@@ -1,5 +1,3 @@
1
- # -*- ruby -*-
2
- #
3
1
  # Licensed to the Apache Software Foundation (ASF) under one
4
2
  # or more contributor license agreements. See the NOTICE file
5
3
  # distributed with this work for additional information
@@ -17,26 +15,23 @@
17
15
  # specific language governing permissions and limitations
18
16
  # under the License.
19
17
 
20
- require "pkg-config"
21
- require "native-package-installer"
22
-
23
- case RUBY_PLATFORM
24
- when /mingw|mswin/
25
- task :default => "nothing"
26
- else
27
- task :default => "dependency:check"
28
- end
18
+ class BufferTest < Test::Unit::TestCase
19
+ sub_test_case("instance methods") do
20
+ def setup
21
+ @buffer = Arrow::Buffer.new("Hello")
22
+ end
29
23
 
30
- task :nothing do
31
- end
24
+ sub_test_case("#==") do
25
+ test("Arrow::Buffer") do
26
+ assert do
27
+ @buffer == @buffer
28
+ end
29
+ end
32
30
 
33
- namespace :dependency do
34
- desc "Check dependency"
35
- task :check do
36
- unless PKGConfig.check_version?("arrow-glib", 0, 9, 0)
37
- unless NativePackageInstaller.install(:debian => "libarrow-glib-dev",
38
- :redhat => "arrow-glib-devel")
39
- exit(false)
31
+ test("not Arrow::Buffer") do
32
+ assert do
33
+ not (@buffer == 29)
34
+ end
40
35
  end
41
36
  end
42
37
  end
@@ -62,4 +62,26 @@ class ChunkedArrayTest < Test::Unit::TestCase
62
62
  ])
63
63
  end
64
64
  end
65
+
66
+ sub_test_case("#==") do
67
+ def setup
68
+ arrays = [
69
+ Arrow::BooleanArray.new([true]),
70
+ Arrow::BooleanArray.new([false, true]),
71
+ ]
72
+ @chunked_array = Arrow::ChunkedArray.new(arrays)
73
+ end
74
+
75
+ test("Arrow::ChunkedArray") do
76
+ assert do
77
+ @chunked_array == @chunked_array
78
+ end
79
+ end
80
+
81
+ test("not Arrow::ChunkedArray") do
82
+ assert do
83
+ not (@chunked_array == 29)
84
+ end
85
+ end
86
+ end
65
87
  end
@@ -40,4 +40,28 @@ class ColumnTest < Test::Unit::TestCase
40
40
  assert_equal([1, [true, false, nil, true]],
41
41
  [packed_column.data.n_chunks, packed_column.to_a])
42
42
  end
43
+
44
+ sub_test_case("#==") do
45
+ def setup
46
+ arrays = [
47
+ Arrow::BooleanArray.new([true]),
48
+ Arrow::BooleanArray.new([false, true]),
49
+ ]
50
+ chunked_array = Arrow::ChunkedArray.new(arrays)
51
+ @column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
52
+ chunked_array)
53
+ end
54
+
55
+ test("Arrow::Column") do
56
+ assert do
57
+ @column == @column
58
+ end
59
+ end
60
+
61
+ test("not Arrow::Column") do
62
+ assert do
63
+ not (@column == 29)
64
+ end
65
+ end
66
+ end
43
67
  end
@@ -141,5 +141,35 @@ count
141
141
  4
142
142
  CSV
143
143
  end
144
+
145
+ test(":encoding") do
146
+ messages = [
147
+ "\u3042", # U+3042 HIRAGANA LETTER A
148
+ "\u3044", # U+3044 HIRAGANA LETTER I
149
+ "\u3046", # U+3046 HIRAGANA LETTER U
150
+ ]
151
+ table = Arrow::Table.new(:message => Arrow::StringArray.new(messages))
152
+ encoding = "cp932"
153
+ assert_equal(table,
154
+ load_csv((["message"] + messages).join("\n").encode(encoding),
155
+ schema: table.schema,
156
+ encoding: encoding))
157
+ end
158
+
159
+ test(":encoding and :compression") do
160
+ messages = [
161
+ "\u3042", # U+3042 HIRAGANA LETTER A
162
+ "\u3044", # U+3044 HIRAGANA LETTER I
163
+ "\u3046", # U+3046 HIRAGANA LETTER U
164
+ ]
165
+ table = Arrow::Table.new(:message => Arrow::StringArray.new(messages))
166
+ encoding = "cp932"
167
+ csv = (["message"] + messages).join("\n").encode(encoding)
168
+ assert_equal(table,
169
+ load_csv(Zlib::Deflate.deflate(csv),
170
+ schema: table.schema,
171
+ encoding: encoding,
172
+ compression: :gzip))
173
+ end
144
174
  end
145
175
  end
@@ -43,5 +43,30 @@ class DataTypeTest < Test::Unit::TestCase
43
43
  assert_equal(Arrow::ListDataType.new(field),
44
44
  Arrow::DataType.resolve(type: :list, field: field))
45
45
  end
46
+
47
+ test("_") do
48
+ assert_equal(Arrow::FixedSizeBinaryDataType.new(10),
49
+ Arrow::DataType.resolve([:fixed_size_binary, 10]))
50
+ end
51
+ end
52
+
53
+ sub_test_case("instance methods") do
54
+ def setup
55
+ @data_type = Arrow::StringDataType.new
56
+ end
57
+
58
+ sub_test_case("#==") do
59
+ test("Arrow::DataType") do
60
+ assert do
61
+ @data_type == @data_type
62
+ end
63
+ end
64
+
65
+ test("not Arrow::DataType") do
66
+ assert do
67
+ not (@data_type == 29)
68
+ end
69
+ end
70
+ end
46
71
  end
47
72
  end
@@ -0,0 +1,64 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class Decimal128Test < Test::Unit::TestCase
19
+ sub_test_case("instance methods") do
20
+ def setup
21
+ @decimal128 = Arrow::Decimal128.new("10.1")
22
+ end
23
+
24
+ sub_test_case("#==") do
25
+ test("Arrow::Decimal128") do
26
+ assert do
27
+ @decimal128 == @decimal128
28
+ end
29
+ end
30
+
31
+ test("not Arrow::Decimal128") do
32
+ assert do
33
+ not (@decimal128 == 10.1)
34
+ end
35
+ end
36
+ end
37
+
38
+ sub_test_case("#!=") do
39
+ test("Arrow::Decimal128") do
40
+ assert do
41
+ not (@decimal128 != @decimal128)
42
+ end
43
+ end
44
+
45
+ test("not Arrow::Decimal128") do
46
+ assert do
47
+ @decimal128 != 10.1
48
+ end
49
+ end
50
+ end
51
+
52
+ sub_test_case("#to_s") do
53
+ test("default") do
54
+ assert_equal("101",
55
+ @decimal128.to_s)
56
+ end
57
+
58
+ test("scale") do
59
+ assert_equal("10.1",
60
+ @decimal128.to_s(1))
61
+ end
62
+ end
63
+ end
64
+ end
@@ -68,4 +68,24 @@ class FieldTest < Test::Unit::TestCase
68
68
  Arrow::Field.new(description).to_s)
69
69
  end
70
70
  end
71
+
72
+ sub_test_case("instance methods") do
73
+ def setup
74
+ @field = Arrow::Field.new("count", :uint32)
75
+ end
76
+
77
+ sub_test_case("#==") do
78
+ test("Arrow::Field") do
79
+ assert do
80
+ @field == @field
81
+ end
82
+ end
83
+
84
+ test("not Arrow::Field") do
85
+ assert do
86
+ not (@field == 29)
87
+ end
88
+ end
89
+ end
90
+ end
71
91
  end
@@ -43,8 +43,8 @@ class GroupTest < Test::Unit::TestCase
43
43
  table = Arrow::Table.new(raw_table)
44
44
  assert_equal(<<-TABLE, table.group(:time).count.to_s)
45
45
  time int
46
- 0 2018-01-29T00:00:00+09:00 1
47
- 1 2018-01-30T00:00:00+09:00 1
46
+ 0 #{time_values[0].iso8601} 1
47
+ 1 #{time_values[1].iso8601} 1
48
48
  TABLE
49
49
  end
50
50
  end
@@ -112,5 +112,14 @@ class RecordBatchBuilderTest < Test::Unit::TestCase
112
112
  arrays),
113
113
  @builder.flush)
114
114
  end
115
+
116
+ test("#column_builders") do
117
+ column_builders = [
118
+ @builder.get_column_builder(0),
119
+ @builder.get_column_builder(1),
120
+ ]
121
+ assert_equal(column_builders,
122
+ @builder.column_builders)
123
+ end
115
124
  end
116
125
  end
@@ -108,5 +108,19 @@ class RecordBatchTest < Test::Unit::TestCase
108
108
  assert_equal(Arrow::Table.new(@schema, [@counts]),
109
109
  @record_batch.to_table)
110
110
  end
111
+
112
+ sub_test_case("#==") do
113
+ test("Arrow::RecordBatch") do
114
+ assert do
115
+ @record_batch == @record_batch
116
+ end
117
+ end
118
+
119
+ test("not Arrow::RecordBatch") do
120
+ assert do
121
+ not (@record_batch == 29)
122
+ end
123
+ end
124
+ end
111
125
  end
112
126
  end
@@ -100,5 +100,19 @@ class SchemaTest < Test::Unit::TestCase
100
100
  end
101
101
  end
102
102
  end
103
+
104
+ sub_test_case("#==") do
105
+ test("Arrow::Schema") do
106
+ assert do
107
+ @schema == @schema
108
+ end
109
+ end
110
+
111
+ test("not Arrow::Schema") do
112
+ assert do
113
+ not (@schema == 29)
114
+ end
115
+ end
116
+ end
103
117
  end
104
118
  end
@@ -49,9 +49,22 @@ class StructArrayTest < Test::Unit::TestCase
49
49
  end
50
50
 
51
51
  test("#[]") do
52
- notify("TODO: Returns Arrow::Struct instead.")
53
- assert_equal([[true, false], [1, 2]],
54
- [@array[0].to_a, @array[1].to_a])
52
+ assert_equal([
53
+ Arrow::Struct.new(@array, 0),
54
+ Arrow::Struct.new(@array, 1),
55
+ ],
56
+ @array.to_a)
57
+ end
58
+
59
+ test("#get_value") do
60
+ assert_equal([
61
+ Arrow::Struct.new(@array, 0),
62
+ Arrow::Struct.new(@array, 1),
63
+ ],
64
+ [
65
+ @array.get_value(0),
66
+ @array.get_value(1),
67
+ ])
55
68
  end
56
69
 
57
70
  sub_test_case("#find_field") do
@@ -596,5 +596,19 @@ visible: false
596
596
  end
597
597
  end
598
598
  end
599
+
600
+ sub_test_case("#==") do
601
+ test("Arrow::Table") do
602
+ assert do
603
+ @table == @table
604
+ end
605
+ end
606
+
607
+ test("not Arrow::Table") do
608
+ assert do
609
+ not (@table == 29)
610
+ end
611
+ end
612
+ end
599
613
  end
600
614
  end