red-arrow 0.8.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. checksums.yaml +5 -5
  2. data/Gemfile +14 -11
  3. data/{doc/text/apache-2.0.txt → LICENSE.txt} +0 -0
  4. data/NOTICE.txt +2 -0
  5. data/README.md +27 -30
  6. data/Rakefile +15 -21
  7. data/dependency-check/Rakefile +15 -12
  8. data/doc/text/development.md +19 -0
  9. data/lib/arrow.rb +14 -11
  10. data/lib/arrow/array-builder.rb +55 -55
  11. data/lib/arrow/array.rb +15 -11
  12. data/lib/arrow/block-closable.rb +14 -11
  13. data/lib/arrow/chunked-array.rb +15 -11
  14. data/lib/arrow/column.rb +14 -11
  15. data/lib/arrow/csv-loader.rb +49 -20
  16. data/lib/arrow/csv-reader.rb +14 -11
  17. data/lib/arrow/date32-array-builder.rb +14 -11
  18. data/lib/arrow/date32-array.rb +14 -11
  19. data/lib/arrow/date64-array-builder.rb +15 -12
  20. data/lib/arrow/date64-array.rb +14 -11
  21. data/lib/arrow/field.rb +14 -11
  22. data/lib/arrow/group.rb +55 -13
  23. data/lib/arrow/loader.rb +16 -15
  24. data/lib/arrow/record-batch-file-reader.rb +14 -11
  25. data/lib/arrow/record-batch-stream-reader.rb +14 -11
  26. data/lib/arrow/record-batch.rb +14 -11
  27. data/lib/arrow/record-containable.rb +14 -11
  28. data/lib/arrow/record.rb +14 -11
  29. data/lib/arrow/rolling-window.rb +48 -0
  30. data/lib/arrow/slicer.rb +20 -14
  31. data/lib/arrow/struct-array.rb +24 -0
  32. data/lib/arrow/table-formatter.rb +15 -11
  33. data/lib/arrow/table-list-formatter.rb +15 -11
  34. data/lib/arrow/table-loader.rb +24 -11
  35. data/lib/arrow/table-saver.rb +14 -11
  36. data/lib/arrow/table-table-formatter.rb +15 -11
  37. data/lib/arrow/table.rb +48 -17
  38. data/lib/arrow/tensor.rb +14 -11
  39. data/lib/arrow/timestamp-array-builder.rb +16 -29
  40. data/lib/arrow/timestamp-array.rb +15 -30
  41. data/lib/arrow/version.rb +23 -12
  42. data/red-arrow.gemspec +30 -30
  43. data/test/fixture/TestOrcFile.test1.orc +0 -0
  44. data/test/fixture/float-integer.csv +20 -0
  45. data/test/fixture/integer-float.csv +20 -0
  46. data/test/fixture/null-with-double-quote.csv +16 -0
  47. data/test/fixture/null-without-double-quote.csv +16 -0
  48. data/test/fixture/with-header-float.csv +20 -0
  49. data/test/fixture/with-header.csv +16 -0
  50. data/test/fixture/without-header-float.csv +19 -0
  51. data/test/fixture/without-header.csv +16 -0
  52. data/test/helper.rb +16 -11
  53. data/test/helper/fixture.rb +14 -11
  54. data/test/run-test.rb +17 -12
  55. data/test/test-array-builder.rb +14 -11
  56. data/test/test-array.rb +14 -11
  57. data/test/test-chunked-array.rb +14 -11
  58. data/test/test-column.rb +14 -11
  59. data/test/test-csv-loader.rb +68 -49
  60. data/test/test-csv-reader.rb +17 -12
  61. data/test/test-date32-array.rb +14 -11
  62. data/test/test-date64-array.rb +14 -11
  63. data/test/test-group.rb +56 -11
  64. data/test/test-orc.rb +177 -0
  65. data/test/test-record-batch-file-reader.rb +14 -11
  66. data/test/test-record-batch.rb +14 -11
  67. data/test/test-rolling-window.rb +40 -0
  68. data/test/test-slicer.rb +14 -11
  69. data/test/test-struct-array.rb +36 -0
  70. data/test/test-table.rb +50 -22
  71. data/test/test-timestamp-array.rb +14 -11
  72. metadata +42 -57
  73. data/.yardopts +0 -6
  74. data/doc/text/news.md +0 -176
  75. data/lib/arrow/buffer.rb +0 -24
  76. data/lib/arrow/compatibility.rb +0 -28
  77. data/lib/arrow/data-type.rb +0 -81
@@ -1,22 +1,27 @@
1
- # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
2
8
  #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
6
10
  #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
14
17
 
15
18
  class CSVReaderTest < Test::Unit::TestCase
16
19
  include Helper::Fixture
17
20
 
18
21
  test("#read") do
19
- CSV.open(fixture_path("with-header.csv").to_s, headers: true) do |csv|
22
+ CSV.open(fixture_path("with-header.csv").to_s,
23
+ headers: true,
24
+ skip_lines: /^#/) do |csv|
20
25
  reader = Arrow::CSVReader.new(csv)
21
26
  assert_equal(<<-TABLE, reader.read.to_s)
22
27
  name score
@@ -1,16 +1,19 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
2
8
  #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
6
10
  #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
14
17
 
15
18
  class Date32ArrayTest < Test::Unit::TestCase
16
19
  test("#[]") do
@@ -1,16 +1,19 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
2
8
  #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
6
10
  #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
14
17
 
15
18
  class Date64ArrayTest < Test::Unit::TestCase
16
19
  test("#[]") do
@@ -1,16 +1,19 @@
1
- # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
2
8
  #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
6
10
  #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
14
17
 
15
18
  class GroupTest < Test::Unit::TestCase
16
19
  include Helper::Fixture
@@ -108,4 +111,46 @@ class GroupTest < Test::Unit::TestCase
108
111
  TABLE
109
112
  end
110
113
  end
114
+
115
+ sub_test_case("#min") do
116
+ test("single") do
117
+ assert_equal(<<-TABLE, @table.group(:group_key1).min.to_s)
118
+ group_key1 group_key2 int uint float
119
+ 0 1 1 -2 1 2.200000
120
+ 1 2 1 3 3.300000
121
+ 2 3 1 -6 4 4.400000
122
+ TABLE
123
+ end
124
+
125
+ test("multiple") do
126
+ assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).min.to_s)
127
+ group_key1 group_key2 int uint float
128
+ 0 1 1 -2 1 2.200000
129
+ 1 2 1 3 3.300000
130
+ 2 3 1 -4 4 4.400000
131
+ 3 3 2 -6 5 5.500000
132
+ TABLE
133
+ end
134
+ end
135
+
136
+ sub_test_case("#max") do
137
+ test("single") do
138
+ assert_equal(<<-TABLE, @table.group(:group_key1).max.to_s)
139
+ group_key1 group_key2 int uint float
140
+ 0 1 1 -1 1 2.200000
141
+ 1 2 1 3 3.300000
142
+ 2 3 2 -4 6 6.600000
143
+ TABLE
144
+ end
145
+
146
+ test("multiple") do
147
+ assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).max.to_s)
148
+ group_key1 group_key2 int uint float
149
+ 0 1 1 -1 1 2.200000
150
+ 1 2 1 3 3.300000
151
+ 2 3 1 -4 4 4.400000
152
+ 3 3 2 -5 6 6.600000
153
+ TABLE
154
+ end
155
+ end
111
156
  end
@@ -0,0 +1,177 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class ORCTest < Test::Unit::TestCase
19
+ include Helper::Fixture
20
+
21
+ def setup
22
+ omit("Require Apache Arrow ORC") unless Arrow.const_defined?(:ORCFileReader)
23
+ @orc_path = fixture_path("TestOrcFile.test1.orc")
24
+ end
25
+
26
+ def pp_values(values)
27
+ "[\n " + values.collect(&:inspect).join(",\n ") + "\n]"
28
+ end
29
+
30
+ sub_test_case("load") do
31
+ test("default") do
32
+ table = Arrow::Table.load(@orc_path)
33
+ dump = table.columns.collect do |column|
34
+ [
35
+ column.field.to_s,
36
+ column.data.chunks.collect(&:to_s),
37
+ ]
38
+ end
39
+ assert_equal([
40
+ ["boolean1: bool", [pp_values([false, true])]],
41
+ ["byte1: int8", [pp_values([1, 100])]],
42
+ ["short1: int16", [pp_values([1024, 2048])]],
43
+ ["int1: int32", [pp_values([65536, 65536])]],
44
+ [
45
+ "long1: int64",
46
+ [pp_values([9223372036854775807, 9223372036854775807])],
47
+ ],
48
+ ["float1: float", [pp_values([1, 2])]],
49
+ ["double1: double", [pp_values([-15, -5])]],
50
+ ["bytes1: binary", ["[\n 0001020304,\n \n]"]],
51
+ ["string1: string", [pp_values(["hi", "bye"])]],
52
+ [
53
+ "middle: " +
54
+ "struct<list: " +
55
+ "list<item: struct<int1: int32, string1: string>>>",
56
+ [
57
+ <<-STRUCT.chomp
58
+ -- is_valid: all not null
59
+ -- child 0 type: list<item: struct<int1: int32, string1: string>>
60
+ [
61
+ -- is_valid: all not null
62
+ -- child 0 type: int32
63
+ [
64
+ 1,
65
+ 2
66
+ ]
67
+ -- child 1 type: string
68
+ [
69
+ "bye",
70
+ "sigh"
71
+ ],
72
+ -- is_valid: all not null
73
+ -- child 0 type: int32
74
+ [
75
+ 1,
76
+ 2
77
+ ]
78
+ -- child 1 type: string
79
+ [
80
+ "bye",
81
+ "sigh"
82
+ ]
83
+ ]
84
+ STRUCT
85
+ ]
86
+ ],
87
+ [
88
+ "list: list<item: struct<int1: int32, string1: string>>",
89
+ [
90
+ <<-LIST.chomp
91
+ [
92
+ -- is_valid: all not null
93
+ -- child 0 type: int32
94
+ [
95
+ 3,
96
+ 4
97
+ ]
98
+ -- child 1 type: string
99
+ [
100
+ "good",
101
+ "bad"
102
+ ],
103
+ -- is_valid: all not null
104
+ -- child 0 type: int32
105
+ [
106
+ 100000000,
107
+ -100000,
108
+ 1234
109
+ ]
110
+ -- child 1 type: string
111
+ [
112
+ "cat",
113
+ "in",
114
+ "hat"
115
+ ]
116
+ ]
117
+ LIST
118
+ ]
119
+ ],
120
+ [
121
+ "map: list<item: " +
122
+ "struct<key: string, value: " +
123
+ "struct<int1: int32, string1: string>>>",
124
+ [
125
+ <<-MAP.chomp
126
+ [
127
+ -- is_valid: all not null
128
+ -- child 0 type: string
129
+ []
130
+ -- child 1 type: struct<int1: int32, string1: string>
131
+ -- is_valid: all not null
132
+ -- child 0 type: int32
133
+ []
134
+ -- child 1 type: string
135
+ [],
136
+ -- is_valid: all not null
137
+ -- child 0 type: string
138
+ [
139
+ "chani",
140
+ "mauddib"
141
+ ]
142
+ -- child 1 type: struct<int1: int32, string1: string>
143
+ -- is_valid: all not null
144
+ -- child 0 type: int32
145
+ [
146
+ 5,
147
+ 1
148
+ ]
149
+ -- child 1 type: string
150
+ [
151
+ "chani",
152
+ "mauddib"
153
+ ]
154
+ ]
155
+ MAP
156
+ ],
157
+ ],
158
+ ],
159
+ dump)
160
+ end
161
+
162
+ test(":field_indexes") do
163
+ table = Arrow::Table.load(@orc_path, field_indexes: [1, 3])
164
+ dump = table.columns.collect do |column|
165
+ [
166
+ column.field.to_s,
167
+ column.data.chunks.collect(&:to_s),
168
+ ]
169
+ end
170
+ assert_equal([
171
+ ["boolean1: bool", [pp_values([false, true])]],
172
+ ["short1: int16", [pp_values([1024, 2048])]],
173
+ ],
174
+ dump)
175
+ end
176
+ end
177
+ end
@@ -1,16 +1,19 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
2
8
  #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
6
10
  #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
14
17
 
15
18
  class RecordBatchFileReaderTest < Test::Unit::TestCase
16
19
  test("write/read") do
@@ -1,16 +1,19 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
2
8
  #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
6
10
  #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
14
17
 
15
18
  class RecordBatchTest < Test::Unit::TestCase
16
19
  sub_test_case(".each") do
@@ -0,0 +1,40 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class RollingWindowTest < Test::Unit::TestCase
19
+ include Helper::Fixture
20
+
21
+ def setup
22
+ raw_table = {
23
+ :number => Arrow::Int32Array.new([1, -2, nil, 4, 6, 3]),
24
+ }
25
+ @table = Arrow::Table.new(raw_table)
26
+ end
27
+
28
+ test("#lag") do
29
+ assert_equal(<<-ARRAY.chomp, @table.window.lag(:number).to_s)
30
+ [
31
+ null,
32
+ -3,
33
+ null,
34
+ null,
35
+ 2,
36
+ -3
37
+ ]
38
+ ARRAY
39
+ end
40
+ end