red-arrow 0.8.2 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. checksums.yaml +5 -5
  2. data/Gemfile +14 -11
  3. data/{doc/text/apache-2.0.txt → LICENSE.txt} +0 -0
  4. data/NOTICE.txt +2 -0
  5. data/README.md +27 -30
  6. data/Rakefile +15 -21
  7. data/dependency-check/Rakefile +15 -12
  8. data/doc/text/development.md +19 -0
  9. data/lib/arrow.rb +14 -11
  10. data/lib/arrow/array-builder.rb +55 -55
  11. data/lib/arrow/array.rb +15 -11
  12. data/lib/arrow/block-closable.rb +14 -11
  13. data/lib/arrow/chunked-array.rb +15 -11
  14. data/lib/arrow/column.rb +14 -11
  15. data/lib/arrow/csv-loader.rb +49 -20
  16. data/lib/arrow/csv-reader.rb +14 -11
  17. data/lib/arrow/date32-array-builder.rb +14 -11
  18. data/lib/arrow/date32-array.rb +14 -11
  19. data/lib/arrow/date64-array-builder.rb +15 -12
  20. data/lib/arrow/date64-array.rb +14 -11
  21. data/lib/arrow/field.rb +14 -11
  22. data/lib/arrow/group.rb +55 -13
  23. data/lib/arrow/loader.rb +16 -15
  24. data/lib/arrow/record-batch-file-reader.rb +14 -11
  25. data/lib/arrow/record-batch-stream-reader.rb +14 -11
  26. data/lib/arrow/record-batch.rb +14 -11
  27. data/lib/arrow/record-containable.rb +14 -11
  28. data/lib/arrow/record.rb +14 -11
  29. data/lib/arrow/rolling-window.rb +48 -0
  30. data/lib/arrow/slicer.rb +20 -14
  31. data/lib/arrow/struct-array.rb +24 -0
  32. data/lib/arrow/table-formatter.rb +15 -11
  33. data/lib/arrow/table-list-formatter.rb +15 -11
  34. data/lib/arrow/table-loader.rb +24 -11
  35. data/lib/arrow/table-saver.rb +14 -11
  36. data/lib/arrow/table-table-formatter.rb +15 -11
  37. data/lib/arrow/table.rb +48 -17
  38. data/lib/arrow/tensor.rb +14 -11
  39. data/lib/arrow/timestamp-array-builder.rb +16 -29
  40. data/lib/arrow/timestamp-array.rb +15 -30
  41. data/lib/arrow/version.rb +23 -12
  42. data/red-arrow.gemspec +30 -30
  43. data/test/fixture/TestOrcFile.test1.orc +0 -0
  44. data/test/fixture/float-integer.csv +20 -0
  45. data/test/fixture/integer-float.csv +20 -0
  46. data/test/fixture/null-with-double-quote.csv +16 -0
  47. data/test/fixture/null-without-double-quote.csv +16 -0
  48. data/test/fixture/with-header-float.csv +20 -0
  49. data/test/fixture/with-header.csv +16 -0
  50. data/test/fixture/without-header-float.csv +19 -0
  51. data/test/fixture/without-header.csv +16 -0
  52. data/test/helper.rb +16 -11
  53. data/test/helper/fixture.rb +14 -11
  54. data/test/run-test.rb +17 -12
  55. data/test/test-array-builder.rb +14 -11
  56. data/test/test-array.rb +14 -11
  57. data/test/test-chunked-array.rb +14 -11
  58. data/test/test-column.rb +14 -11
  59. data/test/test-csv-loader.rb +68 -49
  60. data/test/test-csv-reader.rb +17 -12
  61. data/test/test-date32-array.rb +14 -11
  62. data/test/test-date64-array.rb +14 -11
  63. data/test/test-group.rb +56 -11
  64. data/test/test-orc.rb +177 -0
  65. data/test/test-record-batch-file-reader.rb +14 -11
  66. data/test/test-record-batch.rb +14 -11
  67. data/test/test-rolling-window.rb +40 -0
  68. data/test/test-slicer.rb +14 -11
  69. data/test/test-struct-array.rb +36 -0
  70. data/test/test-table.rb +50 -22
  71. data/test/test-timestamp-array.rb +14 -11
  72. metadata +42 -57
  73. data/.yardopts +0 -6
  74. data/doc/text/news.md +0 -176
  75. data/lib/arrow/buffer.rb +0 -24
  76. data/lib/arrow/compatibility.rb +0 -28
  77. data/lib/arrow/data-type.rb +0 -81
@@ -1,22 +1,27 @@
1
- # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
2
8
  #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
6
10
  #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
14
17
 
15
18
  class CSVReaderTest < Test::Unit::TestCase
16
19
  include Helper::Fixture
17
20
 
18
21
  test("#read") do
19
- CSV.open(fixture_path("with-header.csv").to_s, headers: true) do |csv|
22
+ CSV.open(fixture_path("with-header.csv").to_s,
23
+ headers: true,
24
+ skip_lines: /^#/) do |csv|
20
25
  reader = Arrow::CSVReader.new(csv)
21
26
  assert_equal(<<-TABLE, reader.read.to_s)
22
27
  name score
@@ -1,16 +1,19 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
2
8
  #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
6
10
  #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
14
17
 
15
18
  class Date32ArrayTest < Test::Unit::TestCase
16
19
  test("#[]") do
@@ -1,16 +1,19 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
2
8
  #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
6
10
  #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
14
17
 
15
18
  class Date64ArrayTest < Test::Unit::TestCase
16
19
  test("#[]") do
@@ -1,16 +1,19 @@
1
- # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
2
8
  #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
6
10
  #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
14
17
 
15
18
  class GroupTest < Test::Unit::TestCase
16
19
  include Helper::Fixture
@@ -108,4 +111,46 @@ class GroupTest < Test::Unit::TestCase
108
111
  TABLE
109
112
  end
110
113
  end
114
+
115
+ sub_test_case("#min") do
116
+ test("single") do
117
+ assert_equal(<<-TABLE, @table.group(:group_key1).min.to_s)
118
+ group_key1 group_key2 int uint float
119
+ 0 1 1 -2 1 2.200000
120
+ 1 2 1 3 3.300000
121
+ 2 3 1 -6 4 4.400000
122
+ TABLE
123
+ end
124
+
125
+ test("multiple") do
126
+ assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).min.to_s)
127
+ group_key1 group_key2 int uint float
128
+ 0 1 1 -2 1 2.200000
129
+ 1 2 1 3 3.300000
130
+ 2 3 1 -4 4 4.400000
131
+ 3 3 2 -6 5 5.500000
132
+ TABLE
133
+ end
134
+ end
135
+
136
+ sub_test_case("#max") do
137
+ test("single") do
138
+ assert_equal(<<-TABLE, @table.group(:group_key1).max.to_s)
139
+ group_key1 group_key2 int uint float
140
+ 0 1 1 -1 1 2.200000
141
+ 1 2 1 3 3.300000
142
+ 2 3 2 -4 6 6.600000
143
+ TABLE
144
+ end
145
+
146
+ test("multiple") do
147
+ assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).max.to_s)
148
+ group_key1 group_key2 int uint float
149
+ 0 1 1 -1 1 2.200000
150
+ 1 2 1 3 3.300000
151
+ 2 3 1 -4 4 4.400000
152
+ 3 3 2 -5 6 6.600000
153
+ TABLE
154
+ end
155
+ end
111
156
  end
@@ -0,0 +1,177 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class ORCTest < Test::Unit::TestCase
19
+ include Helper::Fixture
20
+
21
+ def setup
22
+ omit("Require Apache Arrow ORC") unless Arrow.const_defined?(:ORCFileReader)
23
+ @orc_path = fixture_path("TestOrcFile.test1.orc")
24
+ end
25
+
26
+ def pp_values(values)
27
+ "[\n " + values.collect(&:inspect).join(",\n ") + "\n]"
28
+ end
29
+
30
+ sub_test_case("load") do
31
+ test("default") do
32
+ table = Arrow::Table.load(@orc_path)
33
+ dump = table.columns.collect do |column|
34
+ [
35
+ column.field.to_s,
36
+ column.data.chunks.collect(&:to_s),
37
+ ]
38
+ end
39
+ assert_equal([
40
+ ["boolean1: bool", [pp_values([false, true])]],
41
+ ["byte1: int8", [pp_values([1, 100])]],
42
+ ["short1: int16", [pp_values([1024, 2048])]],
43
+ ["int1: int32", [pp_values([65536, 65536])]],
44
+ [
45
+ "long1: int64",
46
+ [pp_values([9223372036854775807, 9223372036854775807])],
47
+ ],
48
+ ["float1: float", [pp_values([1, 2])]],
49
+ ["double1: double", [pp_values([-15, -5])]],
50
+ ["bytes1: binary", ["[\n 0001020304,\n \n]"]],
51
+ ["string1: string", [pp_values(["hi", "bye"])]],
52
+ [
53
+ "middle: " +
54
+ "struct<list: " +
55
+ "list<item: struct<int1: int32, string1: string>>>",
56
+ [
57
+ <<-STRUCT.chomp
58
+ -- is_valid: all not null
59
+ -- child 0 type: list<item: struct<int1: int32, string1: string>>
60
+ [
61
+ -- is_valid: all not null
62
+ -- child 0 type: int32
63
+ [
64
+ 1,
65
+ 2
66
+ ]
67
+ -- child 1 type: string
68
+ [
69
+ "bye",
70
+ "sigh"
71
+ ],
72
+ -- is_valid: all not null
73
+ -- child 0 type: int32
74
+ [
75
+ 1,
76
+ 2
77
+ ]
78
+ -- child 1 type: string
79
+ [
80
+ "bye",
81
+ "sigh"
82
+ ]
83
+ ]
84
+ STRUCT
85
+ ]
86
+ ],
87
+ [
88
+ "list: list<item: struct<int1: int32, string1: string>>",
89
+ [
90
+ <<-LIST.chomp
91
+ [
92
+ -- is_valid: all not null
93
+ -- child 0 type: int32
94
+ [
95
+ 3,
96
+ 4
97
+ ]
98
+ -- child 1 type: string
99
+ [
100
+ "good",
101
+ "bad"
102
+ ],
103
+ -- is_valid: all not null
104
+ -- child 0 type: int32
105
+ [
106
+ 100000000,
107
+ -100000,
108
+ 1234
109
+ ]
110
+ -- child 1 type: string
111
+ [
112
+ "cat",
113
+ "in",
114
+ "hat"
115
+ ]
116
+ ]
117
+ LIST
118
+ ]
119
+ ],
120
+ [
121
+ "map: list<item: " +
122
+ "struct<key: string, value: " +
123
+ "struct<int1: int32, string1: string>>>",
124
+ [
125
+ <<-MAP.chomp
126
+ [
127
+ -- is_valid: all not null
128
+ -- child 0 type: string
129
+ []
130
+ -- child 1 type: struct<int1: int32, string1: string>
131
+ -- is_valid: all not null
132
+ -- child 0 type: int32
133
+ []
134
+ -- child 1 type: string
135
+ [],
136
+ -- is_valid: all not null
137
+ -- child 0 type: string
138
+ [
139
+ "chani",
140
+ "mauddib"
141
+ ]
142
+ -- child 1 type: struct<int1: int32, string1: string>
143
+ -- is_valid: all not null
144
+ -- child 0 type: int32
145
+ [
146
+ 5,
147
+ 1
148
+ ]
149
+ -- child 1 type: string
150
+ [
151
+ "chani",
152
+ "mauddib"
153
+ ]
154
+ ]
155
+ MAP
156
+ ],
157
+ ],
158
+ ],
159
+ dump)
160
+ end
161
+
162
+ test(":field_indexes") do
163
+ table = Arrow::Table.load(@orc_path, field_indexes: [1, 3])
164
+ dump = table.columns.collect do |column|
165
+ [
166
+ column.field.to_s,
167
+ column.data.chunks.collect(&:to_s),
168
+ ]
169
+ end
170
+ assert_equal([
171
+ ["boolean1: bool", [pp_values([false, true])]],
172
+ ["short1: int16", [pp_values([1024, 2048])]],
173
+ ],
174
+ dump)
175
+ end
176
+ end
177
+ end
@@ -1,16 +1,19 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
2
8
  #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
6
10
  #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
14
17
 
15
18
  class RecordBatchFileReaderTest < Test::Unit::TestCase
16
19
  test("write/read") do
@@ -1,16 +1,19 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
2
8
  #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
6
10
  #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
14
17
 
15
18
  class RecordBatchTest < Test::Unit::TestCase
16
19
  sub_test_case(".each") do
@@ -0,0 +1,40 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class RollingWindowTest < Test::Unit::TestCase
19
+ include Helper::Fixture
20
+
21
+ def setup
22
+ raw_table = {
23
+ :number => Arrow::Int32Array.new([1, -2, nil, 4, 6, 3]),
24
+ }
25
+ @table = Arrow::Table.new(raw_table)
26
+ end
27
+
28
+ test("#lag") do
29
+ assert_equal(<<-ARRAY.chomp, @table.window.lag(:number).to_s)
30
+ [
31
+ null,
32
+ -3,
33
+ null,
34
+ null,
35
+ 2,
36
+ -3
37
+ ]
38
+ ARRAY
39
+ end
40
+ end