red-arrow 0.8.0 → 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/text/development.md +15 -0
- data/doc/text/news.md +14 -0
- data/lib/arrow/array-builder.rb +60 -5
- data/lib/arrow/csv-loader.rb +206 -0
- data/lib/arrow/csv-reader.rb +6 -117
- data/lib/arrow/date32-array-builder.rb +29 -0
- data/lib/arrow/date32-array.rb +27 -0
- data/lib/arrow/date64-array-builder.rb +30 -0
- data/lib/arrow/date64-array.rb +26 -0
- data/lib/arrow/loader.rb +10 -1
- data/lib/arrow/record-batch.rb +11 -37
- data/lib/arrow/record-containable.rb +70 -0
- data/lib/arrow/record.rb +20 -5
- data/lib/arrow/table-loader.rb +117 -0
- data/lib/arrow/table-saver.rb +93 -0
- data/lib/arrow/table.rb +18 -25
- data/lib/arrow/timestamp-array-builder.rb +59 -0
- data/lib/arrow/version.rb +2 -2
- data/test/test-array-builder.rb +80 -42
- data/test/test-csv-loader.rb +79 -0
- data/test/test-csv-reader.rb +5 -66
- data/test/test-date32-array.rb +21 -0
- data/test/test-date64-array.rb +22 -0
- data/test/test-table.rb +64 -10
- metadata +18 -2
@@ -0,0 +1,93 @@
|
|
1
|
+
# Copyright 2018 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module Arrow
  # Writes a table to a file in one of the supported formats.
  #
  # The format is taken from `options[:format]`. When that is not
  # given, it is guessed from the extension of the path, and when the
  # extension does not name a supported format, `:arrow` is used.
  #
  # Each format is implemented by a private `save_as_<format>`
  # method; the set of supported formats is discovered from those
  # method names.
  class TableSaver
    class << self
      # Shortcut for `new(table, path, options).save`.
      def save(table, path, options={})
        new(table, path, options).save
      end
    end

    # @param table The table to be saved. Its `schema` is read by
    #   every format; the CSV format also calls `each_record`.
    # @param path The destination. `to_path` is called on it when
    #   available (e.g. `Pathname`).
    # @param options [Hash] Only `:format` is read here.
    def initialize(table, path, options={})
      @table = table
      @path = path
      @options = options
    end

    # Saves the table to the path given to the constructor.
    #
    # @return The return value of the selected `save_as_<format>`
    #   method.
    # @raise [ArgumentError] If there is no `save_as_<format>` method
    #   for the selected format.
    def save
      path = @path
      path = path.to_path if path.respond_to?(:to_path)
      format = @options[:format] || guess_format(path) || :arrow

      custom_save_method = "save_as_#{format}"
      unless respond_to?(custom_save_method, true)
        raise ArgumentError, unknown_format_message(format)
      end
      __send__(custom_save_method, path)
    end

    private
    # Names of all supported formats, i.e. the part after `save_as_`
    # of every public or private method of this object. Sorted so that
    # the error message doesn't depend on method definition order.
    def available_formats
      names = methods(true) | private_methods(true)
      formats = names.collect {|name| name.to_s[/\Asave_as_(.*)\z/m, 1]}
      formats.compact.sort
    end

    # Builds the error message in one interpolation instead of
    # appending to a string literal, so that it also works when string
    # literals are frozen.
    def unknown_format_message(format)
      "Arrow::Table save format must be one of " +
        "[#{available_formats.join(", ")}]: #{format.inspect}"
    end

    # Returns the lower-cased extension of `path` without the leading
    # dot when a matching `save_as_<extension>` method exists, `nil`
    # otherwise (including when there is no extension).
    def guess_format(path)
      extension = ::File.extname(path).sub(/\A\./, "").downcase
      return nil if extension.empty?

      return extension if respond_to?("save_as_#{extension}", true)

      nil
    end

    # Opens `path` for output and writes the whole table with the
    # given writer class.
    def save_raw(writer_class, path)
      # NOTE(review): the second argument is presumably the "append"
      # flag of the output stream - confirm against Arrow GLib.
      FileOutputStream.open(path, false) do |output|
        writer_class.open(output, @table.schema) do |writer|
          writer.write_table(@table)
        end
      end
    end

    # `:arrow` is an alias of `:batch`.
    def save_as_arrow(path)
      save_as_batch(path)
    end

    def save_as_batch(path)
      save_raw(RecordBatchFileWriter, path)
    end

    def save_as_stream(path)
      save_raw(RecordBatchStreamWriter, path)
    end

    # Writes a header row with the field names followed by one row
    # per record.
    def save_as_csv(path)
      # "::" for the same reason as ::File above: always use the
      # top-level constant, not one looked up through Arrow.
      ::CSV.open(path, "w") do |csv|
        names = @table.schema.fields.collect(&:name)
        csv << names
        @table.each_record(reuse_record: true) do |record|
          csv << names.collect {|name| record[name]}
        end
      end
    end
  end
end
|
data/lib/arrow/table.rb
CHANGED
@@ -12,8 +12,18 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
|
+
require "arrow/record-containable"
|
16
|
+
|
15
17
|
module Arrow
|
16
18
|
class Table
|
19
|
+
include RecordContainable
|
20
|
+
|
21
|
+
class << self
|
22
|
+
def load(path, options={})
|
23
|
+
TableLoader.load(path, options)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
17
27
|
alias_method :initialize_raw, :initialize
|
18
28
|
def initialize(schema_or_raw_table_or_columns, columns=nil)
|
19
29
|
if columns.nil?
|
@@ -38,16 +48,8 @@ module Arrow
|
|
38
48
|
initialize_raw(schema, columns)
|
39
49
|
end
|
40
50
|
|
41
|
-
def each_column
|
42
|
-
return to_enum(__method__) unless block_given?
|
43
|
-
|
44
|
-
n_columns.times do |i|
|
45
|
-
yield(get_column(i))
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
51
|
def columns
|
50
|
-
@columns ||=
|
52
|
+
@columns ||= n_columns.times.collect {|i| get_column(i)}
|
51
53
|
end
|
52
54
|
|
53
55
|
def each_record_batch
|
@@ -64,19 +66,12 @@ module Arrow
|
|
64
66
|
# @return [Arrow::Column, Array<Arrow::Column>, nil]
|
65
67
|
def [](*args)
|
66
68
|
if args.size == 1
|
67
|
-
|
68
|
-
when String, Symbol
|
69
|
-
find_column(args[0])
|
70
|
-
else
|
71
|
-
message = "#{self.class}\#[#{args[0].inspect}]: " +
|
72
|
-
"Must be String or Symbol"
|
73
|
-
raise ArgumentError, message
|
74
|
-
end
|
69
|
+
find_column(args[0])
|
75
70
|
else
|
76
71
|
new_columns = args.collect do |column_name|
|
77
72
|
column = find_column(column_name)
|
78
73
|
if column.nil?
|
79
|
-
message = "
|
74
|
+
message = "unknown column: <#{column_name.inspect}>: #{inspect}"
|
80
75
|
raise ArgumentError, message
|
81
76
|
end
|
82
77
|
column
|
@@ -246,6 +241,11 @@ module Arrow
|
|
246
241
|
self.class.new(selected_columns)
|
247
242
|
end
|
248
243
|
|
244
|
+
def save(path, options={})
|
245
|
+
saver = TableSaver.new(self, path, options)
|
246
|
+
saver.save
|
247
|
+
end
|
248
|
+
|
249
249
|
def to_s(options={})
|
250
250
|
formatter = TableFormatter.new(self, options)
|
251
251
|
formatter.format
|
@@ -269,13 +269,6 @@ module Arrow
|
|
269
269
|
end
|
270
270
|
|
271
271
|
private
|
272
|
-
def find_column(name)
|
273
|
-
name = name.to_s
|
274
|
-
columns.find do |column|
|
275
|
-
column.name == name
|
276
|
-
end
|
277
|
-
end
|
278
|
-
|
279
272
|
def slice_by_ranges(ranges)
|
280
273
|
sliced_columns = columns.collect do |column|
|
281
274
|
chunks = []
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# Copyright 2018 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module Arrow
  # Ruby-level additions to the timestamp array builder: values that
  # are (or can be converted to) `Time` are converted to an integer
  # count of the builder's time unit since the epoch.
  class TimestampArrayBuilder
    # TODO: Workaround for Apache Arrow GLib 0.8.0
    alias_method :initialize_raw, :initialize
    # Runs the original initializer, then remembers the time unit by
    # looking at the suffix of the data type's string form
    # (`[s]`, `[ms]`, `[us]`; anything else is treated as nanoseconds).
    def initialize(data_type)
      initialize_raw(data_type)
      @unit_id =
        case data_type.to_s
        when /\[s\]\z/
          :second
        when /\[ms\]\z/
          :milli
        when /\[us\]\z/
          :micro
        else
          :nano
        end
    end

    private
    # The time unit as a Symbol. Falls back to `unit.nick` when the
    # initializer above hasn't set it.
    def unit_id
      @unit_id ||= unit.nick.to_sym
    end

    # Converts `value` to the integer representation for this
    # builder's unit.
    #
    # Anything that responds to `to_time` is converted to `Time`
    # first. Values that aren't `Time` after that are returned as is.
    # Sub-unit precision is truncated (e.g. microseconds are dropped
    # for `:second`).
    def convert_to_arrow_value(value)
      if !value.is_a?(Time) && value.respond_to?(:to_time)
        value = value.to_time
      end
      return value unless value.is_a?(Time)

      case unit_id
      when :second
        value.to_i
      when :milli
        value.to_i * 1_000 + value.usec / 1_000
      when :micro
        value.to_i * 1_000_000 + value.usec
      else
        value.to_i * 1_000_000_000 + value.nsec
      end
    end
  end
end
|
data/lib/arrow/version.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2017 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -13,5 +13,5 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
module Arrow
|
16
|
-
VERSION = "0.8.
|
16
|
+
VERSION = "0.8.1"
|
17
17
|
end
|
data/test/test-array-builder.rb
CHANGED
@@ -14,58 +14,96 @@
|
|
14
14
|
|
15
15
|
class ArrayBuilderTest < Test::Unit::TestCase
|
16
16
|
sub_test_case(".build") do
|
17
|
-
|
18
|
-
array =
|
19
|
-
assert_equal(
|
20
|
-
array.to_a)
|
17
|
+
def assert_build(builder_class, raw_array)
|
18
|
+
array = builder_class.build(raw_array)
|
19
|
+
assert_equal(raw_array, array.to_a)
|
21
20
|
end
|
22
21
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
22
|
+
sub_test_case("generic builder") do
|
23
|
+
test("strings") do
|
24
|
+
assert_build(Arrow::ArrayBuilder,
|
25
|
+
["Hello", nil, "World"])
|
26
|
+
end
|
28
27
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
end
|
28
|
+
test("boolean") do
|
29
|
+
assert_build(Arrow::ArrayBuilder,
|
30
|
+
[true, nil, false])
|
31
|
+
end
|
34
32
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
end
|
33
|
+
test("positive integers") do
|
34
|
+
assert_build(Arrow::ArrayBuilder,
|
35
|
+
[1, nil, 2, nil, 3])
|
36
|
+
end
|
40
37
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
end
|
38
|
+
test("negative integers") do
|
39
|
+
assert_build(Arrow::ArrayBuilder,
|
40
|
+
[nil, -1, nil, -2, nil, -3])
|
41
|
+
end
|
46
42
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
43
|
+
test("times") do
|
44
|
+
assert_build(Arrow::ArrayBuilder,
|
45
|
+
[Time.at(0), Time.at(1), Time.at(2)])
|
46
|
+
end
|
52
47
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
end
|
48
|
+
test("dates") do
|
49
|
+
assert_build(Arrow::ArrayBuilder,
|
50
|
+
[Date.new(2018, 1, 4), Date.new(2018, 1, 5)])
|
51
|
+
end
|
58
52
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
53
|
+
test("datetimes") do
|
54
|
+
assert_build(Arrow::ArrayBuilder,
|
55
|
+
[
|
56
|
+
DateTime.new(2018, 1, 4, 23, 18, 23),
|
57
|
+
DateTime.new(2018, 1, 5, 0, 23, 21),
|
58
|
+
])
|
59
|
+
end
|
63
60
|
end
|
64
61
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
62
|
+
sub_test_case("specific builder") do
|
63
|
+
test("empty") do
|
64
|
+
assert_build(Arrow::Int32ArrayBuilder,
|
65
|
+
[])
|
66
|
+
end
|
67
|
+
|
68
|
+
test("values") do
|
69
|
+
assert_build(Arrow::Int32ArrayBuilder,
|
70
|
+
[1, -2])
|
71
|
+
end
|
72
|
+
|
73
|
+
test("values, nils") do
|
74
|
+
assert_build(Arrow::Int32ArrayBuilder,
|
75
|
+
[1, -2, nil, nil])
|
76
|
+
end
|
77
|
+
|
78
|
+
test("values, nils, values") do
|
79
|
+
assert_build(Arrow::Int32ArrayBuilder,
|
80
|
+
[1, -2, nil, nil, 3, -4])
|
81
|
+
end
|
82
|
+
|
83
|
+
test("values, nils, values, nils") do
|
84
|
+
assert_build(Arrow::Int32ArrayBuilder,
|
85
|
+
[1, -2, nil, nil, 3, -4, nil, nil])
|
86
|
+
end
|
87
|
+
|
88
|
+
test("nils") do
|
89
|
+
assert_build(Arrow::Int32ArrayBuilder,
|
90
|
+
[nil, nil])
|
91
|
+
end
|
92
|
+
|
93
|
+
test("nils, values") do
|
94
|
+
assert_build(Arrow::Int32ArrayBuilder,
|
95
|
+
[nil, nil, 3, -4])
|
96
|
+
end
|
97
|
+
|
98
|
+
test("nils, values, nil") do
|
99
|
+
assert_build(Arrow::Int32ArrayBuilder,
|
100
|
+
[nil, nil, 3, -4, nil, nil])
|
101
|
+
end
|
102
|
+
|
103
|
+
test("nils, values, nil, values") do
|
104
|
+
assert_build(Arrow::Int32ArrayBuilder,
|
105
|
+
[nil, nil, 3, -4, nil, nil, 5, -6])
|
106
|
+
end
|
69
107
|
end
|
70
108
|
end
|
71
109
|
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
class CSVLoaderTest < Test::Unit::TestCase
|
16
|
+
include Helper::Fixture
|
17
|
+
|
18
|
+
sub_test_case(".load") do
|
19
|
+
test("String: data: with header") do
|
20
|
+
data = fixture_path("with-header.csv").read
|
21
|
+
assert_equal(<<-TABLE, Arrow::CSVLoader.load(data).to_s)
|
22
|
+
name score
|
23
|
+
0 alice 10
|
24
|
+
1 bob 29
|
25
|
+
2 chris -1
|
26
|
+
TABLE
|
27
|
+
end
|
28
|
+
|
29
|
+
test("String: data: without header") do
|
30
|
+
data = fixture_path("without-header.csv").read
|
31
|
+
assert_equal(<<-TABLE, Arrow::CSVLoader.load(data).to_s)
|
32
|
+
0 1
|
33
|
+
0 alice 10
|
34
|
+
1 bob 29
|
35
|
+
2 chris -1
|
36
|
+
TABLE
|
37
|
+
end
|
38
|
+
|
39
|
+
test("String: path: with header") do
|
40
|
+
path = fixture_path("with-header.csv").to_s
|
41
|
+
assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
|
42
|
+
name score
|
43
|
+
0 alice 10
|
44
|
+
1 bob 29
|
45
|
+
2 chris -1
|
46
|
+
TABLE
|
47
|
+
end
|
48
|
+
|
49
|
+
test("String: path: without header") do
|
50
|
+
path = fixture_path("without-header.csv").to_s
|
51
|
+
assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
|
52
|
+
0 1
|
53
|
+
0 alice 10
|
54
|
+
1 bob 29
|
55
|
+
2 chris -1
|
56
|
+
TABLE
|
57
|
+
end
|
58
|
+
|
59
|
+
test("Pathname: with header") do
|
60
|
+
path = fixture_path("with-header.csv")
|
61
|
+
assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
|
62
|
+
name score
|
63
|
+
0 alice 10
|
64
|
+
1 bob 29
|
65
|
+
2 chris -1
|
66
|
+
TABLE
|
67
|
+
end
|
68
|
+
|
69
|
+
test("Pathname: without header") do
|
70
|
+
path = fixture_path("without-header.csv")
|
71
|
+
assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
|
72
|
+
0 1
|
73
|
+
0 alice 10
|
74
|
+
1 bob 29
|
75
|
+
2 chris -1
|
76
|
+
TABLE
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|