red-parquet 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/text/news.md +8 -0
- data/lib/parquet/arrow-table-loadable.rb +30 -0
- data/lib/parquet/arrow-table-savable.rb +31 -0
- data/lib/parquet/loader.rb +10 -1
- data/lib/parquet/version.rb +1 -1
- data/red-parquet.gemspec +1 -1
- data/test/test-arrow-table.rb +45 -0
- metadata +9 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f182f603bf41836b43670bbb064b30570ed88270
|
4
|
+
data.tar.gz: 3707be9a6e4415441e91a5bb200225487c41123f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 977fa271681371c5e0a827ac55287d7dd3431649f417b61993852fcac0fadda05c97dc9283933be993b98f3c36c3486f457e292b7f35fcccc25ce1d8f3b1ab89
|
7
|
+
data.tar.gz: 9e44068de2e857804b6ea3d3d25e0c1f2c17c920bc26271092818f53bb61e88c0f70775a866b8f0d31503a17f40e1e549bbbe322b94a45eb408ba4875b1b886d
|
data/doc/text/news.md
CHANGED
data/lib/parquet/arrow-table-loadable.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Copyright 2018 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module Parquet
|
16
|
+
module ArrowTableLoadable
|
17
|
+
private
|
18
|
+
def load_as_parquet(path)
|
19
|
+
reader = Parquet::ArrowFileReader.new(path)
|
20
|
+
reader.n_threads = @options[:n_threads] || 4 # TODO
|
21
|
+
reader.read_table
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
module Arrow
|
27
|
+
class TableLoader
|
28
|
+
include Parquet::ArrowTableLoadable
|
29
|
+
end
|
30
|
+
end
|
data/lib/parquet/arrow-table-savable.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Copyright 2018 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module Parquet
|
16
|
+
module ArrowTableSavable
|
17
|
+
private
|
18
|
+
def save_as_parquet(path)
|
19
|
+
chunk_size = @options[:chunk_size] || 1024 # TODO
|
20
|
+
Parquet::ArrowFileWriter.open(@table.schema, path) do |writer|
|
21
|
+
writer.write_table(@table, chunk_size)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
module Arrow
|
28
|
+
class TableSaver
|
29
|
+
include Parquet::ArrowTableSavable
|
30
|
+
end
|
31
|
+
end
|
data/lib/parquet/loader.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2017 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -21,6 +21,15 @@ module Parquet
|
|
21
21
|
end
|
22
22
|
|
23
23
|
private
|
24
|
+
def post_load(repository, namespace)
|
25
|
+
require_libraries
|
26
|
+
end
|
27
|
+
|
28
|
+
def require_libraries
|
29
|
+
require "parquet/arrow-table-loadable"
|
30
|
+
require "parquet/arrow-table-savable"
|
31
|
+
end
|
32
|
+
|
24
33
|
def load_object_info(info)
|
25
34
|
super
|
26
35
|
|
data/lib/parquet/version.rb
CHANGED
data/red-parquet.gemspec
CHANGED
@@ -42,7 +42,7 @@ Gem::Specification.new do |spec|
|
|
42
42
|
spec.test_files += Dir.glob("test/**/*")
|
43
43
|
spec.extensions = ["dependency-check/Rakefile"]
|
44
44
|
|
45
|
-
spec.add_runtime_dependency("red-arrow")
|
45
|
+
spec.add_runtime_dependency("red-arrow", ">= 0.8.1")
|
46
46
|
|
47
47
|
spec.add_development_dependency("bundler")
|
48
48
|
spec.add_development_dependency("rake")
|
data/test/test-arrow-table.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# Copyright 2018 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
class TestArrowTableReader < Test::Unit::TestCase
|
16
|
+
def setup
|
17
|
+
@count_field = Arrow::Field.new("count", :uint8)
|
18
|
+
@visible_field = Arrow::Field.new("visible", :boolean)
|
19
|
+
schema = Arrow::Schema.new([@count_field, @visible_field])
|
20
|
+
count_arrays = [
|
21
|
+
Arrow::UInt8Array.new([1, 2]),
|
22
|
+
Arrow::UInt8Array.new([4, 8, 16]),
|
23
|
+
Arrow::UInt8Array.new([32, 64]),
|
24
|
+
Arrow::UInt8Array.new([128]),
|
25
|
+
]
|
26
|
+
visible_arrays = [
|
27
|
+
Arrow::BooleanArray.new([true, false, nil]),
|
28
|
+
Arrow::BooleanArray.new([true]),
|
29
|
+
Arrow::BooleanArray.new([true, false]),
|
30
|
+
Arrow::BooleanArray.new([nil]),
|
31
|
+
Arrow::BooleanArray.new([nil]),
|
32
|
+
]
|
33
|
+
@count_array = Arrow::ChunkedArray.new(count_arrays)
|
34
|
+
@visible_array = Arrow::ChunkedArray.new(visible_arrays)
|
35
|
+
@count_column = Arrow::Column.new(@count_field, @count_array)
|
36
|
+
@visible_column = Arrow::Column.new(@visible_field, @visible_array)
|
37
|
+
@table = Arrow::Table.new(schema, [@count_column, @visible_column])
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_save_load
|
41
|
+
tempfile = Tempfile.open(["red-parquet", ".parquet"])
|
42
|
+
@table.save(tempfile.path)
|
43
|
+
assert_equal(@table, Arrow::Table.load(tempfile.path))
|
44
|
+
end
|
45
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
19
|
+
version: 0.8.1
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
26
|
+
version: 0.8.1
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -111,12 +111,15 @@ files:
|
|
111
111
|
- doc/text/apache-2.0.txt
|
112
112
|
- doc/text/news.md
|
113
113
|
- lib/parquet.rb
|
114
|
+
- lib/parquet/arrow-table-loadable.rb
|
115
|
+
- lib/parquet/arrow-table-savable.rb
|
114
116
|
- lib/parquet/loader.rb
|
115
117
|
- lib/parquet/version.rb
|
116
118
|
- red-parquet.gemspec
|
117
119
|
- test/helper.rb
|
118
120
|
- test/run-test.rb
|
119
121
|
- test/test-arrow-file-reader.rb
|
122
|
+
- test/test-arrow-table.rb
|
120
123
|
homepage: https://github.com/red-data-tools/red-parquet
|
121
124
|
licenses:
|
122
125
|
- Apache-2.0
|
@@ -137,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
137
140
|
version: '0'
|
138
141
|
requirements: []
|
139
142
|
rubyforge_project:
|
140
|
-
rubygems_version: 2.5.2
|
143
|
+
rubygems_version: 2.5.2.2
|
141
144
|
signing_key:
|
142
145
|
specification_version: 4
|
143
146
|
summary: Red Parquet is a Ruby bindings of Apache Parquet. Red Parquet is based on
|
@@ -146,3 +149,4 @@ test_files:
|
|
146
149
|
- test/helper.rb
|
147
150
|
- test/run-test.rb
|
148
151
|
- test/test-arrow-file-reader.rb
|
152
|
+
- test/test-arrow-table.rb
|