red-parquet 0.0.2 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/Gemfile +14 -11
- data/{doc/text/apache-2.0.txt → LICENSE.txt} +0 -0
- data/NOTICE.txt +2 -0
- data/README.md +27 -29
- data/Rakefile +17 -25
- data/dependency-check/Rakefile +14 -11
- data/lib/parquet.rb +14 -11
- data/lib/parquet/arrow-table-loadable.rb +21 -15
- data/lib/parquet/arrow-table-savable.rb +19 -14
- data/lib/parquet/loader.rb +14 -11
- data/lib/parquet/version.rb +21 -12
- data/red-parquet.gemspec +29 -32
- data/test/helper.rb +14 -11
- data/test/run-test.rb +19 -12
- data/test/test-arrow-table.rb +21 -12
- metadata +13 -47
- data/.yardopts +0 -6
- data/doc/text/news.md +0 -13
- data/test/test-arrow-file-reader.rb +0 -41
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: bc5d46efc9fc652cd9c8e52345d3be7a17fe5bf22629efb0b5710baa678e8168
|
4
|
+
data.tar.gz: 185c1aadebfa2f0c38224fb2b9a925856736ea13f2084e6c6240c8e8c184a892
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 653075c4bccdf6c800d0ff52e8a37fcaf4477b59dda7dcc1b1a291f91536e0731892afda623c5c74ebf5e7358f8621a822266cc534c26e3c2ccd9a0751f5cc5f
|
7
|
+
data.tar.gz: ff57c715fd2d25cfe08f2fd6eae5f8a05d3a6a291f1e8ed505591d971768ff260cac825fccb767ab04193facb7154b6b137b9a30fea6449ffce53c5aecc638b9
|
data/Gemfile
CHANGED
@@ -1,18 +1,21 @@
|
|
1
1
|
# -*- ruby -*-
|
2
2
|
#
|
3
|
-
#
|
3
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
4
|
+
# or more contributor license agreements. See the NOTICE file
|
5
|
+
# distributed with this work for additional information
|
6
|
+
# regarding copyright ownership. The ASF licenses this file
|
7
|
+
# to you under the Apache License, Version 2.0 (the
|
8
|
+
# "License"); you may not use this file except in compliance
|
9
|
+
# with the License. You may obtain a copy of the License at
|
4
10
|
#
|
5
|
-
#
|
6
|
-
# you may not use this file except in compliance with the License.
|
7
|
-
# You may obtain a copy of the License at
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
12
|
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
# limitations under the License.
|
13
|
+
# Unless required by applicable law or agreed to in writing,
|
14
|
+
# software distributed under the License is distributed on an
|
15
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
16
|
+
# KIND, either express or implied. See the License for the
|
17
|
+
# specific language governing permissions and limitations
|
18
|
+
# under the License.
|
16
19
|
|
17
20
|
source "https://rubygems.org/"
|
18
21
|
|
File without changes
|
data/NOTICE.txt
ADDED
data/README.md
CHANGED
@@ -1,28 +1,43 @@
|
|
1
|
-
|
1
|
+
<!---
|
2
|
+
Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
or more contributor license agreements. See the NOTICE file
|
4
|
+
distributed with this work for additional information
|
5
|
+
regarding copyright ownership. The ASF licenses this file
|
6
|
+
to you under the Apache License, Version 2.0 (the
|
7
|
+
"License"); you may not use this file except in compliance
|
8
|
+
with the License. You may obtain a copy of the License at
|
2
9
|
|
3
|
-
|
10
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
4
11
|
|
5
|
-
|
12
|
+
Unless required by applicable law or agreed to in writing,
|
13
|
+
software distributed under the License is distributed on an
|
14
|
+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
KIND, either express or implied. See the License for the
|
16
|
+
specific language governing permissions and limitations
|
17
|
+
under the License.
|
18
|
+
-->
|
6
19
|
|
7
|
-
|
20
|
+
# Red Parquet - Apache Parquet Ruby
|
8
21
|
|
9
|
-
Red Parquet is
|
22
|
+
Red Parquet is the Ruby bindings of Apache Parquet. Red Parquet is based on GObject Introspection.
|
10
23
|
|
11
24
|
[Apache Parquet](https://parquet.apache.org/) is a columnar storage format.
|
12
25
|
|
13
26
|
[GObject Introspection](https://wiki.gnome.org/action/show/Projects/GObjectIntrospection) is a middleware for language bindings of C library. GObject Introspection can generate language bindings automatically at runtime.
|
14
27
|
|
15
|
-
Red Parquet uses [Parquet GLib](https://github.com/
|
28
|
+
Red Parquet uses [Apache Parquet GLib](https://github.com/apache/arrow/tree/master/c_glib/parquet-glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Parquet.
|
16
29
|
|
17
|
-
Parquet GLib is a C wrapper for [Parquet C++](https://github.com/apache/parquet
|
30
|
+
Apache Parquet GLib is a C wrapper for [Apache Parquet C++](https://github.com/apache/arrow/tree/master/cpp/parquet). GObject Introspection can't use Apache Parquet C++ directly. Apache Parquet GLib is a bridge between Apache Parquet C++ and GObject Introspection.
|
18
31
|
|
19
32
|
gobject-introspection gem is a Ruby bindings of GObject Introspection. Red Parquet uses GObject Introspection via gobject-introspection gem.
|
20
33
|
|
21
34
|
## Install
|
22
35
|
|
23
|
-
Install Parquet GLib before install Red Parquet. Use [
|
36
|
+
Install Apache Parquet GLib before install Red Parquet. Use [packages.red-data-tools.org](https://github.com/red-data-tools/packages.red-data-tools.org) for installing Apache Parquet GLib.
|
24
37
|
|
25
|
-
|
38
|
+
Note that the Apache Parquet GLib packages are "unofficial". "Official" packages will be released in the future.
|
39
|
+
|
40
|
+
Install Red Parquet after you install Apache Parquet GLib:
|
26
41
|
|
27
42
|
```text
|
28
43
|
% gem install red-parquet
|
@@ -33,24 +48,7 @@ Install Red Parquet after you install Parquet GLib:
|
|
33
48
|
```ruby
|
34
49
|
require "parquet"
|
35
50
|
|
36
|
-
|
51
|
+
table = Arrow::Table.load("/dev/shm/data.parquet")
|
52
|
+
# Process data in table
|
53
|
+
table.save("/dev/shm/data-processed.parquet")
|
37
54
|
```
|
38
|
-
|
39
|
-
## Dependencies
|
40
|
-
|
41
|
-
* [Apache Parquet C++](https://github.com/apache/parquet-cpp) with Apache Arrow support
|
42
|
-
|
43
|
-
* [Parquet GLib](https://github.com/red-data-tools/parquet-glib)
|
44
|
-
|
45
|
-
* [Red Arrow](https://rubygems.org/gems/red-arrow)
|
46
|
-
|
47
|
-
## Authors
|
48
|
-
|
49
|
-
* Kouhei Sutou \<kou@clear-code.com\>
|
50
|
-
|
51
|
-
## License
|
52
|
-
|
53
|
-
Apache License 2.0. See doc/text/apache-2.0.txt for details.
|
54
|
-
|
55
|
-
(Kouhei Sutou has a right to change the license including contributed
|
56
|
-
patches.)
|
data/Rakefile
CHANGED
@@ -1,40 +1,32 @@
|
|
1
1
|
# -*- ruby -*-
|
2
2
|
#
|
3
|
-
#
|
3
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
4
|
+
# or more contributor license agreements. See the NOTICE file
|
5
|
+
# distributed with this work for additional information
|
6
|
+
# regarding copyright ownership. The ASF licenses this file
|
7
|
+
# to you under the Apache License, Version 2.0 (the
|
8
|
+
# "License"); you may not use this file except in compliance
|
9
|
+
# with the License. You may obtain a copy of the License at
|
4
10
|
#
|
5
|
-
#
|
6
|
-
# you may not use this file except in compliance with the License.
|
7
|
-
# You may obtain a copy of the License at
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
12
|
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
# limitations under the License.
|
13
|
+
# Unless required by applicable law or agreed to in writing,
|
14
|
+
# software distributed under the License is distributed on an
|
15
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
16
|
+
# KIND, either express or implied. See the License for the
|
17
|
+
# specific language governing permissions and limitations
|
18
|
+
# under the License.
|
16
19
|
|
17
20
|
require "rubygems"
|
18
21
|
require "bundler/gem_helper"
|
19
|
-
require "packnga"
|
20
22
|
|
21
|
-
base_dir = File.join(
|
23
|
+
base_dir = File.join(__dir__)
|
22
24
|
|
23
25
|
helper = Bundler::GemHelper.new(base_dir)
|
24
|
-
def helper.version_tag
|
25
|
-
version
|
26
|
-
end
|
27
|
-
|
28
26
|
helper.install
|
29
|
-
spec = helper.gemspec
|
30
27
|
|
31
|
-
|
32
|
-
|
33
|
-
task.translate_language = "ja"
|
34
|
-
end
|
35
|
-
|
36
|
-
Packnga::ReleaseTask.new(spec) do
|
37
|
-
end
|
28
|
+
release_task = Rake::Task["release"]
|
29
|
+
release_task.prerequisites.replace(["build", "release:rubygem_push"])
|
38
30
|
|
39
31
|
desc "Run tests"
|
40
32
|
task :test do
|
data/dependency-check/Rakefile
CHANGED
@@ -1,18 +1,21 @@
|
|
1
1
|
# -*- ruby -*-
|
2
2
|
#
|
3
|
-
#
|
3
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
4
|
+
# or more contributor license agreements. See the NOTICE file
|
5
|
+
# distributed with this work for additional information
|
6
|
+
# regarding copyright ownership. The ASF licenses this file
|
7
|
+
# to you under the Apache License, Version 2.0 (the
|
8
|
+
# "License"); you may not use this file except in compliance
|
9
|
+
# with the License. You may obtain a copy of the License at
|
4
10
|
#
|
5
|
-
#
|
6
|
-
# you may not use this file except in compliance with the License.
|
7
|
-
# You may obtain a copy of the License at
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
12
|
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
# limitations under the License.
|
13
|
+
# Unless required by applicable law or agreed to in writing,
|
14
|
+
# software distributed under the License is distributed on an
|
15
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
16
|
+
# KIND, either express or implied. See the License for the
|
17
|
+
# specific language governing permissions and limitations
|
18
|
+
# under the License.
|
16
19
|
|
17
20
|
require "pkg-config"
|
18
21
|
require "native-package-installer"
|
data/lib/parquet.rb
CHANGED
@@ -1,16 +1,19 @@
|
|
1
|
-
#
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
2
8
|
#
|
3
|
-
#
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
10
|
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
# limitations under the License.
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
14
17
|
|
15
18
|
require "arrow"
|
16
19
|
|
@@ -1,24 +1,30 @@
|
|
1
|
-
#
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
2
8
|
#
|
3
|
-
#
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
10
|
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
# limitations under the License.
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
14
17
|
|
15
18
|
module Parquet
|
16
19
|
module ArrowTableLoadable
|
17
20
|
private
|
18
|
-
def load_as_parquet
|
19
|
-
|
20
|
-
reader
|
21
|
-
reader.
|
21
|
+
def load_as_parquet
|
22
|
+
input = open_input_stream
|
23
|
+
reader = Parquet::ArrowFileReader.new(input)
|
24
|
+
reader.use_threads = (@options[:use_threads] != false)
|
25
|
+
table = reader.read_table
|
26
|
+
table.instance_variable_set(:@input, input)
|
27
|
+
table
|
22
28
|
end
|
23
29
|
end
|
24
30
|
end
|
@@ -1,24 +1,29 @@
|
|
1
|
-
#
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
2
8
|
#
|
3
|
-
#
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
10
|
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
# limitations under the License.
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
14
17
|
|
15
18
|
module Parquet
|
16
19
|
module ArrowTableSavable
|
17
20
|
private
|
18
|
-
def save_as_parquet
|
21
|
+
def save_as_parquet
|
19
22
|
chunk_size = @options[:chunk_size] || 1024 # TODO
|
20
|
-
|
21
|
-
|
23
|
+
open_output_stream do |output|
|
24
|
+
Parquet::ArrowFileWriter.open(@table.schema, output) do |writer|
|
25
|
+
writer.write_table(@table, chunk_size)
|
26
|
+
end
|
22
27
|
end
|
23
28
|
end
|
24
29
|
end
|
data/lib/parquet/loader.rb
CHANGED
@@ -1,16 +1,19 @@
|
|
1
|
-
#
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
2
8
|
#
|
3
|
-
#
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
10
|
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
# limitations under the License.
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
14
17
|
|
15
18
|
module Parquet
|
16
19
|
class Loader < GObjectIntrospection::Loader
|
data/lib/parquet/version.rb
CHANGED
@@ -1,17 +1,26 @@
|
|
1
|
-
#
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
2
8
|
#
|
3
|
-
#
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
10
|
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
# limitations under the License.
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
14
17
|
|
15
18
|
module Parquet
|
16
|
-
VERSION = "0.0
|
19
|
+
VERSION = "0.12.0"
|
20
|
+
|
21
|
+
module Version
|
22
|
+
numbers, TAG = VERSION.split("-")
|
23
|
+
MAJOR, MINOR, MICRO = numbers.split(".").collect(&:to_i)
|
24
|
+
STRING = VERSION
|
25
|
+
end
|
17
26
|
end
|
data/red-parquet.gemspec
CHANGED
@@ -1,52 +1,49 @@
|
|
1
1
|
# -*- ruby -*-
|
2
2
|
#
|
3
|
-
#
|
3
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
4
|
+
# or more contributor license agreements. See the NOTICE file
|
5
|
+
# distributed with this work for additional information
|
6
|
+
# regarding copyright ownership. The ASF licenses this file
|
7
|
+
# to you under the Apache License, Version 2.0 (the
|
8
|
+
# "License"); you may not use this file except in compliance
|
9
|
+
# with the License. You may obtain a copy of the License at
|
4
10
|
#
|
5
|
-
#
|
6
|
-
# you may not use this file except in compliance with the License.
|
7
|
-
# You may obtain a copy of the License at
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
12
|
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
# limitations under the License.
|
16
|
-
|
17
|
-
clean_white_space = lambda do |entry|
|
18
|
-
entry.gsub(/(\A\n+|\n+\z)/, '') + "\n"
|
19
|
-
end
|
13
|
+
# Unless required by applicable law or agreed to in writing,
|
14
|
+
# software distributed under the License is distributed on an
|
15
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
16
|
+
# KIND, either express or implied. See the License for the
|
17
|
+
# specific language governing permissions and limitations
|
18
|
+
# under the License.
|
20
19
|
|
21
|
-
|
22
|
-
require "parquet/version"
|
20
|
+
require_relative "lib/parquet/version"
|
23
21
|
|
24
22
|
Gem::Specification.new do |spec|
|
25
23
|
spec.name = "red-parquet"
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
24
|
+
version_components = [
|
25
|
+
Parquet::Version::MAJOR.to_s,
|
26
|
+
Parquet::Version::MINOR.to_s,
|
27
|
+
Parquet::Version::MICRO.to_s,
|
28
|
+
Parquet::Version::TAG,
|
29
|
+
]
|
30
|
+
spec.version = version_components.compact.join(".")
|
31
|
+
spec.homepage = "https://arrow.apache.org/"
|
32
|
+
spec.authors = ["Apache Arrow Developers"]
|
33
|
+
spec.email = ["dev@arrow.apache.org"]
|
30
34
|
|
31
|
-
|
32
|
-
|
33
|
-
entries = readme.split(/^\#\#\s(.*)$/)
|
34
|
-
clean_white_space.call(entries[entries.index("Description") + 1])
|
35
|
-
description = clean_white_space.call(entries[entries.index("Description") + 1])
|
36
|
-
spec.summary, spec.description, = description.split(/\n\n+/, 3)
|
35
|
+
spec.summary = "Red Parquet is the Ruby bindings of Apache Parquet"
|
36
|
+
spec.description = "Apache Parquet is a columnar storage format."
|
37
37
|
spec.license = "Apache-2.0"
|
38
38
|
spec.files = ["README.md", "Rakefile", "Gemfile", "#{spec.name}.gemspec"]
|
39
|
-
spec.files += [".
|
39
|
+
spec.files += ["LICENSE.txt", "NOTICE.txt"]
|
40
40
|
spec.files += Dir.glob("lib/**/*.rb")
|
41
|
-
spec.files += Dir.glob("doc/text/*")
|
42
41
|
spec.test_files += Dir.glob("test/**/*")
|
43
42
|
spec.extensions = ["dependency-check/Rakefile"]
|
44
43
|
|
45
|
-
spec.add_runtime_dependency("red-arrow"
|
44
|
+
spec.add_runtime_dependency("red-arrow")
|
46
45
|
|
47
46
|
spec.add_development_dependency("bundler")
|
48
47
|
spec.add_development_dependency("rake")
|
49
48
|
spec.add_development_dependency("test-unit")
|
50
|
-
spec.add_development_dependency("packnga")
|
51
|
-
spec.add_development_dependency("kramdown")
|
52
49
|
end
|
data/test/helper.rb
CHANGED
@@ -1,16 +1,19 @@
|
|
1
|
-
#
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
2
8
|
#
|
3
|
-
#
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
10
|
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
# limitations under the License.
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
14
17
|
|
15
18
|
require "parquet"
|
16
19
|
|
data/test/run-test.rb
CHANGED
@@ -1,28 +1,35 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
#
|
3
|
-
#
|
3
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
4
|
+
# or more contributor license agreements. See the NOTICE file
|
5
|
+
# distributed with this work for additional information
|
6
|
+
# regarding copyright ownership. The ASF licenses this file
|
7
|
+
# to you under the Apache License, Version 2.0 (the
|
8
|
+
# "License"); you may not use this file except in compliance
|
9
|
+
# with the License. You may obtain a copy of the License at
|
4
10
|
#
|
5
|
-
#
|
6
|
-
# you may not use this file except in compliance with the License.
|
7
|
-
# You may obtain a copy of the License at
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
12
|
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
# limitations under the License.
|
13
|
+
# Unless required by applicable law or agreed to in writing,
|
14
|
+
# software distributed under the License is distributed on an
|
15
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
16
|
+
# KIND, either express or implied. See the License for the
|
17
|
+
# specific language governing permissions and limitations
|
18
|
+
# under the License.
|
16
19
|
|
17
20
|
$VERBOSE = true
|
18
21
|
|
19
22
|
require "pathname"
|
20
23
|
|
21
|
-
base_dir = Pathname.new(
|
24
|
+
base_dir = Pathname.new(__dir__).parent.expand_path
|
25
|
+
arrow_base_dir = base_dir.parent + "red-arrow"
|
22
26
|
|
23
27
|
lib_dir = base_dir + "lib"
|
24
28
|
test_dir = base_dir + "test"
|
25
29
|
|
30
|
+
arrow_lib_dir = arrow_base_dir + "lib"
|
31
|
+
|
32
|
+
$LOAD_PATH.unshift(arrow_lib_dir.to_s)
|
26
33
|
$LOAD_PATH.unshift(lib_dir.to_s)
|
27
34
|
|
28
35
|
require_relative "helper"
|
data/test/test-arrow-table.rb
CHANGED
@@ -1,16 +1,19 @@
|
|
1
|
-
#
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
2
8
|
#
|
3
|
-
#
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
10
|
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
# limitations under the License.
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
14
17
|
|
15
18
|
class TestArrowTableReader < Test::Unit::TestCase
|
16
19
|
def setup
|
@@ -37,9 +40,15 @@ class TestArrowTableReader < Test::Unit::TestCase
|
|
37
40
|
@table = Arrow::Table.new(schema, [@count_column, @visible_column])
|
38
41
|
end
|
39
42
|
|
40
|
-
def
|
43
|
+
def test_save_load_path
|
41
44
|
tempfile = Tempfile.open(["red-parquet", ".parquet"])
|
42
45
|
@table.save(tempfile.path)
|
43
46
|
assert_equal(@table, Arrow::Table.load(tempfile.path))
|
44
47
|
end
|
48
|
+
|
49
|
+
def test_save_load_buffer
|
50
|
+
buffer = Arrow::ResizableBuffer.new(1024)
|
51
|
+
@table.save(buffer, format: :parquet)
|
52
|
+
assert_equal(@table, Arrow::Table.load(buffer, format: :parquet))
|
53
|
+
end
|
45
54
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
7
|
+
- Apache Arrow Developers
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0
|
19
|
+
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0
|
26
|
+
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,50 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
-
|
70
|
-
name: packnga
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: kramdown
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
|
-
description: "[Apache Parquet](https://parquet.apache.org/) is a columnar storage
|
98
|
-
format."
|
69
|
+
description: Apache Parquet is a columnar storage format.
|
99
70
|
email:
|
100
|
-
-
|
71
|
+
- dev@arrow.apache.org
|
101
72
|
executables: []
|
102
73
|
extensions:
|
103
74
|
- dependency-check/Rakefile
|
104
75
|
extra_rdoc_files: []
|
105
76
|
files:
|
106
|
-
- ".yardopts"
|
107
77
|
- Gemfile
|
78
|
+
- LICENSE.txt
|
79
|
+
- NOTICE.txt
|
108
80
|
- README.md
|
109
81
|
- Rakefile
|
110
82
|
- dependency-check/Rakefile
|
111
|
-
- doc/text/apache-2.0.txt
|
112
|
-
- doc/text/news.md
|
113
83
|
- lib/parquet.rb
|
114
84
|
- lib/parquet/arrow-table-loadable.rb
|
115
85
|
- lib/parquet/arrow-table-savable.rb
|
@@ -118,9 +88,8 @@ files:
|
|
118
88
|
- red-parquet.gemspec
|
119
89
|
- test/helper.rb
|
120
90
|
- test/run-test.rb
|
121
|
-
- test/test-arrow-file-reader.rb
|
122
91
|
- test/test-arrow-table.rb
|
123
|
-
homepage: https://
|
92
|
+
homepage: https://arrow.apache.org/
|
124
93
|
licenses:
|
125
94
|
- Apache-2.0
|
126
95
|
metadata: {}
|
@@ -139,14 +108,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
108
|
- !ruby/object:Gem::Version
|
140
109
|
version: '0'
|
141
110
|
requirements: []
|
142
|
-
|
143
|
-
rubygems_version: 2.5.2.2
|
111
|
+
rubygems_version: 3.0.2
|
144
112
|
signing_key:
|
145
113
|
specification_version: 4
|
146
|
-
summary: Red Parquet is
|
147
|
-
GObject Introspection.
|
114
|
+
summary: Red Parquet is the Ruby bindings of Apache Parquet
|
148
115
|
test_files:
|
149
116
|
- test/helper.rb
|
150
|
-
- test/run-test.rb
|
151
|
-
- test/test-arrow-file-reader.rb
|
152
117
|
- test/test-arrow-table.rb
|
118
|
+
- test/run-test.rb
|
data/.yardopts
DELETED
data/doc/text/news.md
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
# Copyright 2017 Kouhei Sutou <kou@clear-code.com>
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
class TestArrowFileReader < Test::Unit::TestCase
|
16
|
-
def test_read_write
|
17
|
-
tempfile = Tempfile.open(["data", ".parquet"])
|
18
|
-
|
19
|
-
values = [true, nil, false, true]
|
20
|
-
chunk_size = 2
|
21
|
-
|
22
|
-
field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
|
23
|
-
schema = Arrow::Schema.new([field])
|
24
|
-
Parquet::ArrowFileWriter.open(schema, tempfile.path) do |writer|
|
25
|
-
columns = [
|
26
|
-
Arrow::Column.new(field, Arrow::BooleanArray.new(values)),
|
27
|
-
]
|
28
|
-
table = Arrow::Table.new(schema, columns)
|
29
|
-
writer.write_table(table, chunk_size)
|
30
|
-
end
|
31
|
-
|
32
|
-
reader = Parquet::ArrowFileReader.new(tempfile.path)
|
33
|
-
reader.n_threads = 4
|
34
|
-
assert_equal(chunk_size, reader.n_row_groups)
|
35
|
-
table = reader.read_table
|
36
|
-
table_data = table.each_column.collect do |column|
|
37
|
-
[column.name, column.to_a]
|
38
|
-
end
|
39
|
-
assert_equal([["enabled", values]], table_data)
|
40
|
-
end
|
41
|
-
end
|