embulk-output-utf8parquet 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.idea/gradle.xml +11 -0
- data/.idea/misc.xml +6 -0
- data/.idea/modules.xml +8 -0
- data/.idea/vcs.xml +6 -0
- data/README.md +49 -0
- data/classpath/activation-1.1.jar +0 -0
- data/classpath/apacheds-i18n-2.0.0-M15.jar +0 -0
- data/classpath/apacheds-kerberos-codec-2.0.0-M15.jar +0 -0
- data/classpath/api-asn1-api-1.0.0-M20.jar +0 -0
- data/classpath/api-util-1.0.0-M20.jar +0 -0
- data/classpath/asm-3.1.jar +0 -0
- data/classpath/avro-1.7.4.jar +0 -0
- data/classpath/aws-java-sdk-1.7.4.jar +0 -0
- data/classpath/commons-beanutils-1.7.0.jar +0 -0
- data/classpath/commons-cli-1.2.jar +0 -0
- data/classpath/commons-codec-1.6.jar +0 -0
- data/classpath/commons-collections-3.2.1.jar +0 -0
- data/classpath/commons-compress-1.4.1.jar +0 -0
- data/classpath/commons-configuration-1.6.jar +0 -0
- data/classpath/commons-digester-1.8.jar +0 -0
- data/classpath/commons-httpclient-3.1.jar +0 -0
- data/classpath/commons-io-2.4.jar +0 -0
- data/classpath/commons-lang-2.6.jar +0 -0
- data/classpath/commons-logging-1.1.3.jar +0 -0
- data/classpath/commons-math3-3.1.1.jar +0 -0
- data/classpath/commons-net-3.1.jar +0 -0
- data/classpath/curator-client-2.7.1.jar +0 -0
- data/classpath/curator-framework-2.7.1.jar +0 -0
- data/classpath/curator-recipes-2.7.1.jar +0 -0
- data/classpath/embulk-output-utf8parquet-1.0.0.jar +0 -0
- data/classpath/gson-2.2.4.jar +0 -0
- data/classpath/hadoop-annotations-2.7.1.jar +0 -0
- data/classpath/hadoop-auth-2.7.1.jar +0 -0
- data/classpath/hadoop-aws-2.7.1.jar +0 -0
- data/classpath/hadoop-client-2.7.1.jar +0 -0
- data/classpath/hadoop-common-2.7.1.jar +0 -0
- data/classpath/hadoop-hdfs-2.7.1.jar +0 -0
- data/classpath/hadoop-mapreduce-client-app-2.7.1.jar +0 -0
- data/classpath/hadoop-mapreduce-client-common-2.7.1.jar +0 -0
- data/classpath/hadoop-mapreduce-client-core-2.7.1.jar +0 -0
- data/classpath/hadoop-mapreduce-client-jobclient-2.7.1.jar +0 -0
- data/classpath/hadoop-mapreduce-client-shuffle-2.7.1.jar +0 -0
- data/classpath/hadoop-yarn-api-2.7.1.jar +0 -0
- data/classpath/hadoop-yarn-client-2.7.1.jar +0 -0
- data/classpath/hadoop-yarn-common-2.7.1.jar +0 -0
- data/classpath/hadoop-yarn-server-common-2.7.1.jar +0 -0
- data/classpath/hadoop-yarn-server-nodemanager-2.7.1.jar +0 -0
- data/classpath/htrace-core-3.1.0-incubating.jar +0 -0
- data/classpath/httpclient-4.2.5.jar +0 -0
- data/classpath/httpcore-4.2.4.jar +0 -0
- data/classpath/jackson-core-asl-1.9.13.jar +0 -0
- data/classpath/jackson-jaxrs-1.9.13.jar +0 -0
- data/classpath/jackson-mapper-asl-1.9.13.jar +0 -0
- data/classpath/jackson-xc-1.9.13.jar +0 -0
- data/classpath/java-xmlbuilder-0.4.jar +0 -0
- data/classpath/jaxb-api-2.2.2.jar +0 -0
- data/classpath/jaxb-impl-2.2.3-1.jar +0 -0
- data/classpath/jersey-client-1.9.jar +0 -0
- data/classpath/jersey-core-1.9.jar +0 -0
- data/classpath/jersey-guice-1.9.jar +0 -0
- data/classpath/jersey-json-1.9.jar +0 -0
- data/classpath/jersey-server-1.9.jar +0 -0
- data/classpath/jets3t-0.9.0.jar +0 -0
- data/classpath/jettison-1.1.jar +0 -0
- data/classpath/jetty-6.1.26.jar +0 -0
- data/classpath/jetty-util-6.1.26.jar +0 -0
- data/classpath/jline-0.9.94.jar +0 -0
- data/classpath/joda-time-2.9.9.jar +0 -0
- data/classpath/jsch-0.1.42.jar +0 -0
- data/classpath/jsp-api-2.1.jar +0 -0
- data/classpath/jsr305-3.0.0.jar +0 -0
- data/classpath/leveldbjni-all-1.8.jar +0 -0
- data/classpath/log4j-1.2.17.jar +0 -0
- data/classpath/netty-3.7.0.Final.jar +0 -0
- data/classpath/netty-all-4.0.23.Final.jar +0 -0
- data/classpath/paranamer-2.3.jar +0 -0
- data/classpath/parquet-column-1.8.1.jar +0 -0
- data/classpath/parquet-common-1.8.1.jar +0 -0
- data/classpath/parquet-encoding-1.8.1.jar +0 -0
- data/classpath/parquet-format-2.3.0-incubating.jar +0 -0
- data/classpath/parquet-hadoop-1.8.1.jar +0 -0
- data/classpath/parquet-jackson-1.8.1.jar +0 -0
- data/classpath/protobuf-java-2.5.0.jar +0 -0
- data/classpath/servlet-api-2.5.jar +0 -0
- data/classpath/snappy-java-1.1.1.6.jar +0 -0
- data/classpath/stax-api-1.0-2.jar +0 -0
- data/classpath/xercesImpl-2.9.1.jar +0 -0
- data/classpath/xml-apis-1.3.04.jar +0 -0
- data/classpath/xmlenc-0.52.jar +0 -0
- data/classpath/xz-1.0.jar +0 -0
- data/classpath/zookeeper-3.4.6.jar +0 -0
- data/embulk-output-utf8parquet.iml +9 -0
- metadata +164 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 217f5b1b8a369ad9d2327b1c59aff5d9c0333eb6
|
4
|
+
data.tar.gz: 862c3aec856f11163ba30d008e98afe469aea538
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6bb6be1333e7869d5b8d711f5a410288b7fa9df9cc18d32674b33908736ae9daf99d1d4314a5a7fa2ddc32f189214256be6c757ab71f1091c2629a9aaae5c3e9
|
7
|
+
data.tar.gz: f10c1e3f01d42f5826bf8edcca966a05ab7c9abfc5e7e8a3ccb7df866ea9aa28f78e2c960a0e61e09fc39451ef8491b6603b8e7d8757603ba142264eaabfb2bd
|
data/.idea/gradle.xml
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<project version="4">
|
3
|
+
<component name="GradleSettings">
|
4
|
+
<option name="linkedExternalProjectsSettings">
|
5
|
+
<GradleProjectSettings>
|
6
|
+
<option name="distributionType" value="DEFAULT_WRAPPED" />
|
7
|
+
<option name="externalProjectPath" value="$PROJECT_DIR$" />
|
8
|
+
</GradleProjectSettings>
|
9
|
+
</option>
|
10
|
+
</component>
|
11
|
+
</project>
|
data/.idea/misc.xml
ADDED
data/.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<project version="4">
|
3
|
+
<component name="ProjectModuleManager">
|
4
|
+
<modules>
|
5
|
+
<module fileurl="file://$PROJECT_DIR$/embulk-output-utf8parquet.iml" filepath="$PROJECT_DIR$/embulk-output-utf8parquet.iml" />
|
6
|
+
</modules>
|
7
|
+
</component>
|
8
|
+
</project>
|
data/.idea/vcs.xml
ADDED
data/README.md
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# Parquet output plugin for Embulk
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
|
5
|
+
* **Plugin type**: output
|
6
|
+
* **Load all or nothing**: no
|
7
|
+
* **Resume supported**: no
|
8
|
+
* **Cleanup supported**: no
|
9
|
+
|
10
|
+
## Configuration
|
11
|
+
|
12
|
+
- **path_prefix**: A prefix of the output path. This is a Hadoop Path URI, so it may also include a `scheme` and an `authority`. (string, required)
|
13
|
+
- **file_ext**: An extension of output path. (string, default: .parquet)
|
14
|
+
- **sequence_format**: (string, default: .%03d)
|
15
|
+
- **block_size**: A block size of parquet file. (int, default: 134217728(128M))
|
16
|
+
- **page_size**: A page size of parquet file. (int, default: 1048576(1M))
|
17
|
+
- **compression_codec**: The compression codec to use. Available values: UNCOMPRESSED, SNAPPY, GZIP. (string, default: UNCOMPRESSED)
|
18
|
+
- **default_timezone**: Time zone of timestamp columns. This can be overridden for each column using `column_options`.
|
19
|
+
- **default_timestamp_format**: Format of timestamp columns. This can be overridden for each column using `column_options`.
|
20
|
+
- **column_options**: Specify timezone and timestamp format for each column. Format of this option is the same as the official csv formatter. See [document](
|
21
|
+
http://www.embulk.org/docs/built-in.html#csv-formatter-plugin).
|
22
|
+
- **extra_configurations**: Add extra entries to Configuration which will be passed to ParquetWriter
|
23
|
+
- **overwrite**: Overwrite if output files already exist. (default: fail if files exist)
|
24
|
+
- **addUTF8**: If true, string columns are stored with OriginalType.UTF8. (boolean, default: false)
|
25
|
+
|
26
|
+
## Example
|
27
|
+
|
28
|
+
```yaml
|
29
|
+
out:
|
30
|
+
type: parquet
|
31
|
+
path_prefix: file:///data/output
|
32
|
+
```
|
33
|
+
|
34
|
+
### How to write parquet files into S3
|
35
|
+
|
36
|
+
```yaml
|
37
|
+
out:
|
38
|
+
type: parquet
|
39
|
+
path_prefix: s3a://bucket/keys
|
40
|
+
extra_configurations:
|
41
|
+
fs.s3a.access.key: 'your_access_key'
|
42
|
+
fs.s3a.secret.key: 'your_secret_access_key'
|
43
|
+
```
|
44
|
+
|
45
|
+
## Build
|
46
|
+
|
47
|
+
```
|
48
|
+
$ ./gradlew gem
|
49
|
+
```
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<module external.linked.project.id="embulk-output-utf8parquet" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" type="JAVA_MODULE" version="4">
|
3
|
+
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
4
|
+
<exclude-output />
|
5
|
+
<content url="file://$MODULE_DIR$" />
|
6
|
+
<orderEntry type="inheritedJdk" />
|
7
|
+
<orderEntry type="sourceFolder" forTests="false" />
|
8
|
+
</component>
|
9
|
+
</module>
|
metadata
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: embulk-output-utf8parquet
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- OKUNO Akihiro
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-05-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ~>
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '1.0'
|
19
|
+
name: bundler
|
20
|
+
prerelease: false
|
21
|
+
type: :development
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '10.0'
|
33
|
+
name: rake
|
34
|
+
prerelease: false
|
35
|
+
type: :development
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
description: Parquet output plugin is an Embulk plugin that loads records to Parquet read by any input plugins. Search the input plugins by "embulk-input" keyword.
|
42
|
+
email:
|
43
|
+
- alexopoulos7@gmail.com
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- .idea/gradle.xml
|
49
|
+
- .idea/misc.xml
|
50
|
+
- .idea/modules.xml
|
51
|
+
- .idea/vcs.xml
|
52
|
+
- README.md
|
53
|
+
- embulk-output-utf8parquet.iml
|
54
|
+
- classpath/jaxb-impl-2.2.3-1.jar
|
55
|
+
- classpath/hadoop-aws-2.7.1.jar
|
56
|
+
- classpath/hadoop-auth-2.7.1.jar
|
57
|
+
- classpath/activation-1.1.jar
|
58
|
+
- classpath/commons-configuration-1.6.jar
|
59
|
+
- classpath/commons-beanutils-1.7.0.jar
|
60
|
+
- classpath/hadoop-yarn-client-2.7.1.jar
|
61
|
+
- classpath/xz-1.0.jar
|
62
|
+
- classpath/commons-httpclient-3.1.jar
|
63
|
+
- classpath/stax-api-1.0-2.jar
|
64
|
+
- classpath/apacheds-i18n-2.0.0-M15.jar
|
65
|
+
- classpath/joda-time-2.9.9.jar
|
66
|
+
- classpath/httpclient-4.2.5.jar
|
67
|
+
- classpath/jline-0.9.94.jar
|
68
|
+
- classpath/jaxb-api-2.2.2.jar
|
69
|
+
- classpath/hadoop-annotations-2.7.1.jar
|
70
|
+
- classpath/hadoop-mapreduce-client-jobclient-2.7.1.jar
|
71
|
+
- classpath/hadoop-hdfs-2.7.1.jar
|
72
|
+
- classpath/jackson-jaxrs-1.9.13.jar
|
73
|
+
- classpath/xercesImpl-2.9.1.jar
|
74
|
+
- classpath/commons-logging-1.1.3.jar
|
75
|
+
- classpath/hadoop-yarn-server-common-2.7.1.jar
|
76
|
+
- classpath/curator-recipes-2.7.1.jar
|
77
|
+
- classpath/hadoop-yarn-server-nodemanager-2.7.1.jar
|
78
|
+
- classpath/jersey-json-1.9.jar
|
79
|
+
- classpath/avro-1.7.4.jar
|
80
|
+
- classpath/log4j-1.2.17.jar
|
81
|
+
- classpath/commons-cli-1.2.jar
|
82
|
+
- classpath/parquet-column-1.8.1.jar
|
83
|
+
- classpath/xml-apis-1.3.04.jar
|
84
|
+
- classpath/commons-digester-1.8.jar
|
85
|
+
- classpath/servlet-api-2.5.jar
|
86
|
+
- classpath/parquet-format-2.3.0-incubating.jar
|
87
|
+
- classpath/protobuf-java-2.5.0.jar
|
88
|
+
- classpath/hadoop-mapreduce-client-common-2.7.1.jar
|
89
|
+
- classpath/xmlenc-0.52.jar
|
90
|
+
- classpath/jackson-xc-1.9.13.jar
|
91
|
+
- classpath/jetty-util-6.1.26.jar
|
92
|
+
- classpath/hadoop-mapreduce-client-shuffle-2.7.1.jar
|
93
|
+
- classpath/commons-compress-1.4.1.jar
|
94
|
+
- classpath/hadoop-yarn-common-2.7.1.jar
|
95
|
+
- classpath/commons-io-2.4.jar
|
96
|
+
- classpath/hadoop-mapreduce-client-core-2.7.1.jar
|
97
|
+
- classpath/jackson-core-asl-1.9.13.jar
|
98
|
+
- classpath/jersey-core-1.9.jar
|
99
|
+
- classpath/jsp-api-2.1.jar
|
100
|
+
- classpath/commons-codec-1.6.jar
|
101
|
+
- classpath/snappy-java-1.1.1.6.jar
|
102
|
+
- classpath/jetty-6.1.26.jar
|
103
|
+
- classpath/hadoop-yarn-api-2.7.1.jar
|
104
|
+
- classpath/jersey-server-1.9.jar
|
105
|
+
- classpath/java-xmlbuilder-0.4.jar
|
106
|
+
- classpath/netty-3.7.0.Final.jar
|
107
|
+
- classpath/hadoop-common-2.7.1.jar
|
108
|
+
- classpath/jersey-client-1.9.jar
|
109
|
+
- classpath/jersey-guice-1.9.jar
|
110
|
+
- classpath/paranamer-2.3.jar
|
111
|
+
- classpath/zookeeper-3.4.6.jar
|
112
|
+
- classpath/parquet-encoding-1.8.1.jar
|
113
|
+
- classpath/jettison-1.1.jar
|
114
|
+
- classpath/api-asn1-api-1.0.0-M20.jar
|
115
|
+
- classpath/apacheds-kerberos-codec-2.0.0-M15.jar
|
116
|
+
- classpath/parquet-hadoop-1.8.1.jar
|
117
|
+
- classpath/commons-collections-3.2.1.jar
|
118
|
+
- classpath/asm-3.1.jar
|
119
|
+
- classpath/parquet-common-1.8.1.jar
|
120
|
+
- classpath/hadoop-client-2.7.1.jar
|
121
|
+
- classpath/api-util-1.0.0-M20.jar
|
122
|
+
- classpath/embulk-output-utf8parquet-1.0.0.jar
|
123
|
+
- classpath/curator-framework-2.7.1.jar
|
124
|
+
- classpath/commons-net-3.1.jar
|
125
|
+
- classpath/gson-2.2.4.jar
|
126
|
+
- classpath/jets3t-0.9.0.jar
|
127
|
+
- classpath/commons-lang-2.6.jar
|
128
|
+
- classpath/parquet-jackson-1.8.1.jar
|
129
|
+
- classpath/jsch-0.1.42.jar
|
130
|
+
- classpath/leveldbjni-all-1.8.jar
|
131
|
+
- classpath/httpcore-4.2.4.jar
|
132
|
+
- classpath/hadoop-mapreduce-client-app-2.7.1.jar
|
133
|
+
- classpath/jackson-mapper-asl-1.9.13.jar
|
134
|
+
- classpath/commons-math3-3.1.1.jar
|
135
|
+
- classpath/netty-all-4.0.23.Final.jar
|
136
|
+
- classpath/aws-java-sdk-1.7.4.jar
|
137
|
+
- classpath/htrace-core-3.1.0-incubating.jar
|
138
|
+
- classpath/jsr305-3.0.0.jar
|
139
|
+
- classpath/curator-client-2.7.1.jar
|
140
|
+
homepage: https://github.com/alexopoulos7/embulk-output-utf8parquet
|
141
|
+
licenses:
|
142
|
+
- MIT
|
143
|
+
metadata: {}
|
144
|
+
post_install_message:
|
145
|
+
rdoc_options: []
|
146
|
+
require_paths:
|
147
|
+
- lib
|
148
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - '>='
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
|
+
requirements:
|
155
|
+
- - '>='
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: '0'
|
158
|
+
requirements: []
|
159
|
+
rubyforge_project:
|
160
|
+
rubygems_version: 2.1.9
|
161
|
+
signing_key:
|
162
|
+
specification_version: 4
|
163
|
+
summary: Parquet output plugin for Embulk with UTF8 support
|
164
|
+
test_files: []
|