embulk-output-utf8parquet 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. checksums.yaml +7 -0
  2. data/.idea/gradle.xml +11 -0
  3. data/.idea/misc.xml +6 -0
  4. data/.idea/modules.xml +8 -0
  5. data/.idea/vcs.xml +6 -0
  6. data/README.md +49 -0
  7. data/classpath/activation-1.1.jar +0 -0
  8. data/classpath/apacheds-i18n-2.0.0-M15.jar +0 -0
  9. data/classpath/apacheds-kerberos-codec-2.0.0-M15.jar +0 -0
  10. data/classpath/api-asn1-api-1.0.0-M20.jar +0 -0
  11. data/classpath/api-util-1.0.0-M20.jar +0 -0
  12. data/classpath/asm-3.1.jar +0 -0
  13. data/classpath/avro-1.7.4.jar +0 -0
  14. data/classpath/aws-java-sdk-1.7.4.jar +0 -0
  15. data/classpath/commons-beanutils-1.7.0.jar +0 -0
  16. data/classpath/commons-cli-1.2.jar +0 -0
  17. data/classpath/commons-codec-1.6.jar +0 -0
  18. data/classpath/commons-collections-3.2.1.jar +0 -0
  19. data/classpath/commons-compress-1.4.1.jar +0 -0
  20. data/classpath/commons-configuration-1.6.jar +0 -0
  21. data/classpath/commons-digester-1.8.jar +0 -0
  22. data/classpath/commons-httpclient-3.1.jar +0 -0
  23. data/classpath/commons-io-2.4.jar +0 -0
  24. data/classpath/commons-lang-2.6.jar +0 -0
  25. data/classpath/commons-logging-1.1.3.jar +0 -0
  26. data/classpath/commons-math3-3.1.1.jar +0 -0
  27. data/classpath/commons-net-3.1.jar +0 -0
  28. data/classpath/curator-client-2.7.1.jar +0 -0
  29. data/classpath/curator-framework-2.7.1.jar +0 -0
  30. data/classpath/curator-recipes-2.7.1.jar +0 -0
  31. data/classpath/embulk-output-utf8parquet-1.0.0.jar +0 -0
  32. data/classpath/gson-2.2.4.jar +0 -0
  33. data/classpath/hadoop-annotations-2.7.1.jar +0 -0
  34. data/classpath/hadoop-auth-2.7.1.jar +0 -0
  35. data/classpath/hadoop-aws-2.7.1.jar +0 -0
  36. data/classpath/hadoop-client-2.7.1.jar +0 -0
  37. data/classpath/hadoop-common-2.7.1.jar +0 -0
  38. data/classpath/hadoop-hdfs-2.7.1.jar +0 -0
  39. data/classpath/hadoop-mapreduce-client-app-2.7.1.jar +0 -0
  40. data/classpath/hadoop-mapreduce-client-common-2.7.1.jar +0 -0
  41. data/classpath/hadoop-mapreduce-client-core-2.7.1.jar +0 -0
  42. data/classpath/hadoop-mapreduce-client-jobclient-2.7.1.jar +0 -0
  43. data/classpath/hadoop-mapreduce-client-shuffle-2.7.1.jar +0 -0
  44. data/classpath/hadoop-yarn-api-2.7.1.jar +0 -0
  45. data/classpath/hadoop-yarn-client-2.7.1.jar +0 -0
  46. data/classpath/hadoop-yarn-common-2.7.1.jar +0 -0
  47. data/classpath/hadoop-yarn-server-common-2.7.1.jar +0 -0
  48. data/classpath/hadoop-yarn-server-nodemanager-2.7.1.jar +0 -0
  49. data/classpath/htrace-core-3.1.0-incubating.jar +0 -0
  50. data/classpath/httpclient-4.2.5.jar +0 -0
  51. data/classpath/httpcore-4.2.4.jar +0 -0
  52. data/classpath/jackson-core-asl-1.9.13.jar +0 -0
  53. data/classpath/jackson-jaxrs-1.9.13.jar +0 -0
  54. data/classpath/jackson-mapper-asl-1.9.13.jar +0 -0
  55. data/classpath/jackson-xc-1.9.13.jar +0 -0
  56. data/classpath/java-xmlbuilder-0.4.jar +0 -0
  57. data/classpath/jaxb-api-2.2.2.jar +0 -0
  58. data/classpath/jaxb-impl-2.2.3-1.jar +0 -0
  59. data/classpath/jersey-client-1.9.jar +0 -0
  60. data/classpath/jersey-core-1.9.jar +0 -0
  61. data/classpath/jersey-guice-1.9.jar +0 -0
  62. data/classpath/jersey-json-1.9.jar +0 -0
  63. data/classpath/jersey-server-1.9.jar +0 -0
  64. data/classpath/jets3t-0.9.0.jar +0 -0
  65. data/classpath/jettison-1.1.jar +0 -0
  66. data/classpath/jetty-6.1.26.jar +0 -0
  67. data/classpath/jetty-util-6.1.26.jar +0 -0
  68. data/classpath/jline-0.9.94.jar +0 -0
  69. data/classpath/joda-time-2.9.9.jar +0 -0
  70. data/classpath/jsch-0.1.42.jar +0 -0
  71. data/classpath/jsp-api-2.1.jar +0 -0
  72. data/classpath/jsr305-3.0.0.jar +0 -0
  73. data/classpath/leveldbjni-all-1.8.jar +0 -0
  74. data/classpath/log4j-1.2.17.jar +0 -0
  75. data/classpath/netty-3.7.0.Final.jar +0 -0
  76. data/classpath/netty-all-4.0.23.Final.jar +0 -0
  77. data/classpath/paranamer-2.3.jar +0 -0
  78. data/classpath/parquet-column-1.8.1.jar +0 -0
  79. data/classpath/parquet-common-1.8.1.jar +0 -0
  80. data/classpath/parquet-encoding-1.8.1.jar +0 -0
  81. data/classpath/parquet-format-2.3.0-incubating.jar +0 -0
  82. data/classpath/parquet-hadoop-1.8.1.jar +0 -0
  83. data/classpath/parquet-jackson-1.8.1.jar +0 -0
  84. data/classpath/protobuf-java-2.5.0.jar +0 -0
  85. data/classpath/servlet-api-2.5.jar +0 -0
  86. data/classpath/snappy-java-1.1.1.6.jar +0 -0
  87. data/classpath/stax-api-1.0-2.jar +0 -0
  88. data/classpath/xercesImpl-2.9.1.jar +0 -0
  89. data/classpath/xml-apis-1.3.04.jar +0 -0
  90. data/classpath/xmlenc-0.52.jar +0 -0
  91. data/classpath/xz-1.0.jar +0 -0
  92. data/classpath/zookeeper-3.4.6.jar +0 -0
  93. data/embulk-output-utf8parquet.iml +9 -0
  94. metadata +164 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 217f5b1b8a369ad9d2327b1c59aff5d9c0333eb6
4
+ data.tar.gz: 862c3aec856f11163ba30d008e98afe469aea538
5
+ SHA512:
6
+ metadata.gz: 6bb6be1333e7869d5b8d711f5a410288b7fa9df9cc18d32674b33908736ae9daf99d1d4314a5a7fa2ddc32f189214256be6c757ab71f1091c2629a9aaae5c3e9
7
+ data.tar.gz: f10c1e3f01d42f5826bf8edcca966a05ab7c9abfc5e7e8a3ccb7df866ea9aa28f78e2c960a0e61e09fc39451ef8491b6603b8e7d8757603ba142264eaabfb2bd
data/.idea/gradle.xml ADDED
@@ -0,0 +1,11 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="GradleSettings">
4
+ <option name="linkedExternalProjectsSettings">
5
+ <GradleProjectSettings>
6
+ <option name="distributionType" value="DEFAULT_WRAPPED" />
7
+ <option name="externalProjectPath" value="$PROJECT_DIR$" />
8
+ </GradleProjectSettings>
9
+ </option>
10
+ </component>
11
+ </project>
data/.idea/misc.xml ADDED
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" languageLevel="JDK_1_9" project-jdk-name="9.0" project-jdk-type="JavaSDK">
4
+ <output url="file://$PROJECT_DIR$/classes" />
5
+ </component>
6
+ </project>
data/.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/embulk-output-utf8parquet.iml" filepath="$PROJECT_DIR$/embulk-output-utf8parquet.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
data/.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
data/README.md ADDED
@@ -0,0 +1,49 @@
1
+ # Parquet output plugin for Embulk
2
+
3
+ ## Overview
4
+
5
+ * **Plugin type**: output
6
+ * **Load all or nothing**: no
7
+ * **Resume supported**: no
8
+ * **Cleanup supported**: no
9
+
10
+ ## Configuration
11
+
12
+ - **path_prefix**: A prefix of output path. This is a Hadoop Path URI, and you can also include `scheme` and `authority` within this parameter. (string, required)
13
+ - **file_ext**: An extension of output path. (string, default: .parquet)
14
+ - **sequence_format**: (string, default: .%03d)
15
+ - **block_size**: A block size of parquet file. (int, default: 134217728(128M))
16
+ - **page_size**: A page size of parquet file. (int, default: 1048576(1M))
17
+ - **compression_codec**: A compression codec. Available: UNCOMPRESSED, SNAPPY, GZIP. (string, default: UNCOMPRESSED)
18
+ - **default_timezone**: Time zone of timestamp columns. This can be overwritten for each column using column_options
19
+ - **default_timestamp_format**: Format of timestamp columns. This can be overwritten for each column using column_options
20
+ - **column_options**: Specify timezone and timestamp format for each column. Format of this option is the same as the official csv formatter. See [document](
21
+ http://www.embulk.org/docs/built-in.html#csv-formatter-plugin).
22
+ - **extra_configurations**: Add extra entries to Configuration which will be passed to ParquetWriter
23
+ - **overwrite**: Overwrite if output files already exist. (default: fail if files exist)
24
+ - **addUTF8**: If true, string columns are stored with OriginalType.UTF8. (boolean, default: false)
25
+
26
+ ## Example
27
+
28
+ ```yaml
29
+ out:
30
+ type: parquet
31
+ path_prefix: file:///data/output
32
+ ```
33
+
34
+ ### How to write parquet files into S3
35
+
36
+ ```yaml
37
+ out:
38
+ type: parquet
39
+ path_prefix: s3a://bucket/keys
40
+ extra_configurations:
41
+ fs.s3a.access.key: 'your_access_key'
42
+ fs.s3a.secret.key: 'your_secret_access_key'
43
+ ```
44
+
45
+ ## Build
46
+
47
+ ```
48
+ $ ./gradlew gem
49
+ ```
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,9 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module external.linked.project.id="embulk-output-utf8parquet" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" type="JAVA_MODULE" version="4">
3
+ <component name="NewModuleRootManager" inherit-compiler-output="true">
4
+ <exclude-output />
5
+ <content url="file://$MODULE_DIR$" />
6
+ <orderEntry type="inheritedJdk" />
7
+ <orderEntry type="sourceFolder" forTests="false" />
8
+ </component>
9
+ </module>
metadata ADDED
@@ -0,0 +1,164 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-output-utf8parquet
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - OKUNO Akihiro
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-05-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ~>
17
+ - !ruby/object:Gem::Version
18
+ version: '1.0'
19
+ name: bundler
20
+ prerelease: false
21
+ type: :development
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '10.0'
33
+ name: rake
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: Parquet output plugin is an Embulk plugin that loads records to Parquet read by any input plugins. Search the input plugins by "embulk-input" keyword.
42
+ email:
43
+ - alexopoulos7@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .idea/gradle.xml
49
+ - .idea/misc.xml
50
+ - .idea/modules.xml
51
+ - .idea/vcs.xml
52
+ - README.md
53
+ - embulk-output-utf8parquet.iml
54
+ - classpath/jaxb-impl-2.2.3-1.jar
55
+ - classpath/hadoop-aws-2.7.1.jar
56
+ - classpath/hadoop-auth-2.7.1.jar
57
+ - classpath/activation-1.1.jar
58
+ - classpath/commons-configuration-1.6.jar
59
+ - classpath/commons-beanutils-1.7.0.jar
60
+ - classpath/hadoop-yarn-client-2.7.1.jar
61
+ - classpath/xz-1.0.jar
62
+ - classpath/commons-httpclient-3.1.jar
63
+ - classpath/stax-api-1.0-2.jar
64
+ - classpath/apacheds-i18n-2.0.0-M15.jar
65
+ - classpath/joda-time-2.9.9.jar
66
+ - classpath/httpclient-4.2.5.jar
67
+ - classpath/jline-0.9.94.jar
68
+ - classpath/jaxb-api-2.2.2.jar
69
+ - classpath/hadoop-annotations-2.7.1.jar
70
+ - classpath/hadoop-mapreduce-client-jobclient-2.7.1.jar
71
+ - classpath/hadoop-hdfs-2.7.1.jar
72
+ - classpath/jackson-jaxrs-1.9.13.jar
73
+ - classpath/xercesImpl-2.9.1.jar
74
+ - classpath/commons-logging-1.1.3.jar
75
+ - classpath/hadoop-yarn-server-common-2.7.1.jar
76
+ - classpath/curator-recipes-2.7.1.jar
77
+ - classpath/hadoop-yarn-server-nodemanager-2.7.1.jar
78
+ - classpath/jersey-json-1.9.jar
79
+ - classpath/avro-1.7.4.jar
80
+ - classpath/log4j-1.2.17.jar
81
+ - classpath/commons-cli-1.2.jar
82
+ - classpath/parquet-column-1.8.1.jar
83
+ - classpath/xml-apis-1.3.04.jar
84
+ - classpath/commons-digester-1.8.jar
85
+ - classpath/servlet-api-2.5.jar
86
+ - classpath/parquet-format-2.3.0-incubating.jar
87
+ - classpath/protobuf-java-2.5.0.jar
88
+ - classpath/hadoop-mapreduce-client-common-2.7.1.jar
89
+ - classpath/xmlenc-0.52.jar
90
+ - classpath/jackson-xc-1.9.13.jar
91
+ - classpath/jetty-util-6.1.26.jar
92
+ - classpath/hadoop-mapreduce-client-shuffle-2.7.1.jar
93
+ - classpath/commons-compress-1.4.1.jar
94
+ - classpath/hadoop-yarn-common-2.7.1.jar
95
+ - classpath/commons-io-2.4.jar
96
+ - classpath/hadoop-mapreduce-client-core-2.7.1.jar
97
+ - classpath/jackson-core-asl-1.9.13.jar
98
+ - classpath/jersey-core-1.9.jar
99
+ - classpath/jsp-api-2.1.jar
100
+ - classpath/commons-codec-1.6.jar
101
+ - classpath/snappy-java-1.1.1.6.jar
102
+ - classpath/jetty-6.1.26.jar
103
+ - classpath/hadoop-yarn-api-2.7.1.jar
104
+ - classpath/jersey-server-1.9.jar
105
+ - classpath/java-xmlbuilder-0.4.jar
106
+ - classpath/netty-3.7.0.Final.jar
107
+ - classpath/hadoop-common-2.7.1.jar
108
+ - classpath/jersey-client-1.9.jar
109
+ - classpath/jersey-guice-1.9.jar
110
+ - classpath/paranamer-2.3.jar
111
+ - classpath/zookeeper-3.4.6.jar
112
+ - classpath/parquet-encoding-1.8.1.jar
113
+ - classpath/jettison-1.1.jar
114
+ - classpath/api-asn1-api-1.0.0-M20.jar
115
+ - classpath/apacheds-kerberos-codec-2.0.0-M15.jar
116
+ - classpath/parquet-hadoop-1.8.1.jar
117
+ - classpath/commons-collections-3.2.1.jar
118
+ - classpath/asm-3.1.jar
119
+ - classpath/parquet-common-1.8.1.jar
120
+ - classpath/hadoop-client-2.7.1.jar
121
+ - classpath/api-util-1.0.0-M20.jar
122
+ - classpath/embulk-output-utf8parquet-1.0.0.jar
123
+ - classpath/curator-framework-2.7.1.jar
124
+ - classpath/commons-net-3.1.jar
125
+ - classpath/gson-2.2.4.jar
126
+ - classpath/jets3t-0.9.0.jar
127
+ - classpath/commons-lang-2.6.jar
128
+ - classpath/parquet-jackson-1.8.1.jar
129
+ - classpath/jsch-0.1.42.jar
130
+ - classpath/leveldbjni-all-1.8.jar
131
+ - classpath/httpcore-4.2.4.jar
132
+ - classpath/hadoop-mapreduce-client-app-2.7.1.jar
133
+ - classpath/jackson-mapper-asl-1.9.13.jar
134
+ - classpath/commons-math3-3.1.1.jar
135
+ - classpath/netty-all-4.0.23.Final.jar
136
+ - classpath/aws-java-sdk-1.7.4.jar
137
+ - classpath/htrace-core-3.1.0-incubating.jar
138
+ - classpath/jsr305-3.0.0.jar
139
+ - classpath/curator-client-2.7.1.jar
140
+ homepage: https://github.com/alexopoulos7/embulk-output-utf8parquet
141
+ licenses:
142
+ - MIT
143
+ metadata: {}
144
+ post_install_message:
145
+ rdoc_options: []
146
+ require_paths:
147
+ - lib
148
+ required_ruby_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ required_rubygems_version: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - '>='
156
+ - !ruby/object:Gem::Version
157
+ version: '0'
158
+ requirements: []
159
+ rubyforge_project:
160
+ rubygems_version: 2.1.9
161
+ signing_key:
162
+ specification_version: 4
163
+ summary: Parquet output plugin for Embulk with UTF8 support
164
+ test_files: []