embulk-output-orc 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3c842edfe45c7e992faae16afd3331c7f8ecf256
4
- data.tar.gz: a6b3d098e7b012a07f4870e2fc92c898b09e1560
3
+ metadata.gz: 376187534c3d236c20656a656ed04d5c858e9a83
4
+ data.tar.gz: 4d4b74153e1e2fe0248b4ba5d097723dbc42022e
5
5
  SHA512:
6
- metadata.gz: 3bf5bc9e310496191419ee1e9a76cf9912321e9df2d178081c324ed46f83e7600e04912df59a7ebf38e0e3761be4b8ea1931cae549d681a8ba5b6d35b1e19990
7
- data.tar.gz: bbec4349adf56b4c684084a39f61eee8dd41152a11de02c2b5573e5e933cf8d76f280e7110f030cb31b157fdfc9e7fa457f575cd8a6b5b4d8df8ea484a51f358
6
+ metadata.gz: 888dab123d9cea81defbd51c0d5b812bd6c90e84c578927a72a491a8fffb4fac61fcced52ba7d759fdd1c36d9dba08a424c2d704b6f3f1ae51bbfdb3fac50728
7
+ data.tar.gz: bb7c0de447e7bf98b2ad7e7ba2b309854b0beaf37b6c10fd2a82be3736f6e39c8a576c985be0c62e9223ab24cb1a9bd4662e64d12974be490cd042002c933699
@@ -0,0 +1,12 @@
1
+ language: java
2
+ jdk:
3
+ - oraclejdk8
4
+
5
+ cache:
6
+ directories: # run "travis cache --delete" to delete caches
7
+ - $HOME/.gradle
8
+
9
+ sudo: false
10
+ script:
11
+ - ./gradlew --info checkstyle
12
+ - ./gradlew --info check
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Orc output plugin for Embulk
2
2
 
3
- TODO: Write short description here and build.gradle file.
3
+ [![Build Status](https://travis-ci.org/yuokada/embulk-output-orc.svg?branch=master)](https://travis-ci.org/yuokada/embulk-output-orc)
4
4
 
5
5
  ## Overview
6
6
 
@@ -11,17 +11,25 @@ TODO: Write short description here and build.gradle file.
11
11
 
12
12
  ## Configuration
13
13
 
14
- - **option1**: description (integer, required)
15
- - **option2**: description (string, default: `"myvalue"`)
16
- - **option3**: description (string, default: `null`)
14
+ - **path_prefix**: A prefix of output path. (string, required)
15
+ - **file_ext**: An extension of output file. (string, default: `.orc`)
16
+ - **sequence_format**: Format for the sequence part of output file names. (string, default: `.%03d`)
17
+ - **buffer_size**: Set the ORC buffer size (integer, default: `10000`)
18
+ - **strip_size**: Set the ORC stripe size (integer, default: `100000`)
19
+ - **compression_kind**: Compression kind used for the ORC file. (string, default: `'ZLIB'`)
20
+ - **overwrite**: (LocalFileSystem only) Overwrite if output files already exist. (boolean, default: `false`)
21
+ - **default_from_timezone**: Time zone of timestamp columns. This can be overwritten for each column using column_options. (DateTimeZone, default: `UTC`)
17
22
 
18
23
  ## Example
19
24
 
20
25
  ```yaml
21
26
  out:
22
27
  type: orc
23
- option1: example1
24
- option2: example2
28
+ path_prefix: "/tmp/output"
29
+ buffer_size: 8000
30
+ strip_size: 90000
31
+ compression_kind: ZLIB
32
+ overwrite: true
25
33
  ```
26
34
 
27
35
 
@@ -14,7 +14,7 @@ configurations {
14
14
  runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
15
15
  }
16
16
 
17
- version = "0.0.2"
17
+ version = "0.1.0"
18
18
 
19
19
  sourceCompatibility = 1.8
20
20
  targetCompatibility = 1.8
@@ -22,15 +22,10 @@ targetCompatibility = 1.8
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.29"
24
24
  provided "org.embulk:embulk-core:0.8.29"
25
- // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
26
25
 
27
26
  compile "org.apache.orc:orc:1.4.0"
28
27
  compile "org.apache.orc:orc-core:1.4.0"
29
- compile "org.apache.orc:orc-mapreduce:1.4.0"
30
-
31
- compile "org.apache.hadoop:hadoop-common:2.6.4"
32
- compile "org.apache.hadoop:hadoop-client:2.6.4"
33
- // compile "org.apache.hadoop:hadoop-hdfs:2.7.4"
28
+ compile "org.apache.hadoop:hadoop-hdfs:2.6.4"
34
29
 
35
30
  testCompile "junit:junit:4.+"
36
31
  testCompile "org.embulk:embulk-core:0.8.29:tests"
@@ -89,10 +84,10 @@ Gem::Specification.new do |spec|
89
84
  spec.version = "${project.version}"
90
85
  spec.authors = ["yuokada"]
91
86
  spec.summary = %[Orc output plugin for Embulk]
92
- spec.description = %[Dumps records to Orc.]
87
+ spec.description = %[Dumps records to Orc format file.]
93
88
  spec.email = ["callistoiv+git@gmail.com"]
94
89
  spec.licenses = ["MIT"]
95
- # TODO set this: spec.homepage = "https://github.com/callistoiv+git/embulk-output-orc"
90
+ spec.homepage = "https://github.com/yuokada/embulk-output-orc"
96
91
 
97
92
  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
98
93
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
@@ -53,6 +53,7 @@ exec:
53
53
 
54
54
  out:
55
55
  type: orc
56
+ overwrite: true
56
57
  path_prefix: "/tmp/output"
57
58
  buffer_size: 8000
58
59
  strip_size: 90000
@@ -128,12 +128,14 @@ public class OrcOutputPlugin
128
128
  {
129
129
  PluginTask task = taskSource.loadTask(PluginTask.class);
130
130
 
131
+ if (task.getOverwrite()) {
132
+ OrcOutputPluginHelper.removeOldFile(buildPath(task, taskIndex));
133
+ }
134
+
131
135
  final PageReader reader = new PageReader(schema);
132
136
  Writer writer = createWriter(task, schema, taskIndex);
133
137
 
134
138
  return new OrcTransactionalPageOutput(reader, writer, task);
135
- // Write your code here :)
136
- // throw new UnsupportedOperationException("OrcOutputPlugin.run method is not implemented yet");
137
139
  }
138
140
 
139
141
  private String buildPath(PluginTask task, int processorIndex)
@@ -0,0 +1,26 @@
1
+ package org.embulk.output.orc;
2
+
3
+ import java.io.IOException;
4
+ import java.nio.file.Files;
5
+ import java.nio.file.Path;
6
+ import java.nio.file.Paths;
7
+
8
+ class OrcOutputPluginHelper
9
+ {
10
+ protected OrcOutputPluginHelper()
11
+ {
12
+ throw new UnsupportedOperationException();
13
+ }
14
+
15
+ static void removeOldFile(String fpath)
16
+ {
17
+ Path path = Paths.get(fpath);
18
+ // TODO: Check local file. not HDFS or S3.
19
+ try {
20
+ Files.deleteIfExists(path);
21
+ }
22
+ catch (IOException e) {
23
+ e.printStackTrace();
24
+ }
25
+ }
26
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-orc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-21 00:00:00.000000000 Z
11
+ date: 2017-08-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -38,7 +38,7 @@ dependencies:
38
38
  - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
- description: Dumps records to Orc.
41
+ description: Dumps records to Orc format file.
42
42
  email:
43
43
  - callistoiv+git@gmail.com
44
44
  executables: []
@@ -46,6 +46,7 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
+ - .travis.yml
49
50
  - LICENSE.txt
50
51
  - README.md
51
52
  - build.gradle
@@ -59,22 +60,20 @@ files:
59
60
  - lib/embulk/output/orc.rb
60
61
  - src/main/java/org/embulk/output/orc/OrcColumnVisitor.java
61
62
  - src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
63
+ - src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java
62
64
  - src/test/java/org/embulk/output/orc/TestOrcOutputPlugin.java
63
- - classpath/activation-1.1.jar
64
65
  - classpath/aircompressor-0.3.jar
65
66
  - classpath/apacheds-i18n-2.0.0-M15.jar
66
67
  - classpath/apacheds-kerberos-codec-2.0.0-M15.jar
67
68
  - classpath/api-asn1-api-1.0.0-M20.jar
68
69
  - classpath/api-util-1.0.0-M20.jar
69
70
  - classpath/asm-3.1.jar
70
- - classpath/avro-1.7.4.jar
71
- - classpath/commons-beanutils-1.7.0.jar
72
71
  - classpath/commons-cli-1.2.jar
73
72
  - classpath/commons-codec-1.6.jar
74
73
  - classpath/commons-collections-3.2.2.jar
75
74
  - classpath/commons-compress-1.4.1.jar
76
75
  - classpath/commons-configuration-1.6.jar
77
- - classpath/commons-digester-1.8.jar
76
+ - classpath/commons-daemon-1.0.13.jar
78
77
  - classpath/commons-el-1.0.jar
79
78
  - classpath/commons-httpclient-3.1.jar
80
79
  - classpath/commons-io-2.4.jar
@@ -84,70 +83,37 @@ files:
84
83
  - classpath/commons-net-3.1.jar
85
84
  - classpath/curator-client-2.6.0.jar
86
85
  - classpath/curator-framework-2.6.0.jar
87
- - classpath/curator-recipes-2.6.0.jar
88
- - classpath/embulk-output-orc-0.0.2.jar
86
+ - classpath/embulk-output-orc-0.1.0.jar
89
87
  - classpath/gson-2.2.4.jar
90
- - classpath/guice-servlet-3.0.jar
91
88
  - classpath/hadoop-annotations-2.6.4.jar
92
89
  - classpath/hadoop-auth-2.6.4.jar
93
- - classpath/hadoop-client-2.6.4.jar
94
90
  - classpath/hadoop-common-2.6.4.jar
95
91
  - classpath/hadoop-hdfs-2.6.4.jar
96
- - classpath/hadoop-mapreduce-client-app-2.6.4.jar
97
- - classpath/hadoop-mapreduce-client-common-2.6.4.jar
98
- - classpath/hadoop-mapreduce-client-core-2.6.4.jar
99
- - classpath/hadoop-mapreduce-client-jobclient-2.6.4.jar
100
- - classpath/hadoop-mapreduce-client-shuffle-2.6.4.jar
101
- - classpath/hadoop-yarn-api-2.6.4.jar
102
- - classpath/hadoop-yarn-client-2.6.4.jar
103
- - classpath/hadoop-yarn-common-2.6.4.jar
104
- - classpath/hadoop-yarn-server-common-2.6.4.jar
105
- - classpath/hadoop-yarn-server-nodemanager-2.6.4.jar
106
92
  - classpath/hive-storage-api-2.2.1.jar
107
93
  - classpath/htrace-core-3.0.4.jar
108
94
  - classpath/httpclient-4.2.5.jar
109
95
  - classpath/httpcore-4.2.4.jar
110
96
  - classpath/jackson-core-asl-1.9.13.jar
111
- - classpath/jackson-jaxrs-1.9.13.jar
112
97
  - classpath/jackson-mapper-asl-1.9.13.jar
113
- - classpath/jackson-xc-1.9.13.jar
114
- - classpath/jasper-compiler-5.5.23.jar
115
98
  - classpath/jasper-runtime-5.5.23.jar
116
- - classpath/java-xmlbuilder-0.4.jar
117
- - classpath/jaxb-api-2.2.2.jar
118
- - classpath/jaxb-impl-2.2.3-1.jar
119
- - classpath/jersey-client-1.9.jar
120
99
  - classpath/jersey-core-1.9.jar
121
- - classpath/jersey-guice-1.9.jar
122
- - classpath/jersey-json-1.9.jar
123
100
  - classpath/jersey-server-1.9.jar
124
- - classpath/jets3t-0.9.0.jar
125
- - classpath/jettison-1.1.jar
126
101
  - classpath/jetty-6.1.26.jar
127
102
  - classpath/jetty-util-6.1.26.jar
128
103
  - classpath/jline-0.9.94.jar
129
104
  - classpath/jsch-0.1.42.jar
130
105
  - classpath/jsp-api-2.1.jar
131
- - classpath/jsr305-1.3.9.jar
132
- - classpath/kryo-shaded-3.0.3.jar
133
- - classpath/leveldbjni-all-1.8.jar
134
106
  - classpath/log4j-1.2.17.jar
135
- - classpath/minlog-1.3.0.jar
136
107
  - classpath/netty-3.7.0.Final.jar
137
- - classpath/objenesis-2.1.jar
138
108
  - classpath/orc-core-1.4.0.jar
139
- - classpath/orc-mapreduce-1.4.0.jar
140
- - classpath/paranamer-2.3.jar
141
109
  - classpath/protobuf-java-2.5.0.jar
142
110
  - classpath/servlet-api-2.5.jar
143
- - classpath/snappy-java-1.0.4.1.jar
144
- - classpath/stax-api-1.0-2.jar
145
111
  - classpath/xercesImpl-2.9.1.jar
146
112
  - classpath/xml-apis-1.3.04.jar
147
113
  - classpath/xmlenc-0.52.jar
148
114
  - classpath/xz-1.0.jar
149
115
  - classpath/zookeeper-3.4.6.jar
150
- homepage:
116
+ homepage: https://github.com/yuokada/embulk-output-orc
151
117
  licenses:
152
118
  - MIT
153
119
  metadata: {}