embulk-output-orc 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3c842edfe45c7e992faae16afd3331c7f8ecf256
4
- data.tar.gz: a6b3d098e7b012a07f4870e2fc92c898b09e1560
3
+ metadata.gz: 376187534c3d236c20656a656ed04d5c858e9a83
4
+ data.tar.gz: 4d4b74153e1e2fe0248b4ba5d097723dbc42022e
5
5
  SHA512:
6
- metadata.gz: 3bf5bc9e310496191419ee1e9a76cf9912321e9df2d178081c324ed46f83e7600e04912df59a7ebf38e0e3761be4b8ea1931cae549d681a8ba5b6d35b1e19990
7
- data.tar.gz: bbec4349adf56b4c684084a39f61eee8dd41152a11de02c2b5573e5e933cf8d76f280e7110f030cb31b157fdfc9e7fa457f575cd8a6b5b4d8df8ea484a51f358
6
+ metadata.gz: 888dab123d9cea81defbd51c0d5b812bd6c90e84c578927a72a491a8fffb4fac61fcced52ba7d759fdd1c36d9dba08a424c2d704b6f3f1ae51bbfdb3fac50728
7
+ data.tar.gz: bb7c0de447e7bf98b2ad7e7ba2b309854b0beaf37b6c10fd2a82be3736f6e39c8a576c985be0c62e9223ab24cb1a9bd4662e64d12974be490cd042002c933699
@@ -0,0 +1,12 @@
1
+ language: java
2
+ jdk:
3
+ - oraclejdk8
4
+
5
+ cache:
6
+ directories: # run "travis cache --delete" to delete caches
7
+ - $HOME/.gradle
8
+
9
+ sudo: false
10
+ script:
11
+ - ./gradlew --info checkstyle
12
+ - ./gradlew --info check
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Orc output plugin for Embulk
2
2
 
3
- TODO: Write short description here and build.gradle file.
3
+ [![Build Status](https://travis-ci.org/yuokada/embulk-output-orc.svg?branch=master)](https://travis-ci.org/yuokada/embulk-output-orc)
4
4
 
5
5
  ## Overview
6
6
 
@@ -11,17 +11,25 @@ TODO: Write short description here and build.gradle file.
11
11
 
12
12
  ## Configuration
13
13
 
14
- - **option1**: description (integer, required)
15
- - **option2**: description (string, default: `"myvalue"`)
16
- - **option3**: description (string, default: `null`)
14
+ - **path_prefix**: A prefix of output path. (string, required)
15
+ - **file_ext**: An extension of output file. (string, default: `.orc`)
16
+ - **sequence_format**: Format for the sequence part of output file names. (string, default: `.%03d`)
17
+ - **buffer_size**: Set the ORC buffer size (integer, default: `10000`)
18
+ - **strip_size**: Set the ORC stripe size (integer, default: `100000`)
19
+ - **compression_kind**: Compression kind used for the ORC file (string, default: `'ZLIB'`)
20
+ - **overwrite**: (LocalFileSystem only) Overwrite if output files already exist. (boolean, default: `false`)
21
+ - **default_from_timezone**: Time zone of timestamp columns. This can be overwritten for each column using column_options. (DateTimeZone, default: `UTC`)
17
22
 
18
23
  ## Example
19
24
 
20
25
  ```yaml
21
26
  out:
22
27
  type: orc
23
- option1: example1
24
- option2: example2
28
+ path_prefix: "/tmp/output"
29
+ buffer_size: 8000
30
+ strip_size: 90000
31
+ compression_kind: ZLIB
32
+ overwrite: true
25
33
  ```
26
34
 
27
35
 
@@ -14,7 +14,7 @@ configurations {
14
14
  runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
15
15
  }
16
16
 
17
- version = "0.0.2"
17
+ version = "0.1.0"
18
18
 
19
19
  sourceCompatibility = 1.8
20
20
  targetCompatibility = 1.8
@@ -22,15 +22,10 @@ targetCompatibility = 1.8
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.29"
24
24
  provided "org.embulk:embulk-core:0.8.29"
25
- // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
26
25
 
27
26
  compile "org.apache.orc:orc:1.4.0"
28
27
  compile "org.apache.orc:orc-core:1.4.0"
29
- compile "org.apache.orc:orc-mapreduce:1.4.0"
30
-
31
- compile "org.apache.hadoop:hadoop-common:2.6.4"
32
- compile "org.apache.hadoop:hadoop-client:2.6.4"
33
- // compile "org.apache.hadoop:hadoop-hdfs:2.7.4"
28
+ compile "org.apache.hadoop:hadoop-hdfs:2.6.4"
34
29
 
35
30
  testCompile "junit:junit:4.+"
36
31
  testCompile "org.embulk:embulk-core:0.8.29:tests"
@@ -89,10 +84,10 @@ Gem::Specification.new do |spec|
89
84
  spec.version = "${project.version}"
90
85
  spec.authors = ["yuokada"]
91
86
  spec.summary = %[Orc output plugin for Embulk]
92
- spec.description = %[Dumps records to Orc.]
87
+ spec.description = %[Dumps records to Orc format file.]
93
88
  spec.email = ["callistoiv+git@gmail.com"]
94
89
  spec.licenses = ["MIT"]
95
- # TODO set this: spec.homepage = "https://github.com/callistoiv+git/embulk-output-orc"
90
+ spec.homepage = "https://github.com/yuokada/embulk-output-orc"
96
91
 
97
92
  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
98
93
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
@@ -53,6 +53,7 @@ exec:
53
53
 
54
54
  out:
55
55
  type: orc
56
+ overwrite: true
56
57
  path_prefix: "/tmp/output"
57
58
  buffer_size: 8000
58
59
  strip_size: 90000
@@ -128,12 +128,14 @@ public class OrcOutputPlugin
128
128
  {
129
129
  PluginTask task = taskSource.loadTask(PluginTask.class);
130
130
 
131
+ if (task.getOverwrite()) {
132
+ OrcOutputPluginHelper.removeOldFile(buildPath(task, taskIndex));
133
+ }
134
+
131
135
  final PageReader reader = new PageReader(schema);
132
136
  Writer writer = createWriter(task, schema, taskIndex);
133
137
 
134
138
  return new OrcTransactionalPageOutput(reader, writer, task);
135
- // Write your code here :)
136
- // throw new UnsupportedOperationException("OrcOutputPlugin.run method is not implemented yet");
137
139
  }
138
140
 
139
141
  private String buildPath(PluginTask task, int processorIndex)
@@ -0,0 +1,26 @@
1
+ package org.embulk.output.orc;
2
+
3
+ import java.io.IOException;
4
+ import java.nio.file.Files;
5
+ import java.nio.file.Path;
6
+ import java.nio.file.Paths;
7
+
8
+ class OrcOutputPluginHelper
9
+ {
10
+ protected OrcOutputPluginHelper()
11
+ {
12
+ throw new UnsupportedOperationException();
13
+ }
14
+
15
+ static void removeOldFile(String fpath)
16
+ {
17
+ Path path = Paths.get(fpath);
18
+ // TODO: Check local file. not HDFS or S3.
19
+ try {
20
+ Files.deleteIfExists(path);
21
+ }
22
+ catch (IOException e) {
23
+ e.printStackTrace();
24
+ }
25
+ }
26
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-orc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-21 00:00:00.000000000 Z
11
+ date: 2017-08-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -38,7 +38,7 @@ dependencies:
38
38
  - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
- description: Dumps records to Orc.
41
+ description: Dumps records to Orc format file.
42
42
  email:
43
43
  - callistoiv+git@gmail.com
44
44
  executables: []
@@ -46,6 +46,7 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
+ - .travis.yml
49
50
  - LICENSE.txt
50
51
  - README.md
51
52
  - build.gradle
@@ -59,22 +60,20 @@ files:
59
60
  - lib/embulk/output/orc.rb
60
61
  - src/main/java/org/embulk/output/orc/OrcColumnVisitor.java
61
62
  - src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
63
+ - src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java
62
64
  - src/test/java/org/embulk/output/orc/TestOrcOutputPlugin.java
63
- - classpath/activation-1.1.jar
64
65
  - classpath/aircompressor-0.3.jar
65
66
  - classpath/apacheds-i18n-2.0.0-M15.jar
66
67
  - classpath/apacheds-kerberos-codec-2.0.0-M15.jar
67
68
  - classpath/api-asn1-api-1.0.0-M20.jar
68
69
  - classpath/api-util-1.0.0-M20.jar
69
70
  - classpath/asm-3.1.jar
70
- - classpath/avro-1.7.4.jar
71
- - classpath/commons-beanutils-1.7.0.jar
72
71
  - classpath/commons-cli-1.2.jar
73
72
  - classpath/commons-codec-1.6.jar
74
73
  - classpath/commons-collections-3.2.2.jar
75
74
  - classpath/commons-compress-1.4.1.jar
76
75
  - classpath/commons-configuration-1.6.jar
77
- - classpath/commons-digester-1.8.jar
76
+ - classpath/commons-daemon-1.0.13.jar
78
77
  - classpath/commons-el-1.0.jar
79
78
  - classpath/commons-httpclient-3.1.jar
80
79
  - classpath/commons-io-2.4.jar
@@ -84,70 +83,37 @@ files:
84
83
  - classpath/commons-net-3.1.jar
85
84
  - classpath/curator-client-2.6.0.jar
86
85
  - classpath/curator-framework-2.6.0.jar
87
- - classpath/curator-recipes-2.6.0.jar
88
- - classpath/embulk-output-orc-0.0.2.jar
86
+ - classpath/embulk-output-orc-0.1.0.jar
89
87
  - classpath/gson-2.2.4.jar
90
- - classpath/guice-servlet-3.0.jar
91
88
  - classpath/hadoop-annotations-2.6.4.jar
92
89
  - classpath/hadoop-auth-2.6.4.jar
93
- - classpath/hadoop-client-2.6.4.jar
94
90
  - classpath/hadoop-common-2.6.4.jar
95
91
  - classpath/hadoop-hdfs-2.6.4.jar
96
- - classpath/hadoop-mapreduce-client-app-2.6.4.jar
97
- - classpath/hadoop-mapreduce-client-common-2.6.4.jar
98
- - classpath/hadoop-mapreduce-client-core-2.6.4.jar
99
- - classpath/hadoop-mapreduce-client-jobclient-2.6.4.jar
100
- - classpath/hadoop-mapreduce-client-shuffle-2.6.4.jar
101
- - classpath/hadoop-yarn-api-2.6.4.jar
102
- - classpath/hadoop-yarn-client-2.6.4.jar
103
- - classpath/hadoop-yarn-common-2.6.4.jar
104
- - classpath/hadoop-yarn-server-common-2.6.4.jar
105
- - classpath/hadoop-yarn-server-nodemanager-2.6.4.jar
106
92
  - classpath/hive-storage-api-2.2.1.jar
107
93
  - classpath/htrace-core-3.0.4.jar
108
94
  - classpath/httpclient-4.2.5.jar
109
95
  - classpath/httpcore-4.2.4.jar
110
96
  - classpath/jackson-core-asl-1.9.13.jar
111
- - classpath/jackson-jaxrs-1.9.13.jar
112
97
  - classpath/jackson-mapper-asl-1.9.13.jar
113
- - classpath/jackson-xc-1.9.13.jar
114
- - classpath/jasper-compiler-5.5.23.jar
115
98
  - classpath/jasper-runtime-5.5.23.jar
116
- - classpath/java-xmlbuilder-0.4.jar
117
- - classpath/jaxb-api-2.2.2.jar
118
- - classpath/jaxb-impl-2.2.3-1.jar
119
- - classpath/jersey-client-1.9.jar
120
99
  - classpath/jersey-core-1.9.jar
121
- - classpath/jersey-guice-1.9.jar
122
- - classpath/jersey-json-1.9.jar
123
100
  - classpath/jersey-server-1.9.jar
124
- - classpath/jets3t-0.9.0.jar
125
- - classpath/jettison-1.1.jar
126
101
  - classpath/jetty-6.1.26.jar
127
102
  - classpath/jetty-util-6.1.26.jar
128
103
  - classpath/jline-0.9.94.jar
129
104
  - classpath/jsch-0.1.42.jar
130
105
  - classpath/jsp-api-2.1.jar
131
- - classpath/jsr305-1.3.9.jar
132
- - classpath/kryo-shaded-3.0.3.jar
133
- - classpath/leveldbjni-all-1.8.jar
134
106
  - classpath/log4j-1.2.17.jar
135
- - classpath/minlog-1.3.0.jar
136
107
  - classpath/netty-3.7.0.Final.jar
137
- - classpath/objenesis-2.1.jar
138
108
  - classpath/orc-core-1.4.0.jar
139
- - classpath/orc-mapreduce-1.4.0.jar
140
- - classpath/paranamer-2.3.jar
141
109
  - classpath/protobuf-java-2.5.0.jar
142
110
  - classpath/servlet-api-2.5.jar
143
- - classpath/snappy-java-1.0.4.1.jar
144
- - classpath/stax-api-1.0-2.jar
145
111
  - classpath/xercesImpl-2.9.1.jar
146
112
  - classpath/xml-apis-1.3.04.jar
147
113
  - classpath/xmlenc-0.52.jar
148
114
  - classpath/xz-1.0.jar
149
115
  - classpath/zookeeper-3.4.6.jar
150
- homepage:
116
+ homepage: https://github.com/yuokada/embulk-output-orc
151
117
  licenses:
152
118
  - MIT
153
119
  metadata: {}