embulk-output-orc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c6d1115e79d4012717df2f43f447bfbdd1518a88
4
+ data.tar.gz: 80e8e7f5e470724a33125b4ecce74112ef098d6b
5
+ SHA512:
6
+ metadata.gz: 290aec04feda06d83ddd9cd2995d62a186fbfc3399b90ede431e54eec8e34aac606077919610f446e703ee820e7625ed1d79f9d08378fccd6fcb052f15e8ab31
7
+ data.tar.gz: 899435e450c217c4f8b08fa9ea617efb64b5628ba0cb42b37849a11e5363b96889b67e595b51f2f3b2b396386166d5a5ddd96fd34cfa761f735572262b59f716
@@ -0,0 +1,13 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ *.gemspec
5
+ .gradle/
6
+ /classpath/
7
+ build/
8
+ .idea
9
+ /.settings/
10
+ /.metadata/
11
+ .classpath
12
+ .project
13
+ t
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,32 @@
1
+ # Orc output plugin for Embulk
2
+
3
+ Dumps records to files in the Apache ORC format.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: output
8
+ * **Load all or nothing**: no
9
+ * **Resume supported**: no
10
+ * **Cleanup supported**: yes
11
+
12
+ ## Configuration
13
+
14
+ - **option1**: description (integer, required)
15
+ - **option2**: description (string, default: `"myvalue"`)
16
+ - **option3**: description (string, default: `null`)
17
+
18
+ ## Example
19
+
20
+ ```yaml
21
+ out:
22
+ type: orc
23
+ option1: example1
24
+ option2: example2
25
+ ```
26
+
27
+
28
+ ## Build
29
+
30
+ ```
31
+ $ ./gradlew gem # add -t to watch for file changes and rebuild continuously
32
+ ```
@@ -0,0 +1,108 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "java"
5
+ id "checkstyle"
6
+ }
7
+ import com.github.jrubygradle.JRubyExec
8
+ repositories {
9
+ mavenCentral()
10
+ jcenter()
11
+ }
12
+ configurations {
13
+ provided
14
+ runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
15
+ }
16
+
17
+ version = "0.0.1"
18
+
19
+ sourceCompatibility = 1.8
20
+ targetCompatibility = 1.8
21
+
22
+ dependencies {
23
+ compile "org.embulk:embulk-core:0.8.29"
24
+ provided "org.embulk:embulk-core:0.8.29"
25
+ // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
26
+
27
+ compile "org.apache.orc:orc:1.4.0"
28
+ compile "org.apache.orc:orc-core:1.4.0"
29
+ compile "org.apache.orc:orc-mapreduce:1.4.0"
30
+
31
+ compile "org.apache.hadoop:hadoop-common:2.6.4"
32
+ compile "org.apache.hadoop:hadoop-client:2.6.4"
33
+ // compile "org.apache.hadoop:hadoop-hdfs:2.7.4"
34
+
35
+ testCompile "junit:junit:4.+"
36
+ testCompile "org.embulk:embulk-core:0.8.29:tests"
37
+ testCompile "org.embulk:embulk-standards:0.8.29"
38
+ }
39
+
40
+ task classpath(type: Copy, dependsOn: ["jar"]) {
41
+ doFirst { file("classpath").deleteDir() }
42
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
43
+ into "classpath"
44
+ }
45
+ clean { delete "classpath" }
46
+
47
+ checkstyle {
48
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
49
+ toolVersion = '6.14.1'
50
+ }
51
+ checkstyleMain {
52
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
53
+ ignoreFailures = true
54
+ }
55
+ checkstyleTest {
56
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
57
+ ignoreFailures = true
58
+ }
59
+ task checkstyle(type: Checkstyle) {
60
+ classpath = sourceSets.main.output + sourceSets.test.output
61
+ source = sourceSets.main.allJava + sourceSets.test.allJava
62
+ }
63
+
64
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
65
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
66
+ script "${project.name}.gemspec"
67
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
68
+ }
69
+
70
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
71
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
72
+ script "pkg/${project.name}-${project.version}.gem"
73
+ }
74
+
75
+ task "package"(dependsOn: ["gemspec", "classpath"]) {
76
+ doLast {
77
+ println "> Build succeeded."
78
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
79
+ }
80
+ }
81
+
82
+ task gemspec {
83
+ ext.gemspecFile = file("${project.name}.gemspec")
84
+ inputs.file "build.gradle"
85
+ outputs.file gemspecFile
86
+ doLast { gemspecFile.write($/
87
+ Gem::Specification.new do |spec|
88
+ spec.name = "${project.name}"
89
+ spec.version = "${project.version}"
90
+ spec.authors = ["yuokada"]
91
+ spec.summary = %[Orc output plugin for Embulk]
92
+ spec.description = %[Dumps records to Orc.]
93
+ spec.email = ["callistoiv+git@gmail.com"]
94
+ spec.licenses = ["MIT"]
95
+ # TODO set this: spec.homepage = "https://github.com/callistoiv+git/embulk-output-orc"
96
+
97
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
98
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
99
+ spec.require_paths = ["lib"]
100
+
101
+ #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
102
+ spec.add_development_dependency 'bundler', ['~> 1.0']
103
+ spec.add_development_dependency 'rake', ['>= 10.0']
104
+ end
105
+ /$)
106
+ }
107
+ }
108
+ clean { delete "${project.name}.gemspec" }
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml that permits a looser style.
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
@@ -0,0 +1,56 @@
1
+ # in:
2
+ # type: file
3
+ # path_prefix: example/example.csv
4
+ # parser:
5
+ # type: csv
6
+ # charset: UTF-8
7
+ # newline: CRLF
8
+ # null_string: 'NULL'
9
+ # skip_header_lines: 1
10
+ # comment_line_marker: '#'
11
+ # columns:
12
+ # #- {name: time, type: timestamp, format: "%Y-%m-%d"}
13
+ # - {name: id, type: long}
14
+ # - {name: name, type: string}
15
+ # - {name: score, type: double}
16
+ # - {name: json, type: json}
17
+ #filters:
18
+ # - type: column
19
+ # columns:
20
+ # - {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
21
+ # - {name: name, default: "foo"}
22
+ # - {name: foo, default: 1, type: long}
23
+ # - {name: id}
24
+ # - {name: copy_score, src: score}
25
+ # - {name: json, default: "{\"foo\":\"FOO\"}"}
26
+ # - {name: $.json.foo}
27
+ # - {name: $.json.copy_foo, src: $.json.foo}
28
+ in:
29
+ type: file
30
+ path_prefix: example/sample.csv
31
+ parser:
32
+ type: csv
33
+ charset: UTF-8
34
+ newline: CRLF
35
+ null_string: 'NULL'
36
+ skip_header_lines: 0
37
+ comment_line_marker: '#'
38
+ columns:
39
+ - {name: myid, type: long}
40
+ - {name: named, type: string}
41
+ - {name: x_flag, type: boolean}
42
+ - {name: pit_rate, type: double}
43
+ - {name: score, type: long}
44
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
45
+ - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
46
+
47
+ exec:
48
+ max_threads: 2 # run at most 2 tasks concurrently
49
+ min_output_tasks: 1 # disable page scattering
50
+
51
+ #out:
52
+ # type: stdout
53
+
54
+ out:
55
+ type: orc
56
+ path_prefix: "/tmp/output"