embulk-filter-copy 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/CHANGELOG.md +3 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +67 -0
  6. data/build.gradle +104 -0
  7. data/config/checkstyle/checkstyle.xml +128 -0
  8. data/config/checkstyle/default.xml +108 -0
  9. data/example/config.yml +28 -0
  10. data/example/data.tsv +5 -0
  11. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  12. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  13. data/gradlew +169 -0
  14. data/gradlew.bat +84 -0
  15. data/lib/embulk/filter/copy.rb +8 -0
  16. data/settings.gradle +1 -0
  17. data/src/main/java/org/embulk/filter/copy/CopyFilterPlugin.java +149 -0
  18. data/src/main/java/org/embulk/filter/copy/forward/ForwardBaseTask.java +17 -0
  19. data/src/main/java/org/embulk/filter/copy/forward/InForwardEventReader.java +147 -0
  20. data/src/main/java/org/embulk/filter/copy/forward/InForwardService.java +187 -0
  21. data/src/main/java/org/embulk/filter/copy/forward/InForwardVisitor.java +63 -0
  22. data/src/main/java/org/embulk/filter/copy/forward/OutForwardEventBuilder.java +135 -0
  23. data/src/main/java/org/embulk/filter/copy/forward/OutForwardService.java +170 -0
  24. data/src/main/java/org/embulk/filter/copy/forward/OutForwardVisitor.java +63 -0
  25. data/src/main/java/org/embulk/filter/copy/plugin/InternalForwardInputPlugin.java +111 -0
  26. data/src/main/java/org/embulk/filter/copy/service/EmbulkExecutorService.java +111 -0
  27. data/src/main/java/org/embulk/filter/copy/service/StandardColumnVisitor.java +64 -0
  28. data/src/main/java/org/embulk/filter/copy/util/ElapsedTime.java +165 -0
  29. data/src/test/java/org/embulk/filter/copy/TestCopyFilterPlugin.java +5 -0
  30. data/src/test/java/org/embulk/filter/copy/plugin/TestInternalForwardInputPlugin.java +5 -0
  31. metadata +111 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3044893452487bfd8e401d210931f2cd8c95b750
4
+ data.tar.gz: bcd788f6fcbf6f5e50c1867bbb85869fbe7781b1
5
+ SHA512:
6
+ metadata.gz: e7e68823705f1747c65bae165c6a573e5f3be6befb1b2ff03fedb95bac628350250405915445a247edb98cd6388dd3d73b57e631b45454bf8bdc44823c31d323
7
+ data.tar.gz: 0c16332e95084910ffe7f128abf68cac89e67c05151bf1bbbe86be666ab43e3ecb57e18c0e6725d17c143cf6a368796d033344d6e70d1ad3d250f8323b9e3099
@@ -0,0 +1,12 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ *.gemspec
5
+ .gradle/
6
+ /classpath/
7
+ build/
8
+ .idea
9
+ /.settings/
10
+ /.metadata/
11
+ .classpath
12
+ .project
@@ -0,0 +1,3 @@
1
+ 0.0.1 (2017-06-07)
2
+ ==================
3
+ - first release.
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,67 @@
1
+ # Copy filter plugin for Embulk
2
+
3
+ Copy records and run another embulk by using them as input data source.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: filter
8
+
9
+ ## Configuration
10
+
11
+ - **config**: another embulk configurations except `in`.
12
+ - ref. http://www.embulk.org/docs/built-in.html#embulk-configuration-file-format
13
+
14
+ ## Example
15
+
16
+ ```yaml
17
+ filters:
18
+ - type: copy
19
+ config:
20
+ filters:
21
+ - type: remove_columns
22
+ remove: ["id"]
23
+ out:
24
+ type: stdout
25
+ exec:
26
+ max_threads: 8
27
+ ```
28
+
29
+ ## Note
30
+
31
+ - This plugin works only on Java 1.8 or later.
32
+ - This plugin is **experimental** yet, so the specification may be changed.
33
+ - This plugin has more options than I write, but I do not write them because you do not have to change them currently.
34
+ - This plugin has no test yet, so may have some bugs.
35
+ - This plugin does not work on [embulk-executor-mapreduce](https://github.com/embulk/embulk-executor-mapreduce) yet.
36
+ - This plugin uses lots of memory now, because embulk run twice.
37
+
38
+ ## Dependencies
39
+ - https://github.com/okumin/influent
40
+ - https://github.com/komamitsu/fluency
41
+ - This plugin must use 1.1.0 for using the same msgpack version as embulk.
42
+
43
+ ## Development
44
+
45
+ ### Run example:
46
+
47
+ ```shell
48
+ $ ./gradlew classpath
49
+ $ embulk run example/config.yml -Ilib
50
+ ```
51
+
52
+ ### Run test:
53
+
54
+ TBD
55
+
56
+ ### Release gem:
57
+ Fix [build.gradle](./build.gradle), then
58
+
59
+
60
+ ```shell
61
+ $ ./gradlew gemPush
62
+
63
+ ```
64
+
65
+ ## ChangeLog
66
+
67
+ [CHANGELOG.md](./CHANGELOG.md)
@@ -0,0 +1,104 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "java"
5
+ id "checkstyle"
6
+ }
7
+
8
+ import com.github.jrubygradle.JRubyExec
9
+ repositories {
10
+ mavenCentral()
11
+ jcenter()
12
+ }
13
+ configurations {
14
+ provided
15
+ }
16
+
17
+ version = "0.0.1"
18
+
19
+ sourceCompatibility = 1.8
20
+ targetCompatibility = 1.8
21
+
22
+ dependencies {
23
+ compile "org.embulk:embulk-core:0.8.23"
24
+ provided "org.embulk:embulk-core:0.8.23"
25
+ compile "com.okumin:influent-java:0.2.+"
26
+ // The latest version of fluency is 1.3.0, but
27
+ // This plugin use 1.1.0 for using the same msgpack version as embulk.
28
+ // ref. https://github.com/komamitsu/fluency/compare/1.1.0...1.3.0
29
+ compile "org.komamitsu:fluency:1.1.0"
30
+ testCompile "junit:junit:4.+"
31
+ }
32
+
33
+ task classpath(type: Copy, dependsOn: ["jar"]) {
34
+ doFirst { file("classpath").deleteDir() }
35
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
36
+ into "classpath"
37
+ }
38
+ clean { delete "classpath" }
39
+
40
+ checkstyle {
41
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
42
+ toolVersion = '6.14.1'
43
+ }
44
+ checkstyleMain {
45
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
46
+ ignoreFailures = true
47
+ }
48
+ checkstyleTest {
49
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
50
+ ignoreFailures = true
51
+ }
52
+ task checkstyle(type: Checkstyle) {
53
+ classpath = sourceSets.main.output + sourceSets.test.output
54
+ source = sourceSets.main.allJava + sourceSets.test.allJava
55
+ }
56
+
57
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
58
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
59
+ script "${project.name}.gemspec"
60
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
61
+ }
62
+
63
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
64
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
65
+ script "pkg/${project.name}-${project.version}.gem"
66
+ }
67
+
68
+ task "package"(dependsOn: ["gemspec", "classpath"]) {
69
+ doLast {
70
+ println "> Build succeeded."
71
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
72
+ }
73
+ }
74
+
75
+ task gemspec {
76
+ ext.gemspecFile = file("${project.name}.gemspec")
77
+ inputs.file "build.gradle"
78
+ outputs.file gemspecFile
79
+ doLast { gemspecFile.write($/
80
+ Gem::Specification.new do |spec|
81
+ spec.name = "${project.name}"
82
+ spec.version = "${project.version}"
83
+ spec.authors = ["Civitaspo"]
84
+ spec.summary = %[Copy filter plugin for Embulk]
85
+ spec.description = %[Copy records and run another embulk by using them as input data source.]
86
+ spec.email = ["civitaspo@gmail.com"]
87
+ spec.licenses = ["MIT"]
88
+ spec.homepage = "https://github.com/civitaspo/embulk-filter-copy"
89
+
90
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
91
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
92
+ spec.require_paths = ["lib"]
93
+
94
+ unless ENV_JAVA['java.specification.version'] >= '1.8'
95
+ warn "[WARN] `embulk-filter-copy` plugin requires 'java.specification.version' >= 1.8, but currently #{ENV_JAVA['java.specification.version']}."
96
+ end
97
+
98
+ spec.add_development_dependency 'bundler', ['~> 1.0']
99
+ spec.add_development_dependency 'rake', ['>= 10.0']
100
+ end
101
+ /$)
102
+ }
103
+ }
104
+ clean { delete "${project.name}.gemspec" }
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>