embulk-filter-split_column 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: deee7bfaf00675e383c3d67aad12ace44279dda0
4
+ data.tar.gz: 07ee2ea8d05dd425329448b06fec165fe2443155
5
+ SHA512:
6
+ metadata.gz: e563efc86a9849b493bfaead39ec7a7caf35edfed42a815786b9aa8e1f6f7ce78b5d308c3ad6f5c3e486d43b917b5d4357c2cf51872d149b1b62c01a35b54ca2
7
+ data.tar.gz: 3aded9165e9f35a182b393ccf8eb4797b79fd5ab570aeaa7fe9d8e90825584e1c037659d6acc8c366ea0daa320d9c6209c7369311082b9b6933e7d69cee9b011
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ *.gemspec
5
+ .gradle/
6
+ /classpath/
7
+ build/
8
+ .idea
9
+ /.settings/
10
+ /.metadata/
11
+ .classpath
12
+ .project
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,83 @@
1
+ # Split Column filter plugin for Embulk
2
+
3
+ A filter plugin for Embulk that splits one string column into several columns of any type.
4
+
5
+ ## Configuration
6
+
7
+ - **delimiter**: delimiter used to split the column (string, default: `","`)
8
+ - **target_key**: name of the string column you want to split (string, required)
9
+ - **output_columns**: columns that replace the target column, listed in the order of the split values (array of hash, required)
10
+ - Each entry uses the same format as `columns` in a parser configuration
11
+
12
+ ## Example
13
+
14
+ Say input.csv is as follows:
15
+
16
+ ```
17
+ id,account,time,purchase,comment
18
+ 1,32864,2015-01-27 19:23:49,20150127,a|1|1.1|True|2016-01-26
19
+ 2,14824,2015-01-27 19:01:23,20150127,b|2|2.2|False|2016-01-27
20
+ 3,27559,2015-01-28 02:20:02,20150128,c|3|3.3|False|2016-01-28
21
+ 4,11270,2015-01-29 11:54:36,20150129,d|4|4.4|True|2016-01-29
22
+ ```
23
+
24
+ In the parse phase, each line is split by ',':
25
+
26
+ ```yaml
27
+ parser:
28
+ type: csv
29
+ delimiter: ','
30
+ columns:
31
+ - {name: id, type: long}
32
+ - {name: account, type: long}
33
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
34
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
35
+ - {name: comment, type: string}
36
+ ```
37
+ ```
38
+ +---------+--------------+-------------------------+-------------------------+--------------------------+
39
+ | id:long | account:long | time:timestamp | purchase:timestamp | comment:string |
40
+ +---------+--------------+-------------------------+-------------------------+--------------------------+
41
+ | 1 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | a|1|1.1|True|2016-01-26 |
42
+ | 2 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | b|2|2.2|False|2016-01-27 |
43
+ | 3 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | c|3|3.3|False|2016-01-28 |
44
+ | 4 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | d|4|4.4|True|2016-01-29 |
45
+ +---------+--------------+-------------------------+-------------------------+--------------------------+
46
+ ```
47
+
48
+ In addition, to split the comment column by '|', use the split_column filter:
49
+
50
+ ```yaml
51
+ filters:
52
+ - type: split_column
53
+ delimiter: '|'
54
+ target_key: comment
55
+ output_columns:
56
+ - {name: alph, type: string}
57
+ - {name: num, type: long}
58
+ - {name: dbl, type: double}
59
+ - {name: bool, type: boolean}
60
+ - {name: ts, type: timestamp, format: '%Y-%m-%d'}
61
+ ```
62
+ ```
63
+ +---------+--------------+-------------------------+-------------------------+-------------+----------+------------+--------------+-------------------------+
64
+ | id:long | account:long | time:timestamp | purchase:timestamp | alph:string | num:long | dbl:double | bool:boolean | ts:timestamp |
65
+ +---------+--------------+-------------------------+-------------------------+-------------+----------+------------+--------------+-------------------------+
66
+ | 1 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | a | 1 | 1.1 | true | 2016-01-26 00:00:00 UTC |
67
+ | 2 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | b | 2 | 2.2 | false | 2016-01-27 00:00:00 UTC |
68
+ | 3 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | c | 3 | 3.3 | false | 2016-01-28 00:00:00 UTC |
69
+ | 4 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | d | 4 | 4.4 | true | 2016-01-29 00:00:00 UTC |
70
+ +---------+--------------+-------------------------+-------------------------+-------------+----------+------------+--------------+-------------------------+
71
+ ```
72
+
73
+ ## Todo
74
+
75
+ - Write Test
76
+ - Support default value
77
+ - Support exception to skip row
78
+
79
+ ## Build
80
+
81
+ ```
82
+ $ ./gradlew gem
83
+ ```
data/build.gradle ADDED
@@ -0,0 +1,94 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "java"
5
+ id "checkstyle"
6
+ }
7
+ import com.github.jrubygradle.JRubyExec
8
+ repositories {
9
+ mavenCentral()
10
+ jcenter()
11
+ }
12
+ configurations {
13
+ provided
14
+ }
15
+
16
+ version = "0.1.1"
17
+
18
+ sourceCompatibility = 1.7
19
+ targetCompatibility = 1.7
20
+
21
+ dependencies {
22
+ compile "org.embulk:embulk-core:0.8.8"
23
+ provided "org.embulk:embulk-core:0.8.8"
24
+ // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
25
+ testCompile "junit:junit:4.+"
26
+ }
27
+
28
+ task classpath(type: Copy, dependsOn: ["jar"]) {
29
+ doFirst { file("classpath").deleteDir() }
30
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
31
+ into "classpath"
32
+ }
33
+ clean { delete "classpath" }
34
+
35
+ checkstyle {
36
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
37
+ toolVersion = '6.14.1'
38
+ }
39
+ checkstyleMain {
40
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
41
+ ignoreFailures = true
42
+ }
43
+ checkstyleTest {
44
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
45
+ ignoreFailures = true
46
+ }
47
+ task checkstyle(type: Checkstyle) {
48
+ classpath = sourceSets.main.output + sourceSets.test.output
49
+ source = sourceSets.main.allJava + sourceSets.test.allJava
50
+ }
51
+
52
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
53
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
54
+ script "${project.name}.gemspec"
55
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
56
+ }
57
+
58
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
59
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
60
+ script "pkg/${project.name}-${project.version}.gem"
61
+ }
62
+
63
+ task "package"(dependsOn: ["gemspec", "classpath"]) << {
64
+ println "> Build succeeded."
65
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
66
+ }
67
+
68
+ task gemspec {
69
+ ext.gemspecFile = file("${project.name}.gemspec")
70
+ inputs.file "build.gradle"
71
+ outputs.file gemspecFile
72
+ doLast { gemspecFile.write($/
73
+ Gem::Specification.new do |spec|
74
+ spec.name = "${project.name}"
75
+ spec.version = "${project.version}"
76
+ spec.authors = ["yskn67"]
77
+ spec.summary = %[A filter plugin for Embulk to split one string column to several any type columns]
78
+ spec.description = %[A filter plugin for Embulk to split one string column to several any type columns.]
79
+ spec.email = ["yskn1967@gmail.com"]
80
+ spec.licenses = ["MIT"]
81
+ spec.homepage = "https://github.com/yskn67/embulk-filter-split_column"
82
+
83
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
84
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
85
+ spec.require_paths = ["lib"]
86
+
87
+ #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
88
+ spec.add_development_dependency 'bundler', ['~> 1.0']
89
+ spec.add_development_dependency 'rake', ['>= 10.0']
90
+ end
91
+ /$)
92
+ }
93
+ }
94
+ clean { delete "${project.name}.gemspec" }
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
Binary file
@@ -0,0 +1,6 @@
1
+ #Wed Jan 13 12:41:02 JST 2016
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
data/gradlew ADDED
@@ -0,0 +1,160 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
+ DEFAULT_JVM_OPTS=""
11
+
12
+ APP_NAME="Gradle"
13
+ APP_BASE_NAME=`basename "$0"`
14
+
15
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
16
+ MAX_FD="maximum"
17
+
18
+ warn ( ) {
19
+ echo "$*"
20
+ }
21
+
22
+ die ( ) {
23
+ echo
24
+ echo "$*"
25
+ echo
26
+ exit 1
27
+ }
28
+
29
+ # OS specific support (must be 'true' or 'false').
30
+ cygwin=false
31
+ msys=false
32
+ darwin=false
33
+ case "`uname`" in
34
+ CYGWIN* )
35
+ cygwin=true
36
+ ;;
37
+ Darwin* )
38
+ darwin=true
39
+ ;;
40
+ MINGW* )
41
+ msys=true
42
+ ;;
43
+ esac
44
+
45
+ # Attempt to set APP_HOME
46
+ # Resolve links: $0 may be a link
47
+ PRG="$0"
48
+ # Need this for relative symlinks.
49
+ while [ -h "$PRG" ] ; do
50
+ ls=`ls -ld "$PRG"`
51
+ link=`expr "$ls" : '.*-> \(.*\)$'`
52
+ if expr "$link" : '/.*' > /dev/null; then
53
+ PRG="$link"
54
+ else
55
+ PRG=`dirname "$PRG"`"/$link"
56
+ fi
57
+ done
58
+ SAVED="`pwd`"
59
+ cd "`dirname \"$PRG\"`/" >/dev/null
60
+ APP_HOME="`pwd -P`"
61
+ cd "$SAVED" >/dev/null
62
+
63
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
64
+
65
+ # Determine the Java command to use to start the JVM.
66
+ if [ -n "$JAVA_HOME" ] ; then
67
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
68
+ # IBM's JDK on AIX uses strange locations for the executables
69
+ JAVACMD="$JAVA_HOME/jre/sh/java"
70
+ else
71
+ JAVACMD="$JAVA_HOME/bin/java"
72
+ fi
73
+ if [ ! -x "$JAVACMD" ] ; then
74
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
75
+
76
+ Please set the JAVA_HOME variable in your environment to match the
77
+ location of your Java installation."
78
+ fi
79
+ else
80
+ JAVACMD="java"
81
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
82
+
83
+ Please set the JAVA_HOME variable in your environment to match the
84
+ location of your Java installation."
85
+ fi
86
+
87
+ # Increase the maximum file descriptors if we can.
88
+ if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
89
+ MAX_FD_LIMIT=`ulimit -H -n`
90
+ if [ $? -eq 0 ] ; then
91
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
92
+ MAX_FD="$MAX_FD_LIMIT"
93
+ fi
94
+ ulimit -n $MAX_FD
95
+ if [ $? -ne 0 ] ; then
96
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
97
+ fi
98
+ else
99
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
100
+ fi
101
+ fi
102
+
103
+ # For Darwin, add options to specify how the application appears in the dock
104
+ if $darwin; then
105
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
106
+ fi
107
+
108
+ # For Cygwin, switch paths to Windows format before running java
109
+ if $cygwin ; then
110
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
111
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
112
+ JAVACMD=`cygpath --unix "$JAVACMD"`
113
+
114
+ # We build the pattern for arguments to be converted via cygpath
115
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
116
+ SEP=""
117
+ for dir in $ROOTDIRSRAW ; do
118
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
119
+ SEP="|"
120
+ done
121
+ OURCYGPATTERN="(^($ROOTDIRS))"
122
+ # Add a user-defined pattern to the cygpath arguments
123
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
124
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
125
+ fi
126
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
127
+ i=0
128
+ for arg in "$@" ; do
129
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
130
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
131
+
132
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
133
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
134
+ else
135
+ eval `echo args$i`="\"$arg\""
136
+ fi
137
+ i=$((i+1))
138
+ done
139
+ case $i in
140
+ (0) set -- ;;
141
+ (1) set -- "$args0" ;;
142
+ (2) set -- "$args0" "$args1" ;;
143
+ (3) set -- "$args0" "$args1" "$args2" ;;
144
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
145
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
146
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
147
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
148
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
149
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
150
+ esac
151
+ fi
152
+
153
+ # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
154
+ function splitJvmOpts() {
155
+ JVM_OPTS=("$@")
156
+ }
157
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
158
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
159
+
160
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windowz variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega
@@ -0,0 +1,3 @@
1
# Register the Java-implemented "split_column" filter with Embulk.
# The jar directory is resolved four levels up from this file, in "classpath".
plugin_class = "org.embulk.filter.split_column.SplitColumnFilterPlugin"
classpath_dir = File.expand_path('../../../../classpath', __FILE__)

Embulk::JavaPlugin.register_filter("split_column", plugin_class, classpath_dir)
@@ -0,0 +1,168 @@
1
+ package org.embulk.filter.split_column;
2
+
3
+ import org.apache.commons.lang3.StringUtils;
4
+ import org.embulk.config.Config;
5
+ import org.embulk.config.ConfigDefault;
6
+ import org.embulk.config.ConfigDiff;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.config.Task;
9
+ import org.embulk.config.TaskSource;
10
+ import org.embulk.spi.Column;
11
+ import org.embulk.spi.ColumnConfig;
12
+ import org.embulk.spi.Exec;
13
+ import org.embulk.spi.FilterPlugin;
14
+ import org.embulk.spi.Page;
15
+ import org.embulk.spi.PageBuilder;
16
+ import org.embulk.spi.PageOutput;
17
+ import org.embulk.spi.PageReader;
18
+ import org.embulk.spi.Schema;
19
+ import org.embulk.spi.SchemaConfig;
20
+ import org.embulk.spi.time.TimestampParser;
21
+ import org.embulk.spi.type.Type;
22
+ import org.embulk.spi.type.Types;
23
+ import org.embulk.spi.util.Timestamps;
24
+
25
+ import org.embulk.spi.DataException;
26
+ import org.embulk.spi.time.TimestampParseException;
27
+
28
+ import com.google.common.base.Optional;
29
+ import com.google.common.collect.ImmutableList;
30
+
31
+ public class SplitColumnFilterPlugin
32
+ implements FilterPlugin
33
+ {
34
+ public interface PluginTask
35
+ extends Task, TimestampParser.Task
36
+ {
37
+ @Config("delimiter")
38
+ @ConfigDefault("\",\"")
39
+ public String getDelimiter();
40
+
41
+ @Config("target_key")
42
+ public String getTargetKey();
43
+
44
+ @Config("output_columns")
45
+ public SchemaConfig getOutputColumns();
46
+ }
47
+
48
+ @Override
49
+ public void transaction(ConfigSource config, Schema inputSchema,
50
+ FilterPlugin.Control control)
51
+ {
52
+ PluginTask task = config.loadConfig(PluginTask.class);
53
+
54
+ ImmutableList.Builder<Column> builder = ImmutableList.builder();
55
+ String targetColumnName = task.getTargetKey();
56
+ int i = 0;
57
+ for (Column inputColumn: inputSchema.getColumns()) {
58
+ String columnName = inputColumn.getName();
59
+ if (columnName.equals(targetColumnName)) {
60
+ // Separate target_key column
61
+ for (ColumnConfig outputColumnConfig : task.getOutputColumns().getColumns()) {
62
+ Column outputColumn = outputColumnConfig.toColumn(i++);
63
+ builder.add(outputColumn);
64
+ }
65
+ continue;
66
+ }
67
+ Column outputColumn = new Column(i++, inputColumn.getName(), inputColumn.getType());
68
+ builder.add(outputColumn);
69
+ }
70
+ Schema outputSchema = new Schema(builder.build());
71
+ control.run(task.dump(), outputSchema);
72
+ }
73
+
74
+ @Override
75
+ public PageOutput open(TaskSource taskSource, final Schema inputSchema,
76
+ final Schema outputSchema, final PageOutput output)
77
+ {
78
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
79
+ final Column targetColumn = inputSchema.lookupColumn(task.getTargetKey());
80
+ final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getOutputColumns());
81
+
82
+ return new PageOutput() {
83
+ private PageReader reader = new PageReader(inputSchema);
84
+ private PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
85
+
86
+ @Override
87
+ public void finish() {
88
+ builder.finish();
89
+ }
90
+
91
+ @Override
92
+ public void close() {
93
+ builder.close();
94
+ }
95
+
96
+ @Override
97
+ public void add(Page page) {
98
+ reader.setPage(page);
99
+ while (reader.nextRecord()) {
100
+ int cur = 0;
101
+ for (Column column: inputSchema.getColumns()) {
102
+ if (column.getName().equals(targetColumn.getName())) {
103
+ String[] words = StringUtils.split(reader.getString(column),task.getDelimiter());
104
+ SchemaConfig outputSchemaConfig = task.getOutputColumns();
105
+ // TODO: support skipping row
106
+ if (outputSchemaConfig.size() != words.length) {
107
+ String message = String.format("outputColumn has %d columns but value was separated in %d",
108
+ outputSchemaConfig.size(),
109
+ words.length
110
+ );
111
+ throw new SplitColumnValidateException(message);
112
+ }
113
+ // TODO: support default value
114
+ // TODO: throw exception
115
+ int i = 0;
116
+ for (ColumnConfig outputColumnConfig: outputSchemaConfig.getColumns()) {
117
+ Column outputColumn = outputSchema.lookupColumn(outputColumnConfig.getName());
118
+ Type outputColumnType = outputColumn.getType();
119
+ if (Types.STRING.equals(outputColumnType)) {
120
+ builder.setString(cur++, words[i++]);
121
+ } else if (Types.BOOLEAN.equals(outputColumnType)) {
122
+ builder.setBoolean(cur++, Boolean.parseBoolean(words[i++]));
123
+ } else if (Types.DOUBLE.equals(outputColumnType)) {
124
+ builder.setDouble(cur++, Double.parseDouble(words[i++]));
125
+ } else if (Types.LONG.equals(outputColumnType)) {
126
+ builder.setLong(cur++, Long.parseLong(words[i++]));
127
+ } else if (Types.TIMESTAMP.equals(outputColumnType)) {
128
+ builder.setTimestamp(cur++, timestampParsers[i].parse(words[i]));
129
+ i++;
130
+ }
131
+ }
132
+ continue;
133
+ }
134
+ if (reader.isNull(column)) {
135
+ builder.setNull(cur++);
136
+ continue;
137
+ }
138
+ add_builder(cur++, column);
139
+ }
140
+ builder.addRecord();
141
+ }
142
+ }
143
+ // TODO: use embulk-core system
144
+ private void add_builder(int cur, Column column) {
145
+ if (Types.STRING.equals(column.getType())) {
146
+ builder.setString(cur, reader.getString(column));
147
+ } else if (Types.BOOLEAN.equals(column.getType())) {
148
+ builder.setBoolean(cur, reader.getBoolean(column));
149
+ } else if (Types.DOUBLE.equals(column.getType())) {
150
+ builder.setDouble(cur, reader.getDouble(column));
151
+ } else if (Types.LONG.equals(column.getType())) {
152
+ builder.setLong(cur, reader.getLong(column));
153
+ } else if (Types.TIMESTAMP.equals(column.getType())) {
154
+ builder.setTimestamp(cur, reader.getTimestamp(column));
155
+ }
156
+ }
157
+ };
158
+ }
159
+
160
+ static class SplitColumnValidateException
161
+ extends DataException
162
+ {
163
+ SplitColumnValidateException(String message)
164
+ {
165
+ super(message);
166
+ }
167
+ }
168
+ }
@@ -0,0 +1,5 @@
1
+ package org.embulk.filter.split_column;
2
+
3
+ // Empty test class for the split_column filter plugin; no test methods are defined yet.
4
+ public class TestSplitColumnFilterPlugin {
5
+ }
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-split_column
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - yskn67
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-05-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ~>
17
+ - !ruby/object:Gem::Version
18
+ version: '1.0'
19
+ name: bundler
20
+ prerelease: false
21
+ type: :development
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '10.0'
33
+ name: rake
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: A filter plugin for Embulk that splits one string column into several columns of any type.
42
+ email:
43
+ - yskn1967@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - LICENSE.txt
50
+ - README.md
51
+ - build.gradle
52
+ - config/checkstyle/checkstyle.xml
53
+ - config/checkstyle/default.xml
54
+ - gradle/wrapper/gradle-wrapper.jar
55
+ - gradle/wrapper/gradle-wrapper.properties
56
+ - gradlew
57
+ - gradlew.bat
58
+ - lib/embulk/filter/split_column.rb
59
+ - src/main/java/org/embulk/filter/split_column/SplitColumnFilterPlugin.java
60
+ - src/test/java/org/embulk/filter/split_column/TestSplitColumnFilterPlugin.java
61
+ - classpath/embulk-filter-split_column-0.1.1.jar
62
+ homepage: https://github.com/yskn67/embulk-filter-split_column
63
+ licenses:
64
+ - MIT
65
+ metadata: {}
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - '>='
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubyforge_project:
82
+ rubygems_version: 2.1.9
83
+ signing_key:
84
+ specification_version: 4
85
+ summary: A filter plugin for Embulk that splits one string column into several columns of any type
86
+ test_files: []