embulk-filter-split 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/build.gradle +23 -5
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/filter/split/SplitFilterPlugin.java +104 -35
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6384139a423eb5deee45da63f93bcb31c924edf2
|
4
|
+
data.tar.gz: 43247a6be8b26043c1bfd3eb1355ff7eb286d194
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d9f147a00e6a78853e57f3ba3a650f81eb1259734f9cc6088506904980a2b28a72318fb607a70f5fb601303fc931a717849d4d78c60081d46a703d6fd613ecd7
|
7
|
+
data.tar.gz: 327780b980faefc843e35eb8c90badcbc248dcb843061bf4a1dd064145a68cee5a6fb008c21ae907e8c7f26c365602a2c2a77089c6aba0db1f4105e6f9c76ba8
|
data/build.gradle
CHANGED
@@ -2,6 +2,7 @@ plugins {
|
|
2
2
|
id "com.jfrog.bintray" version "1.1"
|
3
3
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
4
|
id "java"
|
5
|
+
id "checkstyle"
|
5
6
|
id "eclipse"
|
6
7
|
}
|
7
8
|
import com.github.jrubygradle.JRubyExec
|
@@ -13,14 +14,14 @@ configurations {
|
|
13
14
|
provided
|
14
15
|
}
|
15
16
|
|
16
|
-
version = "0.1.2"
|
17
|
+
version = "0.2.0"
|
17
18
|
|
18
19
|
sourceCompatibility = 1.7
|
19
20
|
targetCompatibility = 1.7
|
20
21
|
|
21
22
|
dependencies {
|
22
|
-
compile "org.embulk:embulk-core:0.
|
23
|
-
provided "org.embulk:embulk-core:0.
|
23
|
+
compile "org.embulk:embulk-core:0.8.13"
|
24
|
+
provided "org.embulk:embulk-core:0.8.13"
|
24
25
|
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
|
25
26
|
testCompile "junit:junit:4.+"
|
26
27
|
}
|
@@ -32,6 +33,23 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
|
|
32
33
|
}
|
33
34
|
clean { delete "classpath" }
|
34
35
|
|
36
|
+
checkstyle {
|
37
|
+
configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
|
38
|
+
toolVersion = '6.14.1'
|
39
|
+
}
|
40
|
+
checkstyleMain {
|
41
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
42
|
+
ignoreFailures = true
|
43
|
+
}
|
44
|
+
checkstyleTest {
|
45
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
46
|
+
ignoreFailures = true
|
47
|
+
}
|
48
|
+
task checkstyle(type: Checkstyle) {
|
49
|
+
classpath = sourceSets.main.output + sourceSets.test.output
|
50
|
+
source = sourceSets.main.allJava + sourceSets.test.allJava
|
51
|
+
}
|
52
|
+
|
35
53
|
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
36
54
|
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
|
37
55
|
script "${project.name}.gemspec"
|
@@ -58,10 +76,10 @@ Gem::Specification.new do |spec|
|
|
58
76
|
spec.version = "${project.version}"
|
59
77
|
spec.authors = ["toyama0919"]
|
60
78
|
spec.summary = %[Split filter plugin for Embulk]
|
61
|
-
spec.description = %[Split
|
79
|
+
spec.description = %[Split]
|
62
80
|
spec.email = ["toyama0919@gmail.com"]
|
63
81
|
spec.licenses = ["MIT"]
|
64
|
-
spec.homepage = "https://github.com/toyama0919/embulk-filter-split"
|
82
|
+
# TODO set this: spec.homepage = "https://github.com/toyama0919/embulk-filter-split"
|
65
83
|
|
66
84
|
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
|
67
85
|
spec.test_files = spec.files.grep(%r"^(test|spec)/")
|
@@ -0,0 +1,128 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<module name="Checker">
|
6
|
+
<!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
|
7
|
+
<module name="FileTabCharacter"/>
|
8
|
+
<module name="NewlineAtEndOfFile">
|
9
|
+
<property name="lineSeparator" value="lf"/>
|
10
|
+
</module>
|
11
|
+
<module name="RegexpMultiline">
|
12
|
+
<property name="format" value="\r"/>
|
13
|
+
<property name="message" value="Line contains carriage return"/>
|
14
|
+
</module>
|
15
|
+
<module name="RegexpMultiline">
|
16
|
+
<property name="format" value=" \n"/>
|
17
|
+
<property name="message" value="Line has trailing whitespace"/>
|
18
|
+
</module>
|
19
|
+
<module name="RegexpMultiline">
|
20
|
+
<property name="format" value="\{\n\n"/>
|
21
|
+
<property name="message" value="Blank line after opening brace"/>
|
22
|
+
</module>
|
23
|
+
<module name="RegexpMultiline">
|
24
|
+
<property name="format" value="\n\n\s*\}"/>
|
25
|
+
<property name="message" value="Blank line before closing brace"/>
|
26
|
+
</module>
|
27
|
+
<module name="RegexpMultiline">
|
28
|
+
<property name="format" value="\n\n\n"/>
|
29
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
30
|
+
</module>
|
31
|
+
<module name="RegexpMultiline">
|
32
|
+
<property name="format" value="\n\n\Z"/>
|
33
|
+
<property name="message" value="Blank line before end of file"/>
|
34
|
+
</module>
|
35
|
+
<module name="RegexpMultiline">
|
36
|
+
<property name="format" value="Preconditions\.checkNotNull"/>
|
37
|
+
<property name="message" value="Use of checkNotNull"/>
|
38
|
+
</module>
|
39
|
+
|
40
|
+
<module name="TreeWalker">
|
41
|
+
<module name="EmptyBlock">
|
42
|
+
<property name="option" value="text"/>
|
43
|
+
<property name="tokens" value="
|
44
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
45
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
46
|
+
</module>
|
47
|
+
<module name="EmptyStatement"/>
|
48
|
+
<module name="EmptyForInitializerPad"/>
|
49
|
+
<module name="EmptyForIteratorPad">
|
50
|
+
<property name="option" value="space"/>
|
51
|
+
</module>
|
52
|
+
<module name="MethodParamPad">
|
53
|
+
<property name="allowLineBreaks" value="true"/>
|
54
|
+
<property name="option" value="nospace"/>
|
55
|
+
</module>
|
56
|
+
<module name="ParenPad"/>
|
57
|
+
<module name="TypecastParenPad"/>
|
58
|
+
<module name="NeedBraces"/>
|
59
|
+
<module name="LeftCurly">
|
60
|
+
<property name="option" value="nl"/>
|
61
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
62
|
+
</module>
|
63
|
+
<module name="LeftCurly">
|
64
|
+
<property name="option" value="eol"/>
|
65
|
+
<property name="tokens" value="
|
66
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
67
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
68
|
+
</module>
|
69
|
+
<module name="RightCurly">
|
70
|
+
<property name="option" value="alone"/>
|
71
|
+
</module>
|
72
|
+
<module name="GenericWhitespace"/>
|
73
|
+
<module name="WhitespaceAfter"/>
|
74
|
+
<module name="NoWhitespaceBefore"/>
|
75
|
+
|
76
|
+
<module name="UpperEll"/>
|
77
|
+
<module name="DefaultComesLast"/>
|
78
|
+
<module name="ArrayTypeStyle"/>
|
79
|
+
<module name="MultipleVariableDeclarations"/>
|
80
|
+
<module name="ModifierOrder"/>
|
81
|
+
<module name="OneStatementPerLine"/>
|
82
|
+
<module name="StringLiteralEquality"/>
|
83
|
+
<module name="MutableException"/>
|
84
|
+
<module name="EqualsHashCode"/>
|
85
|
+
<module name="InnerAssignment"/>
|
86
|
+
<module name="InterfaceIsType"/>
|
87
|
+
<module name="HideUtilityClassConstructor"/>
|
88
|
+
|
89
|
+
<module name="MemberName"/>
|
90
|
+
<module name="LocalVariableName"/>
|
91
|
+
<module name="LocalFinalVariableName"/>
|
92
|
+
<module name="TypeName"/>
|
93
|
+
<module name="PackageName"/>
|
94
|
+
<module name="ParameterName"/>
|
95
|
+
<module name="StaticVariableName"/>
|
96
|
+
<module name="ClassTypeParameterName">
|
97
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
98
|
+
</module>
|
99
|
+
<module name="MethodTypeParameterName">
|
100
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
101
|
+
</module>
|
102
|
+
|
103
|
+
<module name="AvoidStarImport"/>
|
104
|
+
<module name="RedundantImport"/>
|
105
|
+
<module name="UnusedImports"/>
|
106
|
+
<module name="ImportOrder">
|
107
|
+
<property name="groups" value="*,javax,java"/>
|
108
|
+
<property name="separated" value="true"/>
|
109
|
+
<property name="option" value="bottom"/>
|
110
|
+
<property name="sortStaticImportsAlphabetically" value="true"/>
|
111
|
+
</module>
|
112
|
+
|
113
|
+
<module name="WhitespaceAround">
|
114
|
+
<property name="allowEmptyConstructors" value="true"/>
|
115
|
+
<property name="allowEmptyMethods" value="true"/>
|
116
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
117
|
+
<property name="tokens" value="
|
118
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
119
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
120
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
121
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
122
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
123
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
124
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
125
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
126
|
+
</module>
|
127
|
+
</module>
|
128
|
+
</module>
|
@@ -0,0 +1,108 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<!--
|
6
|
+
This is a subset of ./checkstyle.xml which allows some loose styles
|
7
|
+
-->
|
8
|
+
<module name="Checker">
|
9
|
+
<module name="FileTabCharacter"/>
|
10
|
+
<module name="NewlineAtEndOfFile">
|
11
|
+
<property name="lineSeparator" value="lf"/>
|
12
|
+
</module>
|
13
|
+
<module name="RegexpMultiline">
|
14
|
+
<property name="format" value="\r"/>
|
15
|
+
<property name="message" value="Line contains carriage return"/>
|
16
|
+
</module>
|
17
|
+
<module name="RegexpMultiline">
|
18
|
+
<property name="format" value=" \n"/>
|
19
|
+
<property name="message" value="Line has trailing whitespace"/>
|
20
|
+
</module>
|
21
|
+
<module name="RegexpMultiline">
|
22
|
+
<property name="format" value="\n\n\n"/>
|
23
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
24
|
+
</module>
|
25
|
+
<module name="RegexpMultiline">
|
26
|
+
<property name="format" value="\n\n\Z"/>
|
27
|
+
<property name="message" value="Blank line before end of file"/>
|
28
|
+
</module>
|
29
|
+
|
30
|
+
<module name="TreeWalker">
|
31
|
+
<module name="EmptyBlock">
|
32
|
+
<property name="option" value="text"/>
|
33
|
+
<property name="tokens" value="
|
34
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
35
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
36
|
+
</module>
|
37
|
+
<module name="EmptyStatement"/>
|
38
|
+
<module name="EmptyForInitializerPad"/>
|
39
|
+
<module name="EmptyForIteratorPad">
|
40
|
+
<property name="option" value="space"/>
|
41
|
+
</module>
|
42
|
+
<module name="MethodParamPad">
|
43
|
+
<property name="allowLineBreaks" value="true"/>
|
44
|
+
<property name="option" value="nospace"/>
|
45
|
+
</module>
|
46
|
+
<module name="ParenPad"/>
|
47
|
+
<module name="TypecastParenPad"/>
|
48
|
+
<module name="NeedBraces"/>
|
49
|
+
<module name="LeftCurly">
|
50
|
+
<property name="option" value="nl"/>
|
51
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
52
|
+
</module>
|
53
|
+
<module name="LeftCurly">
|
54
|
+
<property name="option" value="eol"/>
|
55
|
+
<property name="tokens" value="
|
56
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
57
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
58
|
+
</module>
|
59
|
+
<module name="RightCurly">
|
60
|
+
<property name="option" value="alone"/>
|
61
|
+
</module>
|
62
|
+
<module name="GenericWhitespace"/>
|
63
|
+
<module name="WhitespaceAfter"/>
|
64
|
+
<module name="NoWhitespaceBefore"/>
|
65
|
+
|
66
|
+
<module name="UpperEll"/>
|
67
|
+
<module name="DefaultComesLast"/>
|
68
|
+
<module name="ArrayTypeStyle"/>
|
69
|
+
<module name="MultipleVariableDeclarations"/>
|
70
|
+
<module name="ModifierOrder"/>
|
71
|
+
<module name="OneStatementPerLine"/>
|
72
|
+
<module name="StringLiteralEquality"/>
|
73
|
+
<module name="MutableException"/>
|
74
|
+
<module name="EqualsHashCode"/>
|
75
|
+
<module name="InnerAssignment"/>
|
76
|
+
<module name="InterfaceIsType"/>
|
77
|
+
<module name="HideUtilityClassConstructor"/>
|
78
|
+
|
79
|
+
<module name="MemberName"/>
|
80
|
+
<module name="LocalVariableName"/>
|
81
|
+
<module name="LocalFinalVariableName"/>
|
82
|
+
<module name="TypeName"/>
|
83
|
+
<module name="PackageName"/>
|
84
|
+
<module name="ParameterName"/>
|
85
|
+
<module name="StaticVariableName"/>
|
86
|
+
<module name="ClassTypeParameterName">
|
87
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
88
|
+
</module>
|
89
|
+
<module name="MethodTypeParameterName">
|
90
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
91
|
+
</module>
|
92
|
+
|
93
|
+
<module name="WhitespaceAround">
|
94
|
+
<property name="allowEmptyConstructors" value="true"/>
|
95
|
+
<property name="allowEmptyMethods" value="true"/>
|
96
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
97
|
+
<property name="tokens" value="
|
98
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
99
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
100
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
101
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
102
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
103
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
104
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
105
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
106
|
+
</module>
|
107
|
+
</module>
|
108
|
+
</module>
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Wed Jan 13 12:41:02 JST 2016
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
|
@@ -1,5 +1,8 @@
|
|
1
1
|
package org.embulk.filter.split;
|
2
2
|
|
3
|
+
import java.util.Arrays;
|
4
|
+
import java.util.List;
|
5
|
+
|
3
6
|
import org.apache.commons.lang3.StringUtils;
|
4
7
|
import org.embulk.config.Config;
|
5
8
|
import org.embulk.config.ConfigDefault;
|
@@ -15,9 +18,11 @@ import org.embulk.spi.PageOutput;
|
|
15
18
|
import org.embulk.spi.PageReader;
|
16
19
|
import org.embulk.spi.Schema;
|
17
20
|
import org.embulk.spi.type.Types;
|
21
|
+
import org.msgpack.value.Value;
|
18
22
|
|
19
23
|
import com.google.common.base.Optional;
|
20
24
|
import com.google.common.collect.ImmutableList;
|
25
|
+
import com.google.common.collect.Lists;
|
21
26
|
|
22
27
|
public class SplitFilterPlugin
|
23
28
|
implements FilterPlugin
|
@@ -29,6 +34,10 @@ public class SplitFilterPlugin
|
|
29
34
|
@ConfigDefault("true")
|
30
35
|
public boolean getKeepInput();
|
31
36
|
|
37
|
+
@Config("skip_null_or_empty")
|
38
|
+
@ConfigDefault("true")
|
39
|
+
public boolean getSkipNullOrEmpty();
|
40
|
+
|
32
41
|
@Config("delimiter")
|
33
42
|
@ConfigDefault("\",\"")
|
34
43
|
public String getDelimiter();
|
@@ -37,8 +46,7 @@ public class SplitFilterPlugin
|
|
37
46
|
public String getTargetKey();
|
38
47
|
|
39
48
|
@Config("output_key")
|
40
|
-
|
41
|
-
public Optional<String> getOutputKey();
|
49
|
+
public String getOutputKey();
|
42
50
|
}
|
43
51
|
|
44
52
|
@Override
|
@@ -50,15 +58,14 @@ public class SplitFilterPlugin
|
|
50
58
|
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
51
59
|
int i = 0;
|
52
60
|
if (task.getKeepInput()) {
|
53
|
-
for (Column inputColumn: inputSchema.getColumns()) {
|
61
|
+
for (Column inputColumn : inputSchema.getColumns()) {
|
54
62
|
Column outputColumn = new Column(i++, inputColumn.getName(), inputColumn.getType());
|
55
63
|
builder.add(outputColumn);
|
56
64
|
}
|
57
65
|
}
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
}
|
66
|
+
Column outputColumn = new Column(i++, task.getOutputKey(), Types.STRING);
|
67
|
+
builder.add(outputColumn);
|
68
|
+
|
62
69
|
Schema outputSchema = new Schema(builder.build());
|
63
70
|
control.run(task.dump(), outputSchema);
|
64
71
|
}
|
@@ -69,53 +76,115 @@ public class SplitFilterPlugin
|
|
69
76
|
{
|
70
77
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
71
78
|
final Column targetColumn = inputSchema.lookupColumn(task.getTargetKey());
|
72
|
-
final Column outputColumn = task.getOutputKey().isPresent() ?
|
73
|
-
outputSchema.lookupColumn(task.getOutputKey().get()) :
|
74
|
-
outputSchema.lookupColumn(task.getTargetKey());
|
79
|
+
final Column outputColumn = outputSchema.lookupColumn(task.getOutputKey());
|
75
80
|
|
76
81
|
return new PageOutput() {
|
77
82
|
private PageReader reader = new PageReader(inputSchema);
|
78
83
|
private PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
79
84
|
|
80
85
|
@Override
|
81
|
-
public void finish()
|
86
|
+
public void finish()
|
87
|
+
{
|
82
88
|
builder.finish();
|
83
89
|
}
|
84
90
|
|
85
91
|
@Override
|
86
|
-
public void close()
|
92
|
+
public void close()
|
93
|
+
{
|
87
94
|
builder.close();
|
88
95
|
}
|
89
96
|
|
90
97
|
@Override
|
91
|
-
public void add(Page page)
|
98
|
+
public void add(Page page)
|
99
|
+
{
|
92
100
|
reader.setPage(page);
|
93
101
|
while (reader.nextRecord()) {
|
94
|
-
String
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
builder.setString(outputColumn, word);
|
99
|
-
continue;
|
100
|
-
}
|
101
|
-
if (reader.isNull(column)) {
|
102
|
-
builder.setNull(column);
|
103
|
-
continue;
|
104
|
-
}
|
105
|
-
if (Types.STRING.equals(column.getType())) {
|
106
|
-
builder.setString(column, reader.getString(column));
|
107
|
-
} else if (Types.BOOLEAN.equals(column.getType())) {
|
108
|
-
builder.setBoolean(column, reader.getBoolean(column));
|
109
|
-
} else if (Types.DOUBLE.equals(column.getType())) {
|
110
|
-
builder.setDouble(column, reader.getDouble(column));
|
111
|
-
} else if (Types.LONG.equals(column.getType())) {
|
112
|
-
builder.setLong(column, reader.getLong(column));
|
113
|
-
} else if (Types.TIMESTAMP.equals(column.getType())) {
|
114
|
-
builder.setTimestamp(column, reader.getTimestamp(column));
|
115
|
-
}
|
102
|
+
List<String> words = getWords(task, targetColumn);
|
103
|
+
if (reader.isNull(targetColumn) || words.isEmpty()) {
|
104
|
+
if (task.getSkipNullOrEmpty()) {
|
105
|
+
continue;
|
116
106
|
}
|
107
|
+
setColumns(outputSchema, outputColumn, null);
|
117
108
|
builder.addRecord();
|
118
109
|
}
|
110
|
+
else {
|
111
|
+
for (String word : words) {
|
112
|
+
setColumns(outputSchema, outputColumn, word);
|
113
|
+
builder.addRecord();
|
114
|
+
}
|
115
|
+
}
|
116
|
+
}
|
117
|
+
}
|
118
|
+
|
119
|
+
/**
|
120
|
+
* @param task
|
121
|
+
* @param targetColumn
|
122
|
+
* @return
|
123
|
+
*/
|
124
|
+
private List<String> getWords(final PluginTask task, final Column targetColumn)
|
125
|
+
{
|
126
|
+
List<String> words = Lists.newArrayList();
|
127
|
+
if (targetColumn.getType().equals(Types.STRING)) {
|
128
|
+
words = Arrays.asList(StringUtils.split(reader.getString(targetColumn), task.getDelimiter()));
|
129
|
+
}
|
130
|
+
else if (targetColumn.getType().equals(Types.JSON)) {
|
131
|
+
final Value json = reader.getJson(targetColumn);
|
132
|
+
if (json.isArrayValue()) {
|
133
|
+
for (Value value : json.asArrayValue().list()) {
|
134
|
+
words.add(value.toString());
|
135
|
+
}
|
136
|
+
}
|
137
|
+
}
|
138
|
+
return words;
|
139
|
+
}
|
140
|
+
|
141
|
+
/**
|
142
|
+
* @param outputSchema
|
143
|
+
* @param outputColumn
|
144
|
+
* @param word
|
145
|
+
*/
|
146
|
+
private void setColumns(final Schema outputSchema, final Column outputColumn, String word)
|
147
|
+
{
|
148
|
+
for (Column column : outputSchema.getColumns()) {
|
149
|
+
if (column.getName().equals(outputColumn.getName())) {
|
150
|
+
if (word == null) {
|
151
|
+
builder.setNull(outputColumn);
|
152
|
+
}
|
153
|
+
else {
|
154
|
+
builder.setString(outputColumn, word);
|
155
|
+
}
|
156
|
+
continue;
|
157
|
+
}
|
158
|
+
setKeepColumns(column);
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
162
|
+
/**
|
163
|
+
* @param column
|
164
|
+
*/
|
165
|
+
private void setKeepColumns(Column column)
|
166
|
+
{
|
167
|
+
if (reader.isNull(column)) {
|
168
|
+
builder.setNull(column);
|
169
|
+
return;
|
170
|
+
}
|
171
|
+
if (Types.STRING.equals(column.getType())) {
|
172
|
+
builder.setString(column, reader.getString(column));
|
173
|
+
}
|
174
|
+
else if (Types.BOOLEAN.equals(column.getType())) {
|
175
|
+
builder.setBoolean(column, reader.getBoolean(column));
|
176
|
+
}
|
177
|
+
else if (Types.DOUBLE.equals(column.getType())) {
|
178
|
+
builder.setDouble(column, reader.getDouble(column));
|
179
|
+
}
|
180
|
+
else if (Types.LONG.equals(column.getType())) {
|
181
|
+
builder.setLong(column, reader.getLong(column));
|
182
|
+
}
|
183
|
+
else if (Types.TIMESTAMP.equals(column.getType())) {
|
184
|
+
builder.setTimestamp(column, reader.getTimestamp(column));
|
185
|
+
}
|
186
|
+
else if (Types.JSON.equals(column.getType())) {
|
187
|
+
builder.setJson(column, reader.getJson(column));
|
119
188
|
}
|
120
189
|
}
|
121
190
|
};
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-split
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-09-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,7 +38,7 @@ dependencies:
|
|
38
38
|
- - '>='
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
|
-
description: Split
|
41
|
+
description: Split
|
42
42
|
email:
|
43
43
|
- toyama0919@gmail.com
|
44
44
|
executables: []
|
@@ -49,6 +49,8 @@ files:
|
|
49
49
|
- LICENSE.txt
|
50
50
|
- README.md
|
51
51
|
- build.gradle
|
52
|
+
- config/checkstyle/checkstyle.xml
|
53
|
+
- config/checkstyle/default.xml
|
52
54
|
- gradle/wrapper/gradle-wrapper.jar
|
53
55
|
- gradle/wrapper/gradle-wrapper.properties
|
54
56
|
- gradlew
|
@@ -56,8 +58,8 @@ files:
|
|
56
58
|
- lib/embulk/filter/split.rb
|
57
59
|
- src/main/java/org/embulk/filter/split/SplitFilterPlugin.java
|
58
60
|
- src/test/java/org/embulk/filter/split/TestSplitFilterPlugin.java
|
59
|
-
- classpath/embulk-filter-split-0.1.2.jar
|
60
|
-
homepage:
|
61
|
+
- classpath/embulk-filter-split-0.2.0.jar
|
62
|
+
homepage:
|
61
63
|
licenses:
|
62
64
|
- MIT
|
63
65
|
metadata: {}
|
@@ -82,4 +84,3 @@ signing_key:
|
|
82
84
|
specification_version: 4
|
83
85
|
summary: Split filter plugin for Embulk
|
84
86
|
test_files: []
|
85
|
-
has_rdoc:
|