embulk-filter-split 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +23 -5
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/filter/split/SplitFilterPlugin.java +104 -35
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6384139a423eb5deee45da63f93bcb31c924edf2
|
4
|
+
data.tar.gz: 43247a6be8b26043c1bfd3eb1355ff7eb286d194
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d9f147a00e6a78853e57f3ba3a650f81eb1259734f9cc6088506904980a2b28a72318fb607a70f5fb601303fc931a717849d4d78c60081d46a703d6fd613ecd7
|
7
|
+
data.tar.gz: 327780b980faefc843e35eb8c90badcbc248dcb843061bf4a1dd064145a68cee5a6fb008c21ae907e8c7f26c365602a2c2a77089c6aba0db1f4105e6f9c76ba8
|
data/build.gradle
CHANGED
@@ -2,6 +2,7 @@ plugins {
|
|
2
2
|
id "com.jfrog.bintray" version "1.1"
|
3
3
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
4
|
id "java"
|
5
|
+
id "checkstyle"
|
5
6
|
id "eclipse"
|
6
7
|
}
|
7
8
|
import com.github.jrubygradle.JRubyExec
|
@@ -13,14 +14,14 @@ configurations {
|
|
13
14
|
provided
|
14
15
|
}
|
15
16
|
|
16
|
-
version = "0.1.2"
|
17
|
+
version = "0.2.0"
|
17
18
|
|
18
19
|
sourceCompatibility = 1.7
|
19
20
|
targetCompatibility = 1.7
|
20
21
|
|
21
22
|
dependencies {
|
22
|
-
compile "org.embulk:embulk-core:0.
|
23
|
-
provided "org.embulk:embulk-core:0.
|
23
|
+
compile "org.embulk:embulk-core:0.8.13"
|
24
|
+
provided "org.embulk:embulk-core:0.8.13"
|
24
25
|
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
|
25
26
|
testCompile "junit:junit:4.+"
|
26
27
|
}
|
@@ -32,6 +33,23 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
|
|
32
33
|
}
|
33
34
|
clean { delete "classpath" }
|
34
35
|
|
36
|
+
checkstyle {
|
37
|
+
configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
|
38
|
+
toolVersion = '6.14.1'
|
39
|
+
}
|
40
|
+
checkstyleMain {
|
41
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
42
|
+
ignoreFailures = true
|
43
|
+
}
|
44
|
+
checkstyleTest {
|
45
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
46
|
+
ignoreFailures = true
|
47
|
+
}
|
48
|
+
task checkstyle(type: Checkstyle) {
|
49
|
+
classpath = sourceSets.main.output + sourceSets.test.output
|
50
|
+
source = sourceSets.main.allJava + sourceSets.test.allJava
|
51
|
+
}
|
52
|
+
|
35
53
|
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
36
54
|
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
|
37
55
|
script "${project.name}.gemspec"
|
@@ -58,10 +76,10 @@ Gem::Specification.new do |spec|
|
|
58
76
|
spec.version = "${project.version}"
|
59
77
|
spec.authors = ["toyama0919"]
|
60
78
|
spec.summary = %[Split filter plugin for Embulk]
|
61
|
-
spec.description = %[Split
|
79
|
+
spec.description = %[Split]
|
62
80
|
spec.email = ["toyama0919@gmail.com"]
|
63
81
|
spec.licenses = ["MIT"]
|
64
|
-
spec.homepage = "https://github.com/toyama0919/embulk-filter-split"
|
82
|
+
# TODO set this: spec.homepage = "https://github.com/toyama0919/embulk-filter-split"
|
65
83
|
|
66
84
|
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
|
67
85
|
spec.test_files = spec.files.grep(%r"^(test|spec)/")
|
@@ -0,0 +1,128 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<module name="Checker">
|
6
|
+
<!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
|
7
|
+
<module name="FileTabCharacter"/>
|
8
|
+
<module name="NewlineAtEndOfFile">
|
9
|
+
<property name="lineSeparator" value="lf"/>
|
10
|
+
</module>
|
11
|
+
<module name="RegexpMultiline">
|
12
|
+
<property name="format" value="\r"/>
|
13
|
+
<property name="message" value="Line contains carriage return"/>
|
14
|
+
</module>
|
15
|
+
<module name="RegexpMultiline">
|
16
|
+
<property name="format" value=" \n"/>
|
17
|
+
<property name="message" value="Line has trailing whitespace"/>
|
18
|
+
</module>
|
19
|
+
<module name="RegexpMultiline">
|
20
|
+
<property name="format" value="\{\n\n"/>
|
21
|
+
<property name="message" value="Blank line after opening brace"/>
|
22
|
+
</module>
|
23
|
+
<module name="RegexpMultiline">
|
24
|
+
<property name="format" value="\n\n\s*\}"/>
|
25
|
+
<property name="message" value="Blank line before closing brace"/>
|
26
|
+
</module>
|
27
|
+
<module name="RegexpMultiline">
|
28
|
+
<property name="format" value="\n\n\n"/>
|
29
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
30
|
+
</module>
|
31
|
+
<module name="RegexpMultiline">
|
32
|
+
<property name="format" value="\n\n\Z"/>
|
33
|
+
<property name="message" value="Blank line before end of file"/>
|
34
|
+
</module>
|
35
|
+
<module name="RegexpMultiline">
|
36
|
+
<property name="format" value="Preconditions\.checkNotNull"/>
|
37
|
+
<property name="message" value="Use of checkNotNull"/>
|
38
|
+
</module>
|
39
|
+
|
40
|
+
<module name="TreeWalker">
|
41
|
+
<module name="EmptyBlock">
|
42
|
+
<property name="option" value="text"/>
|
43
|
+
<property name="tokens" value="
|
44
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
45
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
46
|
+
</module>
|
47
|
+
<module name="EmptyStatement"/>
|
48
|
+
<module name="EmptyForInitializerPad"/>
|
49
|
+
<module name="EmptyForIteratorPad">
|
50
|
+
<property name="option" value="space"/>
|
51
|
+
</module>
|
52
|
+
<module name="MethodParamPad">
|
53
|
+
<property name="allowLineBreaks" value="true"/>
|
54
|
+
<property name="option" value="nospace"/>
|
55
|
+
</module>
|
56
|
+
<module name="ParenPad"/>
|
57
|
+
<module name="TypecastParenPad"/>
|
58
|
+
<module name="NeedBraces"/>
|
59
|
+
<module name="LeftCurly">
|
60
|
+
<property name="option" value="nl"/>
|
61
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
62
|
+
</module>
|
63
|
+
<module name="LeftCurly">
|
64
|
+
<property name="option" value="eol"/>
|
65
|
+
<property name="tokens" value="
|
66
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
67
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
68
|
+
</module>
|
69
|
+
<module name="RightCurly">
|
70
|
+
<property name="option" value="alone"/>
|
71
|
+
</module>
|
72
|
+
<module name="GenericWhitespace"/>
|
73
|
+
<module name="WhitespaceAfter"/>
|
74
|
+
<module name="NoWhitespaceBefore"/>
|
75
|
+
|
76
|
+
<module name="UpperEll"/>
|
77
|
+
<module name="DefaultComesLast"/>
|
78
|
+
<module name="ArrayTypeStyle"/>
|
79
|
+
<module name="MultipleVariableDeclarations"/>
|
80
|
+
<module name="ModifierOrder"/>
|
81
|
+
<module name="OneStatementPerLine"/>
|
82
|
+
<module name="StringLiteralEquality"/>
|
83
|
+
<module name="MutableException"/>
|
84
|
+
<module name="EqualsHashCode"/>
|
85
|
+
<module name="InnerAssignment"/>
|
86
|
+
<module name="InterfaceIsType"/>
|
87
|
+
<module name="HideUtilityClassConstructor"/>
|
88
|
+
|
89
|
+
<module name="MemberName"/>
|
90
|
+
<module name="LocalVariableName"/>
|
91
|
+
<module name="LocalFinalVariableName"/>
|
92
|
+
<module name="TypeName"/>
|
93
|
+
<module name="PackageName"/>
|
94
|
+
<module name="ParameterName"/>
|
95
|
+
<module name="StaticVariableName"/>
|
96
|
+
<module name="ClassTypeParameterName">
|
97
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
98
|
+
</module>
|
99
|
+
<module name="MethodTypeParameterName">
|
100
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
101
|
+
</module>
|
102
|
+
|
103
|
+
<module name="AvoidStarImport"/>
|
104
|
+
<module name="RedundantImport"/>
|
105
|
+
<module name="UnusedImports"/>
|
106
|
+
<module name="ImportOrder">
|
107
|
+
<property name="groups" value="*,javax,java"/>
|
108
|
+
<property name="separated" value="true"/>
|
109
|
+
<property name="option" value="bottom"/>
|
110
|
+
<property name="sortStaticImportsAlphabetically" value="true"/>
|
111
|
+
</module>
|
112
|
+
|
113
|
+
<module name="WhitespaceAround">
|
114
|
+
<property name="allowEmptyConstructors" value="true"/>
|
115
|
+
<property name="allowEmptyMethods" value="true"/>
|
116
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
117
|
+
<property name="tokens" value="
|
118
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
119
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
120
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
121
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
122
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
123
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
124
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
125
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
126
|
+
</module>
|
127
|
+
</module>
|
128
|
+
</module>
|
@@ -0,0 +1,108 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<!--
|
6
|
+
This is a subset of ./checkstyle.xml which allows some loose styles
|
7
|
+
-->
|
8
|
+
<module name="Checker">
|
9
|
+
<module name="FileTabCharacter"/>
|
10
|
+
<module name="NewlineAtEndOfFile">
|
11
|
+
<property name="lineSeparator" value="lf"/>
|
12
|
+
</module>
|
13
|
+
<module name="RegexpMultiline">
|
14
|
+
<property name="format" value="\r"/>
|
15
|
+
<property name="message" value="Line contains carriage return"/>
|
16
|
+
</module>
|
17
|
+
<module name="RegexpMultiline">
|
18
|
+
<property name="format" value=" \n"/>
|
19
|
+
<property name="message" value="Line has trailing whitespace"/>
|
20
|
+
</module>
|
21
|
+
<module name="RegexpMultiline">
|
22
|
+
<property name="format" value="\n\n\n"/>
|
23
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
24
|
+
</module>
|
25
|
+
<module name="RegexpMultiline">
|
26
|
+
<property name="format" value="\n\n\Z"/>
|
27
|
+
<property name="message" value="Blank line before end of file"/>
|
28
|
+
</module>
|
29
|
+
|
30
|
+
<module name="TreeWalker">
|
31
|
+
<module name="EmptyBlock">
|
32
|
+
<property name="option" value="text"/>
|
33
|
+
<property name="tokens" value="
|
34
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
35
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
36
|
+
</module>
|
37
|
+
<module name="EmptyStatement"/>
|
38
|
+
<module name="EmptyForInitializerPad"/>
|
39
|
+
<module name="EmptyForIteratorPad">
|
40
|
+
<property name="option" value="space"/>
|
41
|
+
</module>
|
42
|
+
<module name="MethodParamPad">
|
43
|
+
<property name="allowLineBreaks" value="true"/>
|
44
|
+
<property name="option" value="nospace"/>
|
45
|
+
</module>
|
46
|
+
<module name="ParenPad"/>
|
47
|
+
<module name="TypecastParenPad"/>
|
48
|
+
<module name="NeedBraces"/>
|
49
|
+
<module name="LeftCurly">
|
50
|
+
<property name="option" value="nl"/>
|
51
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
52
|
+
</module>
|
53
|
+
<module name="LeftCurly">
|
54
|
+
<property name="option" value="eol"/>
|
55
|
+
<property name="tokens" value="
|
56
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
57
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
58
|
+
</module>
|
59
|
+
<module name="RightCurly">
|
60
|
+
<property name="option" value="alone"/>
|
61
|
+
</module>
|
62
|
+
<module name="GenericWhitespace"/>
|
63
|
+
<module name="WhitespaceAfter"/>
|
64
|
+
<module name="NoWhitespaceBefore"/>
|
65
|
+
|
66
|
+
<module name="UpperEll"/>
|
67
|
+
<module name="DefaultComesLast"/>
|
68
|
+
<module name="ArrayTypeStyle"/>
|
69
|
+
<module name="MultipleVariableDeclarations"/>
|
70
|
+
<module name="ModifierOrder"/>
|
71
|
+
<module name="OneStatementPerLine"/>
|
72
|
+
<module name="StringLiteralEquality"/>
|
73
|
+
<module name="MutableException"/>
|
74
|
+
<module name="EqualsHashCode"/>
|
75
|
+
<module name="InnerAssignment"/>
|
76
|
+
<module name="InterfaceIsType"/>
|
77
|
+
<module name="HideUtilityClassConstructor"/>
|
78
|
+
|
79
|
+
<module name="MemberName"/>
|
80
|
+
<module name="LocalVariableName"/>
|
81
|
+
<module name="LocalFinalVariableName"/>
|
82
|
+
<module name="TypeName"/>
|
83
|
+
<module name="PackageName"/>
|
84
|
+
<module name="ParameterName"/>
|
85
|
+
<module name="StaticVariableName"/>
|
86
|
+
<module name="ClassTypeParameterName">
|
87
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
88
|
+
</module>
|
89
|
+
<module name="MethodTypeParameterName">
|
90
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
91
|
+
</module>
|
92
|
+
|
93
|
+
<module name="WhitespaceAround">
|
94
|
+
<property name="allowEmptyConstructors" value="true"/>
|
95
|
+
<property name="allowEmptyMethods" value="true"/>
|
96
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
97
|
+
<property name="tokens" value="
|
98
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
99
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
100
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
101
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
102
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
103
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
104
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
105
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
106
|
+
</module>
|
107
|
+
</module>
|
108
|
+
</module>
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Wed Jan 13 12:41:02 JST 2016
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
|
@@ -1,5 +1,8 @@
|
|
1
1
|
package org.embulk.filter.split;
|
2
2
|
|
3
|
+
import java.util.Arrays;
|
4
|
+
import java.util.List;
|
5
|
+
|
3
6
|
import org.apache.commons.lang3.StringUtils;
|
4
7
|
import org.embulk.config.Config;
|
5
8
|
import org.embulk.config.ConfigDefault;
|
@@ -15,9 +18,11 @@ import org.embulk.spi.PageOutput;
|
|
15
18
|
import org.embulk.spi.PageReader;
|
16
19
|
import org.embulk.spi.Schema;
|
17
20
|
import org.embulk.spi.type.Types;
|
21
|
+
import org.msgpack.value.Value;
|
18
22
|
|
19
23
|
import com.google.common.base.Optional;
|
20
24
|
import com.google.common.collect.ImmutableList;
|
25
|
+
import com.google.common.collect.Lists;
|
21
26
|
|
22
27
|
public class SplitFilterPlugin
|
23
28
|
implements FilterPlugin
|
@@ -29,6 +34,10 @@ public class SplitFilterPlugin
|
|
29
34
|
@ConfigDefault("true")
|
30
35
|
public boolean getKeepInput();
|
31
36
|
|
37
|
+
@Config("skip_null_or_empty")
|
38
|
+
@ConfigDefault("true")
|
39
|
+
public boolean getSkipNullOrEmpty();
|
40
|
+
|
32
41
|
@Config("delimiter")
|
33
42
|
@ConfigDefault("\",\"")
|
34
43
|
public String getDelimiter();
|
@@ -37,8 +46,7 @@ public class SplitFilterPlugin
|
|
37
46
|
public String getTargetKey();
|
38
47
|
|
39
48
|
@Config("output_key")
|
40
|
-
|
41
|
-
public Optional<String> getOutputKey();
|
49
|
+
public String getOutputKey();
|
42
50
|
}
|
43
51
|
|
44
52
|
@Override
|
@@ -50,15 +58,14 @@ public class SplitFilterPlugin
|
|
50
58
|
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
51
59
|
int i = 0;
|
52
60
|
if (task.getKeepInput()) {
|
53
|
-
for (Column inputColumn: inputSchema.getColumns()) {
|
61
|
+
for (Column inputColumn : inputSchema.getColumns()) {
|
54
62
|
Column outputColumn = new Column(i++, inputColumn.getName(), inputColumn.getType());
|
55
63
|
builder.add(outputColumn);
|
56
64
|
}
|
57
65
|
}
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
}
|
66
|
+
Column outputColumn = new Column(i++, task.getOutputKey(), Types.STRING);
|
67
|
+
builder.add(outputColumn);
|
68
|
+
|
62
69
|
Schema outputSchema = new Schema(builder.build());
|
63
70
|
control.run(task.dump(), outputSchema);
|
64
71
|
}
|
@@ -69,53 +76,115 @@ public class SplitFilterPlugin
|
|
69
76
|
{
|
70
77
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
71
78
|
final Column targetColumn = inputSchema.lookupColumn(task.getTargetKey());
|
72
|
-
final Column outputColumn = task.getOutputKey()
|
73
|
-
outputSchema.lookupColumn(task.getOutputKey().get()) :
|
74
|
-
outputSchema.lookupColumn(task.getTargetKey());
|
79
|
+
final Column outputColumn = outputSchema.lookupColumn(task.getOutputKey());
|
75
80
|
|
76
81
|
return new PageOutput() {
|
77
82
|
private PageReader reader = new PageReader(inputSchema);
|
78
83
|
private PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
79
84
|
|
80
85
|
@Override
|
81
|
-
public void finish()
|
86
|
+
public void finish()
|
87
|
+
{
|
82
88
|
builder.finish();
|
83
89
|
}
|
84
90
|
|
85
91
|
@Override
|
86
|
-
public void close()
|
92
|
+
public void close()
|
93
|
+
{
|
87
94
|
builder.close();
|
88
95
|
}
|
89
96
|
|
90
97
|
@Override
|
91
|
-
public void add(Page page)
|
98
|
+
public void add(Page page)
|
99
|
+
{
|
92
100
|
reader.setPage(page);
|
93
101
|
while (reader.nextRecord()) {
|
94
|
-
String
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
builder.setString(outputColumn, word);
|
99
|
-
continue;
|
100
|
-
}
|
101
|
-
if (reader.isNull(column)) {
|
102
|
-
builder.setNull(column);
|
103
|
-
continue;
|
104
|
-
}
|
105
|
-
if (Types.STRING.equals(column.getType())) {
|
106
|
-
builder.setString(column, reader.getString(column));
|
107
|
-
} else if (Types.BOOLEAN.equals(column.getType())) {
|
108
|
-
builder.setBoolean(column, reader.getBoolean(column));
|
109
|
-
} else if (Types.DOUBLE.equals(column.getType())) {
|
110
|
-
builder.setDouble(column, reader.getDouble(column));
|
111
|
-
} else if (Types.LONG.equals(column.getType())) {
|
112
|
-
builder.setLong(column, reader.getLong(column));
|
113
|
-
} else if (Types.TIMESTAMP.equals(column.getType())) {
|
114
|
-
builder.setTimestamp(column, reader.getTimestamp(column));
|
115
|
-
}
|
102
|
+
List<String> words = getWords(task, targetColumn);
|
103
|
+
if (reader.isNull(targetColumn) || words.isEmpty()) {
|
104
|
+
if (task.getSkipNullOrEmpty()) {
|
105
|
+
continue;
|
116
106
|
}
|
107
|
+
setColumns(outputSchema, outputColumn, null);
|
117
108
|
builder.addRecord();
|
118
109
|
}
|
110
|
+
else {
|
111
|
+
for (String word : words) {
|
112
|
+
setColumns(outputSchema, outputColumn, word);
|
113
|
+
builder.addRecord();
|
114
|
+
}
|
115
|
+
}
|
116
|
+
}
|
117
|
+
}
|
118
|
+
|
119
|
+
/**
|
120
|
+
* @param task
|
121
|
+
* @param targetColumn
|
122
|
+
* @return
|
123
|
+
*/
|
124
|
+
private List<String> getWords(final PluginTask task, final Column targetColumn)
|
125
|
+
{
|
126
|
+
List<String> words = Lists.newArrayList();
|
127
|
+
if (targetColumn.getType().equals(Types.STRING)) {
|
128
|
+
words = Arrays.asList(StringUtils.split(reader.getString(targetColumn), task.getDelimiter()));
|
129
|
+
}
|
130
|
+
else if (targetColumn.getType().equals(Types.JSON)) {
|
131
|
+
final Value json = reader.getJson(targetColumn);
|
132
|
+
if (json.isArrayValue()) {
|
133
|
+
for (Value value : json.asArrayValue().list()) {
|
134
|
+
words.add(value.toString());
|
135
|
+
}
|
136
|
+
}
|
137
|
+
}
|
138
|
+
return words;
|
139
|
+
}
|
140
|
+
|
141
|
+
/**
|
142
|
+
* @param outputSchema
|
143
|
+
* @param outputColumn
|
144
|
+
* @param word
|
145
|
+
*/
|
146
|
+
private void setColumns(final Schema outputSchema, final Column outputColumn, String word)
|
147
|
+
{
|
148
|
+
for (Column column : outputSchema.getColumns()) {
|
149
|
+
if (column.getName().equals(outputColumn.getName())) {
|
150
|
+
if (word == null) {
|
151
|
+
builder.setNull(outputColumn);
|
152
|
+
}
|
153
|
+
else {
|
154
|
+
builder.setString(outputColumn, word);
|
155
|
+
}
|
156
|
+
continue;
|
157
|
+
}
|
158
|
+
setKeepColumns(column);
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
162
|
+
/**
|
163
|
+
* @param column
|
164
|
+
*/
|
165
|
+
private void setKeepColumns(Column column)
|
166
|
+
{
|
167
|
+
if (reader.isNull(column)) {
|
168
|
+
builder.setNull(column);
|
169
|
+
return;
|
170
|
+
}
|
171
|
+
if (Types.STRING.equals(column.getType())) {
|
172
|
+
builder.setString(column, reader.getString(column));
|
173
|
+
}
|
174
|
+
else if (Types.BOOLEAN.equals(column.getType())) {
|
175
|
+
builder.setBoolean(column, reader.getBoolean(column));
|
176
|
+
}
|
177
|
+
else if (Types.DOUBLE.equals(column.getType())) {
|
178
|
+
builder.setDouble(column, reader.getDouble(column));
|
179
|
+
}
|
180
|
+
else if (Types.LONG.equals(column.getType())) {
|
181
|
+
builder.setLong(column, reader.getLong(column));
|
182
|
+
}
|
183
|
+
else if (Types.TIMESTAMP.equals(column.getType())) {
|
184
|
+
builder.setTimestamp(column, reader.getTimestamp(column));
|
185
|
+
}
|
186
|
+
else if (Types.JSON.equals(column.getType())) {
|
187
|
+
builder.setJson(column, reader.getJson(column));
|
119
188
|
}
|
120
189
|
}
|
121
190
|
};
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-split
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-09-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,7 +38,7 @@ dependencies:
|
|
38
38
|
- - '>='
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
|
-
description: Split
|
41
|
+
description: Split
|
42
42
|
email:
|
43
43
|
- toyama0919@gmail.com
|
44
44
|
executables: []
|
@@ -49,6 +49,8 @@ files:
|
|
49
49
|
- LICENSE.txt
|
50
50
|
- README.md
|
51
51
|
- build.gradle
|
52
|
+
- config/checkstyle/checkstyle.xml
|
53
|
+
- config/checkstyle/default.xml
|
52
54
|
- gradle/wrapper/gradle-wrapper.jar
|
53
55
|
- gradle/wrapper/gradle-wrapper.properties
|
54
56
|
- gradlew
|
@@ -56,8 +58,8 @@ files:
|
|
56
58
|
- lib/embulk/filter/split.rb
|
57
59
|
- src/main/java/org/embulk/filter/split/SplitFilterPlugin.java
|
58
60
|
- src/test/java/org/embulk/filter/split/TestSplitFilterPlugin.java
|
59
|
-
- classpath/embulk-filter-split-0.1.2.jar
|
60
|
-
homepage: https://github.com/toyama0919/embulk-filter-split
|
61
|
+
- classpath/embulk-filter-split-0.2.0.jar
|
62
|
+
homepage:
|
61
63
|
licenses:
|
62
64
|
- MIT
|
63
65
|
metadata: {}
|
@@ -82,4 +84,3 @@ signing_key:
|
|
82
84
|
specification_version: 4
|
83
85
|
summary: Split filter plugin for Embulk
|
84
86
|
test_files: []
|
85
|
-
has_rdoc:
|