embulk-filter-woothee 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/build.gradle +21 -3
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/filter/WootheeFilterPlugin.java +13 -92
- data/src/main/java/org/embulk/filter/WootheePageOutput.java +113 -0
- metadata +16 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bf4f311a1e93643d40ce9d75f6eaacb383ab4501
|
4
|
+
data.tar.gz: a54509d8e75e0add139fd66db496719e60907b75
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 705ab844c47a63fd106a706046132280509cdcd0f88239d43be214e1ce1949613865cd68b7c90324ae79bd27af88e012b4ba342c163c95cec8d4876ad78a71bf
|
7
|
+
data.tar.gz: 843fdb0ec6028281116b3461bcfe246bb3e7a9301abebd50892d2a3e9e18744b899b0cc422651a67939fd42097e179b24dac864fb253ac7a031ba7a4c2f340f7
|
data/build.gradle
CHANGED
@@ -2,6 +2,7 @@ plugins {
|
|
2
2
|
id "com.jfrog.bintray" version "1.1"
|
3
3
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
4
|
id "java"
|
5
|
+
id "checkstyle"
|
5
6
|
id "eclipse"
|
6
7
|
}
|
7
8
|
import com.github.jrubygradle.JRubyExec
|
@@ -13,14 +14,14 @@ configurations {
|
|
13
14
|
provided
|
14
15
|
}
|
15
16
|
|
16
|
-
version = "0.2.3"
|
17
|
+
version = "0.3.0"
|
17
18
|
|
18
19
|
sourceCompatibility = 1.7
|
19
20
|
targetCompatibility = 1.7
|
20
21
|
|
21
22
|
dependencies {
|
22
|
-
compile "org.embulk:embulk-core:0.
|
23
|
-
provided "org.embulk:embulk-core:0.
|
23
|
+
compile "org.embulk:embulk-core:0.8.9"
|
24
|
+
provided "org.embulk:embulk-core:0.8.9"
|
24
25
|
compile 'is.tagomor.woothee:woothee-java:1.2.0'
|
25
26
|
testCompile "junit:junit:4.+"
|
26
27
|
}
|
@@ -32,6 +33,23 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
|
|
32
33
|
}
|
33
34
|
clean { delete "classpath" }
|
34
35
|
|
36
|
+
checkstyle {
|
37
|
+
configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
|
38
|
+
toolVersion = '6.14.1'
|
39
|
+
}
|
40
|
+
checkstyleMain {
|
41
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
42
|
+
ignoreFailures = true
|
43
|
+
}
|
44
|
+
checkstyleTest {
|
45
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
46
|
+
ignoreFailures = true
|
47
|
+
}
|
48
|
+
task checkstyle(type: Checkstyle) {
|
49
|
+
classpath = sourceSets.main.output + sourceSets.test.output
|
50
|
+
source = sourceSets.main.allJava + sourceSets.test.allJava
|
51
|
+
}
|
52
|
+
|
35
53
|
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
36
54
|
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
|
37
55
|
script "${project.name}.gemspec"
|
data/config/checkstyle/checkstyle.xml
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<module name="Checker">
|
6
|
+
<!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
|
7
|
+
<module name="FileTabCharacter"/>
|
8
|
+
<module name="NewlineAtEndOfFile">
|
9
|
+
<property name="lineSeparator" value="lf"/>
|
10
|
+
</module>
|
11
|
+
<module name="RegexpMultiline">
|
12
|
+
<property name="format" value="\r"/>
|
13
|
+
<property name="message" value="Line contains carriage return"/>
|
14
|
+
</module>
|
15
|
+
<module name="RegexpMultiline">
|
16
|
+
<property name="format" value=" \n"/>
|
17
|
+
<property name="message" value="Line has trailing whitespace"/>
|
18
|
+
</module>
|
19
|
+
<module name="RegexpMultiline">
|
20
|
+
<property name="format" value="\{\n\n"/>
|
21
|
+
<property name="message" value="Blank line after opening brace"/>
|
22
|
+
</module>
|
23
|
+
<module name="RegexpMultiline">
|
24
|
+
<property name="format" value="\n\n\s*\}"/>
|
25
|
+
<property name="message" value="Blank line before closing brace"/>
|
26
|
+
</module>
|
27
|
+
<module name="RegexpMultiline">
|
28
|
+
<property name="format" value="\n\n\n"/>
|
29
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
30
|
+
</module>
|
31
|
+
<module name="RegexpMultiline">
|
32
|
+
<property name="format" value="\n\n\Z"/>
|
33
|
+
<property name="message" value="Blank line before end of file"/>
|
34
|
+
</module>
|
35
|
+
<module name="RegexpMultiline">
|
36
|
+
<property name="format" value="Preconditions\.checkNotNull"/>
|
37
|
+
<property name="message" value="Use of checkNotNull"/>
|
38
|
+
</module>
|
39
|
+
|
40
|
+
<module name="TreeWalker">
|
41
|
+
<module name="EmptyBlock">
|
42
|
+
<property name="option" value="text"/>
|
43
|
+
<property name="tokens" value="
|
44
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
45
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
46
|
+
</module>
|
47
|
+
<module name="EmptyStatement"/>
|
48
|
+
<module name="EmptyForInitializerPad"/>
|
49
|
+
<module name="EmptyForIteratorPad">
|
50
|
+
<property name="option" value="space"/>
|
51
|
+
</module>
|
52
|
+
<module name="MethodParamPad">
|
53
|
+
<property name="allowLineBreaks" value="true"/>
|
54
|
+
<property name="option" value="nospace"/>
|
55
|
+
</module>
|
56
|
+
<module name="ParenPad"/>
|
57
|
+
<module name="TypecastParenPad"/>
|
58
|
+
<module name="NeedBraces"/>
|
59
|
+
<module name="LeftCurly">
|
60
|
+
<property name="option" value="nl"/>
|
61
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
62
|
+
</module>
|
63
|
+
<module name="LeftCurly">
|
64
|
+
<property name="option" value="eol"/>
|
65
|
+
<property name="tokens" value="
|
66
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
67
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
68
|
+
</module>
|
69
|
+
<module name="RightCurly">
|
70
|
+
<property name="option" value="alone"/>
|
71
|
+
</module>
|
72
|
+
<module name="GenericWhitespace"/>
|
73
|
+
<module name="WhitespaceAfter"/>
|
74
|
+
<module name="NoWhitespaceBefore"/>
|
75
|
+
|
76
|
+
<module name="UpperEll"/>
|
77
|
+
<module name="DefaultComesLast"/>
|
78
|
+
<module name="ArrayTypeStyle"/>
|
79
|
+
<module name="MultipleVariableDeclarations"/>
|
80
|
+
<module name="ModifierOrder"/>
|
81
|
+
<module name="OneStatementPerLine"/>
|
82
|
+
<module name="StringLiteralEquality"/>
|
83
|
+
<module name="MutableException"/>
|
84
|
+
<module name="EqualsHashCode"/>
|
85
|
+
<module name="InnerAssignment"/>
|
86
|
+
<module name="InterfaceIsType"/>
|
87
|
+
<module name="HideUtilityClassConstructor"/>
|
88
|
+
|
89
|
+
<module name="MemberName"/>
|
90
|
+
<module name="LocalVariableName"/>
|
91
|
+
<module name="LocalFinalVariableName"/>
|
92
|
+
<module name="TypeName"/>
|
93
|
+
<module name="PackageName"/>
|
94
|
+
<module name="ParameterName"/>
|
95
|
+
<module name="StaticVariableName"/>
|
96
|
+
<module name="ClassTypeParameterName">
|
97
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
98
|
+
</module>
|
99
|
+
<module name="MethodTypeParameterName">
|
100
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
101
|
+
</module>
|
102
|
+
|
103
|
+
<module name="AvoidStarImport"/>
|
104
|
+
<module name="RedundantImport"/>
|
105
|
+
<module name="UnusedImports"/>
|
106
|
+
<module name="ImportOrder">
|
107
|
+
<property name="groups" value="*,javax,java"/>
|
108
|
+
<property name="separated" value="true"/>
|
109
|
+
<property name="option" value="bottom"/>
|
110
|
+
<property name="sortStaticImportsAlphabetically" value="true"/>
|
111
|
+
</module>
|
112
|
+
|
113
|
+
<module name="WhitespaceAround">
|
114
|
+
<property name="allowEmptyConstructors" value="true"/>
|
115
|
+
<property name="allowEmptyMethods" value="true"/>
|
116
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
117
|
+
<property name="tokens" value="
|
118
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
119
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
120
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
121
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
122
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
123
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
124
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
125
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
126
|
+
</module>
|
127
|
+
</module>
|
128
|
+
</module>
|
data/config/checkstyle/default.xml
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<!--
|
6
|
+
This is a subset of ./checkstyle.xml which allows some loose styles
|
7
|
+
-->
|
8
|
+
<module name="Checker">
|
9
|
+
<module name="FileTabCharacter"/>
|
10
|
+
<module name="NewlineAtEndOfFile">
|
11
|
+
<property name="lineSeparator" value="lf"/>
|
12
|
+
</module>
|
13
|
+
<module name="RegexpMultiline">
|
14
|
+
<property name="format" value="\r"/>
|
15
|
+
<property name="message" value="Line contains carriage return"/>
|
16
|
+
</module>
|
17
|
+
<module name="RegexpMultiline">
|
18
|
+
<property name="format" value=" \n"/>
|
19
|
+
<property name="message" value="Line has trailing whitespace"/>
|
20
|
+
</module>
|
21
|
+
<module name="RegexpMultiline">
|
22
|
+
<property name="format" value="\n\n\n"/>
|
23
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
24
|
+
</module>
|
25
|
+
<module name="RegexpMultiline">
|
26
|
+
<property name="format" value="\n\n\Z"/>
|
27
|
+
<property name="message" value="Blank line before end of file"/>
|
28
|
+
</module>
|
29
|
+
|
30
|
+
<module name="TreeWalker">
|
31
|
+
<module name="EmptyBlock">
|
32
|
+
<property name="option" value="text"/>
|
33
|
+
<property name="tokens" value="
|
34
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
35
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
36
|
+
</module>
|
37
|
+
<module name="EmptyStatement"/>
|
38
|
+
<module name="EmptyForInitializerPad"/>
|
39
|
+
<module name="EmptyForIteratorPad">
|
40
|
+
<property name="option" value="space"/>
|
41
|
+
</module>
|
42
|
+
<module name="MethodParamPad">
|
43
|
+
<property name="allowLineBreaks" value="true"/>
|
44
|
+
<property name="option" value="nospace"/>
|
45
|
+
</module>
|
46
|
+
<module name="ParenPad"/>
|
47
|
+
<module name="TypecastParenPad"/>
|
48
|
+
<module name="NeedBraces"/>
|
49
|
+
<module name="LeftCurly">
|
50
|
+
<property name="option" value="nl"/>
|
51
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
52
|
+
</module>
|
53
|
+
<module name="LeftCurly">
|
54
|
+
<property name="option" value="eol"/>
|
55
|
+
<property name="tokens" value="
|
56
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
57
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
58
|
+
</module>
|
59
|
+
<module name="RightCurly">
|
60
|
+
<property name="option" value="alone"/>
|
61
|
+
</module>
|
62
|
+
<module name="GenericWhitespace"/>
|
63
|
+
<module name="WhitespaceAfter"/>
|
64
|
+
<module name="NoWhitespaceBefore"/>
|
65
|
+
|
66
|
+
<module name="UpperEll"/>
|
67
|
+
<module name="DefaultComesLast"/>
|
68
|
+
<module name="ArrayTypeStyle"/>
|
69
|
+
<module name="MultipleVariableDeclarations"/>
|
70
|
+
<module name="ModifierOrder"/>
|
71
|
+
<module name="OneStatementPerLine"/>
|
72
|
+
<module name="StringLiteralEquality"/>
|
73
|
+
<module name="MutableException"/>
|
74
|
+
<module name="EqualsHashCode"/>
|
75
|
+
<module name="InnerAssignment"/>
|
76
|
+
<module name="InterfaceIsType"/>
|
77
|
+
<module name="HideUtilityClassConstructor"/>
|
78
|
+
|
79
|
+
<module name="MemberName"/>
|
80
|
+
<module name="LocalVariableName"/>
|
81
|
+
<module name="LocalFinalVariableName"/>
|
82
|
+
<module name="TypeName"/>
|
83
|
+
<module name="PackageName"/>
|
84
|
+
<module name="ParameterName"/>
|
85
|
+
<module name="StaticVariableName"/>
|
86
|
+
<module name="ClassTypeParameterName">
|
87
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
88
|
+
</module>
|
89
|
+
<module name="MethodTypeParameterName">
|
90
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
91
|
+
</module>
|
92
|
+
|
93
|
+
<module name="WhitespaceAround">
|
94
|
+
<property name="allowEmptyConstructors" value="true"/>
|
95
|
+
<property name="allowEmptyMethods" value="true"/>
|
96
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
97
|
+
<property name="tokens" value="
|
98
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
99
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
100
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
101
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
102
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
103
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
104
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
105
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
106
|
+
</module>
|
107
|
+
</module>
|
108
|
+
</module>
|
data/gradle/wrapper/gradle-wrapper.jar
CHANGED
Binary file
|
data/gradle/wrapper/gradle-wrapper.properties
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Wed Jan 13 12:41:02 JST 2016
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
|
data/src/main/java/org/embulk/filter/WootheeFilterPlugin.java
CHANGED
@@ -1,9 +1,6 @@
|
|
1
1
|
package org.embulk.filter;
|
2
2
|
|
3
|
-
import is.tagomor.woothee.Classifier;
|
4
|
-
|
5
3
|
import java.util.List;
|
6
|
-
import java.util.Map;
|
7
4
|
|
8
5
|
import org.embulk.config.Config;
|
9
6
|
import org.embulk.config.ConfigDefault;
|
@@ -11,18 +8,13 @@ import org.embulk.config.ConfigSource;
|
|
11
8
|
import org.embulk.config.Task;
|
12
9
|
import org.embulk.config.TaskSource;
|
13
10
|
import org.embulk.spi.Column;
|
14
|
-
import org.embulk.spi.Exec;
|
15
11
|
import org.embulk.spi.FilterPlugin;
|
16
|
-
import org.embulk.spi.Page;
|
17
|
-
import org.embulk.spi.PageBuilder;
|
18
12
|
import org.embulk.spi.PageOutput;
|
19
|
-
import org.embulk.spi.PageReader;
|
20
13
|
import org.embulk.spi.Schema;
|
21
14
|
import org.embulk.spi.type.Types;
|
22
15
|
|
23
16
|
import com.google.common.base.Optional;
|
24
17
|
import com.google.common.collect.ImmutableList;
|
25
|
-
import com.google.common.collect.Maps;
|
26
18
|
|
27
19
|
public class WootheeFilterPlugin implements FilterPlugin
|
28
20
|
{
|
@@ -70,6 +62,17 @@ public class WootheeFilterPlugin implements FilterPlugin
|
|
70
62
|
{
|
71
63
|
PluginTask task = config.loadConfig(PluginTask.class);
|
72
64
|
|
65
|
+
Schema outputSchema = buildOutputSchema(task, inputSchema);
|
66
|
+
|
67
|
+
control.run(task.dump(), outputSchema);
|
68
|
+
}
|
69
|
+
|
70
|
+
/**
|
71
|
+
* @param task
|
72
|
+
* @param inputSchema
|
73
|
+
* @return
|
74
|
+
*/
|
75
|
+
private Schema buildOutputSchema(PluginTask task, Schema inputSchema) {
|
73
76
|
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
74
77
|
int i = 0;
|
75
78
|
for (Column inputColumn: inputSchema.getColumns()) {
|
@@ -84,94 +87,12 @@ public class WootheeFilterPlugin implements FilterPlugin
|
|
84
87
|
builder.add(new Column(i++, task.getOutKeyVendor(), Types.STRING));
|
85
88
|
}
|
86
89
|
Schema outputSchema = new Schema(builder.build());
|
87
|
-
|
88
|
-
control.run(task.dump(), outputSchema);
|
90
|
+
return outputSchema;
|
89
91
|
}
|
90
92
|
|
91
93
|
@Override
|
92
94
|
public PageOutput open(TaskSource taskSource, final Schema inputSchema, final Schema outputSchema, final PageOutput output)
|
93
95
|
{
|
94
|
-
|
95
|
-
|
96
|
-
final List<Column> outputColumns = outputSchema.getColumns();
|
97
|
-
final List<Column> inputColumns = inputSchema.getColumns();
|
98
|
-
Map<String, Column> inputColumnMap = Maps.newHashMap();
|
99
|
-
final Map<String, Column> wootheeColumnMap = Maps.newHashMap();
|
100
|
-
for (Column column : outputColumns) {
|
101
|
-
if (!inputColumns.contains(column)) {
|
102
|
-
wootheeColumnMap.put(column.getName(), column);
|
103
|
-
} else {
|
104
|
-
inputColumnMap.put(column.getName(), column);
|
105
|
-
}
|
106
|
-
}
|
107
|
-
final Column keyNameColumn = inputColumnMap.get(task.getKeyName());
|
108
|
-
|
109
|
-
return new PageOutput() {
|
110
|
-
private PageReader reader = new PageReader(inputSchema);
|
111
|
-
private PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
112
|
-
|
113
|
-
@Override
|
114
|
-
public void finish() {
|
115
|
-
builder.finish();
|
116
|
-
}
|
117
|
-
|
118
|
-
@Override
|
119
|
-
public void close() {
|
120
|
-
builder.close();
|
121
|
-
}
|
122
|
-
|
123
|
-
@Override
|
124
|
-
public void add(Page page) {
|
125
|
-
reader.setPage(page);
|
126
|
-
|
127
|
-
while (reader.nextRecord()) {
|
128
|
-
String userAgentString = reader.isNull(keyNameColumn) ? null : reader.getString(keyNameColumn);
|
129
|
-
Map<String, String> ua = Classifier.parse(userAgentString);
|
130
|
-
setValue(builder, ua);
|
131
|
-
if (task.getFilterCategories().isPresent()) {
|
132
|
-
if (task.getFilterCategories().orNull().contains(ua.get("category"))) {
|
133
|
-
builder.addRecord();
|
134
|
-
}
|
135
|
-
} else if (task.getDropCategories().isPresent()) {
|
136
|
-
if (!task.getDropCategories().orNull().contains(ua.get("category"))) {
|
137
|
-
builder.addRecord();
|
138
|
-
}
|
139
|
-
} else {
|
140
|
-
builder.addRecord();
|
141
|
-
}
|
142
|
-
}
|
143
|
-
}
|
144
|
-
|
145
|
-
/**
|
146
|
-
* @param builder
|
147
|
-
*/
|
148
|
-
private void setValue(PageBuilder builder, Map<String, String> ua) {
|
149
|
-
if (task.getMergeAgentInfo()) {
|
150
|
-
builder.setString(wootheeColumnMap.get(task.getOutKeyOs()), ua.get("os"));
|
151
|
-
builder.setString(wootheeColumnMap.get(task.getOutKeyName()), ua.get("name"));
|
152
|
-
builder.setString(wootheeColumnMap.get(task.getOutKeyCategory()), ua.get("category"));
|
153
|
-
builder.setString(wootheeColumnMap.get(task.getOutKeyVersion()), ua.get("version"));
|
154
|
-
builder.setString(wootheeColumnMap.get(task.getOutKeyVendor()), ua.get("vendor"));
|
155
|
-
}
|
156
|
-
|
157
|
-
for (Column inputColumn: inputColumns) {
|
158
|
-
if (reader.isNull(inputColumn)) {
|
159
|
-
builder.setNull(inputColumn);
|
160
|
-
continue;
|
161
|
-
}
|
162
|
-
if (Types.STRING.equals(inputColumn.getType())) {
|
163
|
-
builder.setString(inputColumn, reader.getString(inputColumn));
|
164
|
-
} else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
165
|
-
builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
|
166
|
-
} else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
167
|
-
builder.setDouble(inputColumn, reader.getDouble(inputColumn));
|
168
|
-
} else if (Types.LONG.equals(inputColumn.getType())) {
|
169
|
-
builder.setLong(inputColumn, reader.getLong(inputColumn));
|
170
|
-
} else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
171
|
-
builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
|
172
|
-
}
|
173
|
-
}
|
174
|
-
}
|
175
|
-
};
|
96
|
+
return new WootheePageOutput(taskSource, inputSchema, outputSchema, output);
|
176
97
|
}
|
177
98
|
}
|
data/src/main/java/org/embulk/filter/WootheePageOutput.java
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
package org.embulk.filter;
|
2
|
+
|
3
|
+
import is.tagomor.woothee.Classifier;
|
4
|
+
|
5
|
+
import java.util.List;
|
6
|
+
import java.util.Map;
|
7
|
+
|
8
|
+
import org.embulk.config.TaskSource;
|
9
|
+
import org.embulk.filter.WootheeFilterPlugin.PluginTask;
|
10
|
+
import org.embulk.spi.Column;
|
11
|
+
import org.embulk.spi.Exec;
|
12
|
+
import org.embulk.spi.Page;
|
13
|
+
import org.embulk.spi.PageBuilder;
|
14
|
+
import org.embulk.spi.PageOutput;
|
15
|
+
import org.embulk.spi.PageReader;
|
16
|
+
import org.embulk.spi.Schema;
|
17
|
+
import org.embulk.spi.type.Types;
|
18
|
+
|
19
|
+
import com.google.common.collect.Maps;
|
20
|
+
|
21
|
+
public class WootheePageOutput implements PageOutput
|
22
|
+
{
|
23
|
+
private final PluginTask task;
|
24
|
+
private final List<Column> outputColumns;
|
25
|
+
private final List<Column> inputColumns;
|
26
|
+
private final Map<String, Column> wootheeColumnMap;
|
27
|
+
private final Column keyNameColumn;
|
28
|
+
private final PageReader reader;
|
29
|
+
private final PageBuilder builder;
|
30
|
+
|
31
|
+
public WootheePageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output) {
|
32
|
+
this.task = taskSource.loadTask(PluginTask.class);
|
33
|
+
this.outputColumns = outputSchema.getColumns();
|
34
|
+
this.inputColumns = inputSchema.getColumns();
|
35
|
+
Map<String, Column> inputColumnMap = Maps.newHashMap();
|
36
|
+
this.wootheeColumnMap = Maps.newHashMap();
|
37
|
+
for (Column column : outputColumns) {
|
38
|
+
if (!inputColumns.contains(column)) {
|
39
|
+
wootheeColumnMap.put(column.getName(), column);
|
40
|
+
} else {
|
41
|
+
inputColumnMap.put(column.getName(), column);
|
42
|
+
}
|
43
|
+
}
|
44
|
+
this.reader = new PageReader(inputSchema);
|
45
|
+
this.builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
46
|
+
this.keyNameColumn = inputColumnMap.get(task.getKeyName());
|
47
|
+
}
|
48
|
+
|
49
|
+
@Override
|
50
|
+
public void finish() {
|
51
|
+
builder.finish();
|
52
|
+
}
|
53
|
+
|
54
|
+
@Override
|
55
|
+
public void close() {
|
56
|
+
builder.close();
|
57
|
+
}
|
58
|
+
|
59
|
+
@Override
|
60
|
+
public void add(Page page) {
|
61
|
+
reader.setPage(page);
|
62
|
+
|
63
|
+
while (reader.nextRecord()) {
|
64
|
+
String userAgentString = reader.isNull(keyNameColumn) ? null : reader.getString(keyNameColumn);
|
65
|
+
Map<String, String> ua = Classifier.parse(userAgentString);
|
66
|
+
setValue(builder, ua);
|
67
|
+
if (task.getFilterCategories().isPresent()) {
|
68
|
+
if (task.getFilterCategories().orNull().contains(ua.get("category"))) {
|
69
|
+
builder.addRecord();
|
70
|
+
}
|
71
|
+
} else if (task.getDropCategories().isPresent()) {
|
72
|
+
if (!task.getDropCategories().orNull().contains(ua.get("category"))) {
|
73
|
+
builder.addRecord();
|
74
|
+
}
|
75
|
+
} else {
|
76
|
+
builder.addRecord();
|
77
|
+
}
|
78
|
+
}
|
79
|
+
}
|
80
|
+
|
81
|
+
/**
|
82
|
+
* @param builder
|
83
|
+
*/
|
84
|
+
private void setValue(PageBuilder builder, Map<String, String> ua) {
|
85
|
+
if (task.getMergeAgentInfo()) {
|
86
|
+
builder.setString(wootheeColumnMap.get(task.getOutKeyOs()), ua.get("os"));
|
87
|
+
builder.setString(wootheeColumnMap.get(task.getOutKeyName()), ua.get("name"));
|
88
|
+
builder.setString(wootheeColumnMap.get(task.getOutKeyCategory()), ua.get("category"));
|
89
|
+
builder.setString(wootheeColumnMap.get(task.getOutKeyVersion()), ua.get("version"));
|
90
|
+
builder.setString(wootheeColumnMap.get(task.getOutKeyVendor()), ua.get("vendor"));
|
91
|
+
}
|
92
|
+
|
93
|
+
for (Column inputColumn: inputColumns) {
|
94
|
+
if (reader.isNull(inputColumn)) {
|
95
|
+
builder.setNull(inputColumn);
|
96
|
+
continue;
|
97
|
+
}
|
98
|
+
if (Types.STRING.equals(inputColumn.getType())) {
|
99
|
+
builder.setString(inputColumn, reader.getString(inputColumn));
|
100
|
+
} else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
101
|
+
builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
|
102
|
+
} else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
103
|
+
builder.setDouble(inputColumn, reader.getDouble(inputColumn));
|
104
|
+
} else if (Types.LONG.equals(inputColumn.getType())) {
|
105
|
+
builder.setLong(inputColumn, reader.getLong(inputColumn));
|
106
|
+
} else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
107
|
+
builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
|
108
|
+
} else if (Types.JSON.equals(inputColumn.getType())) {
|
109
|
+
builder.setJson(inputColumn, reader.getJson(inputColumn));
|
110
|
+
}
|
111
|
+
}
|
112
|
+
}
|
113
|
+
}
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-woothee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ~>
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.0'
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
16
|
- - ~>
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: '1.0'
|
19
|
+
name: bundler
|
25
20
|
prerelease: false
|
26
21
|
type: :development
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
|
-
- -
|
24
|
+
- - ~>
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
30
|
- - '>='
|
37
31
|
- !ruby/object:Gem::Version
|
38
32
|
version: '10.0'
|
33
|
+
name: rake
|
39
34
|
prerelease: false
|
40
35
|
type: :development
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
41
|
description: Woothee filter plugin for Embulk
|
42
42
|
email:
|
43
43
|
- toyama0919@gmail.com
|
@@ -50,14 +50,17 @@ files:
|
|
50
50
|
- LICENSE.txt
|
51
51
|
- README.md
|
52
52
|
- build.gradle
|
53
|
+
- config/checkstyle/checkstyle.xml
|
54
|
+
- config/checkstyle/default.xml
|
53
55
|
- gradle/wrapper/gradle-wrapper.jar
|
54
56
|
- gradle/wrapper/gradle-wrapper.properties
|
55
57
|
- gradlew
|
56
58
|
- gradlew.bat
|
57
59
|
- lib/embulk/filter/woothee.rb
|
58
60
|
- src/main/java/org/embulk/filter/WootheeFilterPlugin.java
|
61
|
+
- src/main/java/org/embulk/filter/WootheePageOutput.java
|
59
62
|
- src/test/java/org/embulk/filter/TestWootheeFilterPlugin.java
|
60
|
-
- classpath/embulk-filter-woothee-0.2.3.jar
|
63
|
+
- classpath/embulk-filter-woothee-0.3.0.jar
|
61
64
|
- classpath/woothee-java-1.2.0.jar
|
62
65
|
homepage: https://github.com/toyama0919/embulk-filter-woothee
|
63
66
|
licenses:
|