embulk-filter-woothee 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +21 -3
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/filter/WootheeFilterPlugin.java +13 -92
- data/src/main/java/org/embulk/filter/WootheePageOutput.java +113 -0
- metadata +16 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bf4f311a1e93643d40ce9d75f6eaacb383ab4501
|
4
|
+
data.tar.gz: a54509d8e75e0add139fd66db496719e60907b75
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 705ab844c47a63fd106a706046132280509cdcd0f88239d43be214e1ce1949613865cd68b7c90324ae79bd27af88e012b4ba342c163c95cec8d4876ad78a71bf
|
7
|
+
data.tar.gz: 843fdb0ec6028281116b3461bcfe246bb3e7a9301abebd50892d2a3e9e18744b899b0cc422651a67939fd42097e179b24dac864fb253ac7a031ba7a4c2f340f7
|
data/build.gradle
CHANGED
@@ -2,6 +2,7 @@ plugins {
|
|
2
2
|
id "com.jfrog.bintray" version "1.1"
|
3
3
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
4
|
id "java"
|
5
|
+
id "checkstyle"
|
5
6
|
id "eclipse"
|
6
7
|
}
|
7
8
|
import com.github.jrubygradle.JRubyExec
|
@@ -13,14 +14,14 @@ configurations {
|
|
13
14
|
provided
|
14
15
|
}
|
15
16
|
|
16
|
-
version = "0.2.3"
|
17
|
+
version = "0.3.0"
|
17
18
|
|
18
19
|
sourceCompatibility = 1.7
|
19
20
|
targetCompatibility = 1.7
|
20
21
|
|
21
22
|
dependencies {
|
22
|
-
compile "org.embulk:embulk-core:0.
|
23
|
-
provided "org.embulk:embulk-core:0.
|
23
|
+
compile "org.embulk:embulk-core:0.8.9"
|
24
|
+
provided "org.embulk:embulk-core:0.8.9"
|
24
25
|
compile 'is.tagomor.woothee:woothee-java:1.2.0'
|
25
26
|
testCompile "junit:junit:4.+"
|
26
27
|
}
|
@@ -32,6 +33,23 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
|
|
32
33
|
}
|
33
34
|
clean { delete "classpath" }
|
34
35
|
|
36
|
+
checkstyle {
|
37
|
+
configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
|
38
|
+
toolVersion = '6.14.1'
|
39
|
+
}
|
40
|
+
checkstyleMain {
|
41
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
42
|
+
ignoreFailures = true
|
43
|
+
}
|
44
|
+
checkstyleTest {
|
45
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
46
|
+
ignoreFailures = true
|
47
|
+
}
|
48
|
+
task checkstyle(type: Checkstyle) {
|
49
|
+
classpath = sourceSets.main.output + sourceSets.test.output
|
50
|
+
source = sourceSets.main.allJava + sourceSets.test.allJava
|
51
|
+
}
|
52
|
+
|
35
53
|
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
36
54
|
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
|
37
55
|
script "${project.name}.gemspec"
|
@@ -0,0 +1,128 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<module name="Checker">
|
6
|
+
<!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
|
7
|
+
<module name="FileTabCharacter"/>
|
8
|
+
<module name="NewlineAtEndOfFile">
|
9
|
+
<property name="lineSeparator" value="lf"/>
|
10
|
+
</module>
|
11
|
+
<module name="RegexpMultiline">
|
12
|
+
<property name="format" value="\r"/>
|
13
|
+
<property name="message" value="Line contains carriage return"/>
|
14
|
+
</module>
|
15
|
+
<module name="RegexpMultiline">
|
16
|
+
<property name="format" value=" \n"/>
|
17
|
+
<property name="message" value="Line has trailing whitespace"/>
|
18
|
+
</module>
|
19
|
+
<module name="RegexpMultiline">
|
20
|
+
<property name="format" value="\{\n\n"/>
|
21
|
+
<property name="message" value="Blank line after opening brace"/>
|
22
|
+
</module>
|
23
|
+
<module name="RegexpMultiline">
|
24
|
+
<property name="format" value="\n\n\s*\}"/>
|
25
|
+
<property name="message" value="Blank line before closing brace"/>
|
26
|
+
</module>
|
27
|
+
<module name="RegexpMultiline">
|
28
|
+
<property name="format" value="\n\n\n"/>
|
29
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
30
|
+
</module>
|
31
|
+
<module name="RegexpMultiline">
|
32
|
+
<property name="format" value="\n\n\Z"/>
|
33
|
+
<property name="message" value="Blank line before end of file"/>
|
34
|
+
</module>
|
35
|
+
<module name="RegexpMultiline">
|
36
|
+
<property name="format" value="Preconditions\.checkNotNull"/>
|
37
|
+
<property name="message" value="Use of checkNotNull"/>
|
38
|
+
</module>
|
39
|
+
|
40
|
+
<module name="TreeWalker">
|
41
|
+
<module name="EmptyBlock">
|
42
|
+
<property name="option" value="text"/>
|
43
|
+
<property name="tokens" value="
|
44
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
45
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
46
|
+
</module>
|
47
|
+
<module name="EmptyStatement"/>
|
48
|
+
<module name="EmptyForInitializerPad"/>
|
49
|
+
<module name="EmptyForIteratorPad">
|
50
|
+
<property name="option" value="space"/>
|
51
|
+
</module>
|
52
|
+
<module name="MethodParamPad">
|
53
|
+
<property name="allowLineBreaks" value="true"/>
|
54
|
+
<property name="option" value="nospace"/>
|
55
|
+
</module>
|
56
|
+
<module name="ParenPad"/>
|
57
|
+
<module name="TypecastParenPad"/>
|
58
|
+
<module name="NeedBraces"/>
|
59
|
+
<module name="LeftCurly">
|
60
|
+
<property name="option" value="nl"/>
|
61
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
62
|
+
</module>
|
63
|
+
<module name="LeftCurly">
|
64
|
+
<property name="option" value="eol"/>
|
65
|
+
<property name="tokens" value="
|
66
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
67
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
68
|
+
</module>
|
69
|
+
<module name="RightCurly">
|
70
|
+
<property name="option" value="alone"/>
|
71
|
+
</module>
|
72
|
+
<module name="GenericWhitespace"/>
|
73
|
+
<module name="WhitespaceAfter"/>
|
74
|
+
<module name="NoWhitespaceBefore"/>
|
75
|
+
|
76
|
+
<module name="UpperEll"/>
|
77
|
+
<module name="DefaultComesLast"/>
|
78
|
+
<module name="ArrayTypeStyle"/>
|
79
|
+
<module name="MultipleVariableDeclarations"/>
|
80
|
+
<module name="ModifierOrder"/>
|
81
|
+
<module name="OneStatementPerLine"/>
|
82
|
+
<module name="StringLiteralEquality"/>
|
83
|
+
<module name="MutableException"/>
|
84
|
+
<module name="EqualsHashCode"/>
|
85
|
+
<module name="InnerAssignment"/>
|
86
|
+
<module name="InterfaceIsType"/>
|
87
|
+
<module name="HideUtilityClassConstructor"/>
|
88
|
+
|
89
|
+
<module name="MemberName"/>
|
90
|
+
<module name="LocalVariableName"/>
|
91
|
+
<module name="LocalFinalVariableName"/>
|
92
|
+
<module name="TypeName"/>
|
93
|
+
<module name="PackageName"/>
|
94
|
+
<module name="ParameterName"/>
|
95
|
+
<module name="StaticVariableName"/>
|
96
|
+
<module name="ClassTypeParameterName">
|
97
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
98
|
+
</module>
|
99
|
+
<module name="MethodTypeParameterName">
|
100
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
101
|
+
</module>
|
102
|
+
|
103
|
+
<module name="AvoidStarImport"/>
|
104
|
+
<module name="RedundantImport"/>
|
105
|
+
<module name="UnusedImports"/>
|
106
|
+
<module name="ImportOrder">
|
107
|
+
<property name="groups" value="*,javax,java"/>
|
108
|
+
<property name="separated" value="true"/>
|
109
|
+
<property name="option" value="bottom"/>
|
110
|
+
<property name="sortStaticImportsAlphabetically" value="true"/>
|
111
|
+
</module>
|
112
|
+
|
113
|
+
<module name="WhitespaceAround">
|
114
|
+
<property name="allowEmptyConstructors" value="true"/>
|
115
|
+
<property name="allowEmptyMethods" value="true"/>
|
116
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
117
|
+
<property name="tokens" value="
|
118
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
119
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
120
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
121
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
122
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
123
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
124
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
125
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
126
|
+
</module>
|
127
|
+
</module>
|
128
|
+
</module>
|
@@ -0,0 +1,108 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<!--
|
6
|
+
This is a subset of ./checkstyle.xml which allows some loose styles
|
7
|
+
-->
|
8
|
+
<module name="Checker">
|
9
|
+
<module name="FileTabCharacter"/>
|
10
|
+
<module name="NewlineAtEndOfFile">
|
11
|
+
<property name="lineSeparator" value="lf"/>
|
12
|
+
</module>
|
13
|
+
<module name="RegexpMultiline">
|
14
|
+
<property name="format" value="\r"/>
|
15
|
+
<property name="message" value="Line contains carriage return"/>
|
16
|
+
</module>
|
17
|
+
<module name="RegexpMultiline">
|
18
|
+
<property name="format" value=" \n"/>
|
19
|
+
<property name="message" value="Line has trailing whitespace"/>
|
20
|
+
</module>
|
21
|
+
<module name="RegexpMultiline">
|
22
|
+
<property name="format" value="\n\n\n"/>
|
23
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
24
|
+
</module>
|
25
|
+
<module name="RegexpMultiline">
|
26
|
+
<property name="format" value="\n\n\Z"/>
|
27
|
+
<property name="message" value="Blank line before end of file"/>
|
28
|
+
</module>
|
29
|
+
|
30
|
+
<module name="TreeWalker">
|
31
|
+
<module name="EmptyBlock">
|
32
|
+
<property name="option" value="text"/>
|
33
|
+
<property name="tokens" value="
|
34
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
35
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
36
|
+
</module>
|
37
|
+
<module name="EmptyStatement"/>
|
38
|
+
<module name="EmptyForInitializerPad"/>
|
39
|
+
<module name="EmptyForIteratorPad">
|
40
|
+
<property name="option" value="space"/>
|
41
|
+
</module>
|
42
|
+
<module name="MethodParamPad">
|
43
|
+
<property name="allowLineBreaks" value="true"/>
|
44
|
+
<property name="option" value="nospace"/>
|
45
|
+
</module>
|
46
|
+
<module name="ParenPad"/>
|
47
|
+
<module name="TypecastParenPad"/>
|
48
|
+
<module name="NeedBraces"/>
|
49
|
+
<module name="LeftCurly">
|
50
|
+
<property name="option" value="nl"/>
|
51
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
52
|
+
</module>
|
53
|
+
<module name="LeftCurly">
|
54
|
+
<property name="option" value="eol"/>
|
55
|
+
<property name="tokens" value="
|
56
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
57
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
58
|
+
</module>
|
59
|
+
<module name="RightCurly">
|
60
|
+
<property name="option" value="alone"/>
|
61
|
+
</module>
|
62
|
+
<module name="GenericWhitespace"/>
|
63
|
+
<module name="WhitespaceAfter"/>
|
64
|
+
<module name="NoWhitespaceBefore"/>
|
65
|
+
|
66
|
+
<module name="UpperEll"/>
|
67
|
+
<module name="DefaultComesLast"/>
|
68
|
+
<module name="ArrayTypeStyle"/>
|
69
|
+
<module name="MultipleVariableDeclarations"/>
|
70
|
+
<module name="ModifierOrder"/>
|
71
|
+
<module name="OneStatementPerLine"/>
|
72
|
+
<module name="StringLiteralEquality"/>
|
73
|
+
<module name="MutableException"/>
|
74
|
+
<module name="EqualsHashCode"/>
|
75
|
+
<module name="InnerAssignment"/>
|
76
|
+
<module name="InterfaceIsType"/>
|
77
|
+
<module name="HideUtilityClassConstructor"/>
|
78
|
+
|
79
|
+
<module name="MemberName"/>
|
80
|
+
<module name="LocalVariableName"/>
|
81
|
+
<module name="LocalFinalVariableName"/>
|
82
|
+
<module name="TypeName"/>
|
83
|
+
<module name="PackageName"/>
|
84
|
+
<module name="ParameterName"/>
|
85
|
+
<module name="StaticVariableName"/>
|
86
|
+
<module name="ClassTypeParameterName">
|
87
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
88
|
+
</module>
|
89
|
+
<module name="MethodTypeParameterName">
|
90
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
91
|
+
</module>
|
92
|
+
|
93
|
+
<module name="WhitespaceAround">
|
94
|
+
<property name="allowEmptyConstructors" value="true"/>
|
95
|
+
<property name="allowEmptyMethods" value="true"/>
|
96
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
97
|
+
<property name="tokens" value="
|
98
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
99
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
100
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
101
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
102
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
103
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
104
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
105
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
106
|
+
</module>
|
107
|
+
</module>
|
108
|
+
</module>
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Wed Jan 13 12:41:02 JST 2016
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
|
@@ -1,9 +1,6 @@
|
|
1
1
|
package org.embulk.filter;
|
2
2
|
|
3
|
-
import is.tagomor.woothee.Classifier;
|
4
|
-
|
5
3
|
import java.util.List;
|
6
|
-
import java.util.Map;
|
7
4
|
|
8
5
|
import org.embulk.config.Config;
|
9
6
|
import org.embulk.config.ConfigDefault;
|
@@ -11,18 +8,13 @@ import org.embulk.config.ConfigSource;
|
|
11
8
|
import org.embulk.config.Task;
|
12
9
|
import org.embulk.config.TaskSource;
|
13
10
|
import org.embulk.spi.Column;
|
14
|
-
import org.embulk.spi.Exec;
|
15
11
|
import org.embulk.spi.FilterPlugin;
|
16
|
-
import org.embulk.spi.Page;
|
17
|
-
import org.embulk.spi.PageBuilder;
|
18
12
|
import org.embulk.spi.PageOutput;
|
19
|
-
import org.embulk.spi.PageReader;
|
20
13
|
import org.embulk.spi.Schema;
|
21
14
|
import org.embulk.spi.type.Types;
|
22
15
|
|
23
16
|
import com.google.common.base.Optional;
|
24
17
|
import com.google.common.collect.ImmutableList;
|
25
|
-
import com.google.common.collect.Maps;
|
26
18
|
|
27
19
|
public class WootheeFilterPlugin implements FilterPlugin
|
28
20
|
{
|
@@ -70,6 +62,17 @@ public class WootheeFilterPlugin implements FilterPlugin
|
|
70
62
|
{
|
71
63
|
PluginTask task = config.loadConfig(PluginTask.class);
|
72
64
|
|
65
|
+
Schema outputSchema = buildOutputSchema(task, inputSchema);
|
66
|
+
|
67
|
+
control.run(task.dump(), outputSchema);
|
68
|
+
}
|
69
|
+
|
70
|
+
/**
|
71
|
+
* @param task
|
72
|
+
* @param inputSchema
|
73
|
+
* @return
|
74
|
+
*/
|
75
|
+
private Schema buildOutputSchema(PluginTask task, Schema inputSchema) {
|
73
76
|
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
74
77
|
int i = 0;
|
75
78
|
for (Column inputColumn: inputSchema.getColumns()) {
|
@@ -84,94 +87,12 @@ public class WootheeFilterPlugin implements FilterPlugin
|
|
84
87
|
builder.add(new Column(i++, task.getOutKeyVendor(), Types.STRING));
|
85
88
|
}
|
86
89
|
Schema outputSchema = new Schema(builder.build());
|
87
|
-
|
88
|
-
control.run(task.dump(), outputSchema);
|
90
|
+
return outputSchema;
|
89
91
|
}
|
90
92
|
|
91
93
|
@Override
|
92
94
|
public PageOutput open(TaskSource taskSource, final Schema inputSchema, final Schema outputSchema, final PageOutput output)
|
93
95
|
{
|
94
|
-
|
95
|
-
|
96
|
-
final List<Column> outputColumns = outputSchema.getColumns();
|
97
|
-
final List<Column> inputColumns = inputSchema.getColumns();
|
98
|
-
Map<String, Column> inputColumnMap = Maps.newHashMap();
|
99
|
-
final Map<String, Column> wootheeColumnMap = Maps.newHashMap();
|
100
|
-
for (Column column : outputColumns) {
|
101
|
-
if (!inputColumns.contains(column)) {
|
102
|
-
wootheeColumnMap.put(column.getName(), column);
|
103
|
-
} else {
|
104
|
-
inputColumnMap.put(column.getName(), column);
|
105
|
-
}
|
106
|
-
}
|
107
|
-
final Column keyNameColumn = inputColumnMap.get(task.getKeyName());
|
108
|
-
|
109
|
-
return new PageOutput() {
|
110
|
-
private PageReader reader = new PageReader(inputSchema);
|
111
|
-
private PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
112
|
-
|
113
|
-
@Override
|
114
|
-
public void finish() {
|
115
|
-
builder.finish();
|
116
|
-
}
|
117
|
-
|
118
|
-
@Override
|
119
|
-
public void close() {
|
120
|
-
builder.close();
|
121
|
-
}
|
122
|
-
|
123
|
-
@Override
|
124
|
-
public void add(Page page) {
|
125
|
-
reader.setPage(page);
|
126
|
-
|
127
|
-
while (reader.nextRecord()) {
|
128
|
-
String userAgentString = reader.isNull(keyNameColumn) ? null : reader.getString(keyNameColumn);
|
129
|
-
Map<String, String> ua = Classifier.parse(userAgentString);
|
130
|
-
setValue(builder, ua);
|
131
|
-
if (task.getFilterCategories().isPresent()) {
|
132
|
-
if (task.getFilterCategories().orNull().contains(ua.get("category"))) {
|
133
|
-
builder.addRecord();
|
134
|
-
}
|
135
|
-
} else if (task.getDropCategories().isPresent()) {
|
136
|
-
if (!task.getDropCategories().orNull().contains(ua.get("category"))) {
|
137
|
-
builder.addRecord();
|
138
|
-
}
|
139
|
-
} else {
|
140
|
-
builder.addRecord();
|
141
|
-
}
|
142
|
-
}
|
143
|
-
}
|
144
|
-
|
145
|
-
/**
|
146
|
-
* @param builder
|
147
|
-
*/
|
148
|
-
private void setValue(PageBuilder builder, Map<String, String> ua) {
|
149
|
-
if (task.getMergeAgentInfo()) {
|
150
|
-
builder.setString(wootheeColumnMap.get(task.getOutKeyOs()), ua.get("os"));
|
151
|
-
builder.setString(wootheeColumnMap.get(task.getOutKeyName()), ua.get("name"));
|
152
|
-
builder.setString(wootheeColumnMap.get(task.getOutKeyCategory()), ua.get("category"));
|
153
|
-
builder.setString(wootheeColumnMap.get(task.getOutKeyVersion()), ua.get("version"));
|
154
|
-
builder.setString(wootheeColumnMap.get(task.getOutKeyVendor()), ua.get("vendor"));
|
155
|
-
}
|
156
|
-
|
157
|
-
for (Column inputColumn: inputColumns) {
|
158
|
-
if (reader.isNull(inputColumn)) {
|
159
|
-
builder.setNull(inputColumn);
|
160
|
-
continue;
|
161
|
-
}
|
162
|
-
if (Types.STRING.equals(inputColumn.getType())) {
|
163
|
-
builder.setString(inputColumn, reader.getString(inputColumn));
|
164
|
-
} else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
165
|
-
builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
|
166
|
-
} else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
167
|
-
builder.setDouble(inputColumn, reader.getDouble(inputColumn));
|
168
|
-
} else if (Types.LONG.equals(inputColumn.getType())) {
|
169
|
-
builder.setLong(inputColumn, reader.getLong(inputColumn));
|
170
|
-
} else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
171
|
-
builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
|
172
|
-
}
|
173
|
-
}
|
174
|
-
}
|
175
|
-
};
|
96
|
+
return new WootheePageOutput(taskSource, inputSchema, outputSchema, output);
|
176
97
|
}
|
177
98
|
}
|
@@ -0,0 +1,113 @@
|
|
1
|
+
package org.embulk.filter;
|
2
|
+
|
3
|
+
import is.tagomor.woothee.Classifier;
|
4
|
+
|
5
|
+
import java.util.List;
|
6
|
+
import java.util.Map;
|
7
|
+
|
8
|
+
import org.embulk.config.TaskSource;
|
9
|
+
import org.embulk.filter.WootheeFilterPlugin.PluginTask;
|
10
|
+
import org.embulk.spi.Column;
|
11
|
+
import org.embulk.spi.Exec;
|
12
|
+
import org.embulk.spi.Page;
|
13
|
+
import org.embulk.spi.PageBuilder;
|
14
|
+
import org.embulk.spi.PageOutput;
|
15
|
+
import org.embulk.spi.PageReader;
|
16
|
+
import org.embulk.spi.Schema;
|
17
|
+
import org.embulk.spi.type.Types;
|
18
|
+
|
19
|
+
import com.google.common.collect.Maps;
|
20
|
+
|
21
|
+
public class WootheePageOutput implements PageOutput
|
22
|
+
{
|
23
|
+
private final PluginTask task;
|
24
|
+
private final List<Column> outputColumns;
|
25
|
+
private final List<Column> inputColumns;
|
26
|
+
private final Map<String, Column> wootheeColumnMap;
|
27
|
+
private final Column keyNameColumn;
|
28
|
+
private final PageReader reader;
|
29
|
+
private final PageBuilder builder;
|
30
|
+
|
31
|
+
public WootheePageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output) {
|
32
|
+
this.task = taskSource.loadTask(PluginTask.class);
|
33
|
+
this.outputColumns = outputSchema.getColumns();
|
34
|
+
this.inputColumns = inputSchema.getColumns();
|
35
|
+
Map<String, Column> inputColumnMap = Maps.newHashMap();
|
36
|
+
this.wootheeColumnMap = Maps.newHashMap();
|
37
|
+
for (Column column : outputColumns) {
|
38
|
+
if (!inputColumns.contains(column)) {
|
39
|
+
wootheeColumnMap.put(column.getName(), column);
|
40
|
+
} else {
|
41
|
+
inputColumnMap.put(column.getName(), column);
|
42
|
+
}
|
43
|
+
}
|
44
|
+
this.reader = new PageReader(inputSchema);
|
45
|
+
this.builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
46
|
+
this.keyNameColumn = inputColumnMap.get(task.getKeyName());
|
47
|
+
}
|
48
|
+
|
49
|
+
@Override
|
50
|
+
public void finish() {
|
51
|
+
builder.finish();
|
52
|
+
}
|
53
|
+
|
54
|
+
@Override
|
55
|
+
public void close() {
|
56
|
+
builder.close();
|
57
|
+
}
|
58
|
+
|
59
|
+
@Override
|
60
|
+
public void add(Page page) {
|
61
|
+
reader.setPage(page);
|
62
|
+
|
63
|
+
while (reader.nextRecord()) {
|
64
|
+
String userAgentString = reader.isNull(keyNameColumn) ? null : reader.getString(keyNameColumn);
|
65
|
+
Map<String, String> ua = Classifier.parse(userAgentString);
|
66
|
+
setValue(builder, ua);
|
67
|
+
if (task.getFilterCategories().isPresent()) {
|
68
|
+
if (task.getFilterCategories().orNull().contains(ua.get("category"))) {
|
69
|
+
builder.addRecord();
|
70
|
+
}
|
71
|
+
} else if (task.getDropCategories().isPresent()) {
|
72
|
+
if (!task.getDropCategories().orNull().contains(ua.get("category"))) {
|
73
|
+
builder.addRecord();
|
74
|
+
}
|
75
|
+
} else {
|
76
|
+
builder.addRecord();
|
77
|
+
}
|
78
|
+
}
|
79
|
+
}
|
80
|
+
|
81
|
+
/**
|
82
|
+
* @param builder
|
83
|
+
*/
|
84
|
+
private void setValue(PageBuilder builder, Map<String, String> ua) {
|
85
|
+
if (task.getMergeAgentInfo()) {
|
86
|
+
builder.setString(wootheeColumnMap.get(task.getOutKeyOs()), ua.get("os"));
|
87
|
+
builder.setString(wootheeColumnMap.get(task.getOutKeyName()), ua.get("name"));
|
88
|
+
builder.setString(wootheeColumnMap.get(task.getOutKeyCategory()), ua.get("category"));
|
89
|
+
builder.setString(wootheeColumnMap.get(task.getOutKeyVersion()), ua.get("version"));
|
90
|
+
builder.setString(wootheeColumnMap.get(task.getOutKeyVendor()), ua.get("vendor"));
|
91
|
+
}
|
92
|
+
|
93
|
+
for (Column inputColumn: inputColumns) {
|
94
|
+
if (reader.isNull(inputColumn)) {
|
95
|
+
builder.setNull(inputColumn);
|
96
|
+
continue;
|
97
|
+
}
|
98
|
+
if (Types.STRING.equals(inputColumn.getType())) {
|
99
|
+
builder.setString(inputColumn, reader.getString(inputColumn));
|
100
|
+
} else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
101
|
+
builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
|
102
|
+
} else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
103
|
+
builder.setDouble(inputColumn, reader.getDouble(inputColumn));
|
104
|
+
} else if (Types.LONG.equals(inputColumn.getType())) {
|
105
|
+
builder.setLong(inputColumn, reader.getLong(inputColumn));
|
106
|
+
} else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
107
|
+
builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
|
108
|
+
} else if (Types.JSON.equals(inputColumn.getType())) {
|
109
|
+
builder.setJson(inputColumn, reader.getJson(inputColumn));
|
110
|
+
}
|
111
|
+
}
|
112
|
+
}
|
113
|
+
}
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-woothee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ~>
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.0'
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
16
|
- - ~>
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: '1.0'
|
19
|
+
name: bundler
|
25
20
|
prerelease: false
|
26
21
|
type: :development
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
|
-
- - '>='
|
24
|
+
- - ~>
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version: '10.0'
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
30
|
- - '>='
|
37
31
|
- !ruby/object:Gem::Version
|
38
32
|
version: '10.0'
|
33
|
+
name: rake
|
39
34
|
prerelease: false
|
40
35
|
type: :development
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
41
|
description: Woothee filter plugin for Embulk
|
42
42
|
email:
|
43
43
|
- toyama0919@gmail.com
|
@@ -50,14 +50,17 @@ files:
|
|
50
50
|
- LICENSE.txt
|
51
51
|
- README.md
|
52
52
|
- build.gradle
|
53
|
+
- config/checkstyle/checkstyle.xml
|
54
|
+
- config/checkstyle/default.xml
|
53
55
|
- gradle/wrapper/gradle-wrapper.jar
|
54
56
|
- gradle/wrapper/gradle-wrapper.properties
|
55
57
|
- gradlew
|
56
58
|
- gradlew.bat
|
57
59
|
- lib/embulk/filter/woothee.rb
|
58
60
|
- src/main/java/org/embulk/filter/WootheeFilterPlugin.java
|
61
|
+
- src/main/java/org/embulk/filter/WootheePageOutput.java
|
59
62
|
- src/test/java/org/embulk/filter/TestWootheeFilterPlugin.java
|
60
|
-
- classpath/embulk-filter-woothee-0.2.3.jar
|
63
|
+
- classpath/embulk-filter-woothee-0.3.0.jar
|
61
64
|
- classpath/woothee-java-1.2.0.jar
|
62
65
|
homepage: https://github.com/toyama0919/embulk-filter-woothee
|
63
66
|
licenses:
|