embulk-filter-woothee 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 29b01b60845eb6e721d785cf6e3f7ba92e8417bf
4
- data.tar.gz: bbfc3a3bc6f53d60229bbcbe81e95806938a0198
3
+ metadata.gz: bf4f311a1e93643d40ce9d75f6eaacb383ab4501
4
+ data.tar.gz: a54509d8e75e0add139fd66db496719e60907b75
5
5
  SHA512:
6
- metadata.gz: 98ede3ff4cf315d83298117354d6ae000565abf5720de0ddaf14546988ba8bf8900fdfe62c45dd77f8112152fc01c5f8c0833a55b90b14e98acfd7f5897cf83e
7
- data.tar.gz: 8e450339dfa94cf18e80c33e50b963a15484526b7ce09c0f1d8aa2d41d624401ff52b7d651707deb0acb065679eb7268057b31f08e3bcd3f3dd76b44bcfcab6a
6
+ metadata.gz: 705ab844c47a63fd106a706046132280509cdcd0f88239d43be214e1ce1949613865cd68b7c90324ae79bd27af88e012b4ba342c163c95cec8d4876ad78a71bf
7
+ data.tar.gz: 843fdb0ec6028281116b3461bcfe246bb3e7a9301abebd50892d2a3e9e18744b899b0cc422651a67939fd42097e179b24dac864fb253ac7a031ba7a4c2f340f7
@@ -2,6 +2,7 @@ plugins {
2
2
  id "com.jfrog.bintray" version "1.1"
3
3
  id "com.github.jruby-gradle.base" version "0.1.5"
4
4
  id "java"
5
+ id "checkstyle"
5
6
  id "eclipse"
6
7
  }
7
8
  import com.github.jrubygradle.JRubyExec
@@ -13,14 +14,14 @@ configurations {
13
14
  provided
14
15
  }
15
16
 
16
- version = "0.2.3"
17
+ version = "0.3.0"
17
18
 
18
19
  sourceCompatibility = 1.7
19
20
  targetCompatibility = 1.7
20
21
 
21
22
  dependencies {
22
- compile "org.embulk:embulk-core:0.7.5"
23
- provided "org.embulk:embulk-core:0.7.5"
23
+ compile "org.embulk:embulk-core:0.8.9"
24
+ provided "org.embulk:embulk-core:0.8.9"
24
25
  compile 'is.tagomor.woothee:woothee-java:1.2.0'
25
26
  testCompile "junit:junit:4.+"
26
27
  }
@@ -32,6 +33,23 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
32
33
  }
33
34
  clean { delete "classpath" }
34
35
 
36
+ checkstyle {
37
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
38
+ toolVersion = '6.14.1'
39
+ }
40
+ checkstyleMain {
41
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
42
+ ignoreFailures = true
43
+ }
44
+ checkstyleTest {
45
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
46
+ ignoreFailures = true
47
+ }
48
+ task checkstyle(type: Checkstyle) {
49
+ classpath = sourceSets.main.output + sourceSets.test.output
50
+ source = sourceSets.main.allJava + sourceSets.test.allJava
51
+ }
52
+
35
53
  task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
36
54
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
37
55
  script "${project.name}.gemspec"
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
@@ -1,6 +1,6 @@
1
- #Tue Aug 11 00:26:20 PDT 2015
1
+ #Wed Jan 13 12:41:02 JST 2016
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
@@ -1,9 +1,6 @@
1
1
  package org.embulk.filter;
2
2
 
3
- import is.tagomor.woothee.Classifier;
4
-
5
3
  import java.util.List;
6
- import java.util.Map;
7
4
 
8
5
  import org.embulk.config.Config;
9
6
  import org.embulk.config.ConfigDefault;
@@ -11,18 +8,13 @@ import org.embulk.config.ConfigSource;
11
8
  import org.embulk.config.Task;
12
9
  import org.embulk.config.TaskSource;
13
10
  import org.embulk.spi.Column;
14
- import org.embulk.spi.Exec;
15
11
  import org.embulk.spi.FilterPlugin;
16
- import org.embulk.spi.Page;
17
- import org.embulk.spi.PageBuilder;
18
12
  import org.embulk.spi.PageOutput;
19
- import org.embulk.spi.PageReader;
20
13
  import org.embulk.spi.Schema;
21
14
  import org.embulk.spi.type.Types;
22
15
 
23
16
  import com.google.common.base.Optional;
24
17
  import com.google.common.collect.ImmutableList;
25
- import com.google.common.collect.Maps;
26
18
 
27
19
  public class WootheeFilterPlugin implements FilterPlugin
28
20
  {
@@ -70,6 +62,17 @@ public class WootheeFilterPlugin implements FilterPlugin
70
62
  {
71
63
  PluginTask task = config.loadConfig(PluginTask.class);
72
64
 
65
+ Schema outputSchema = buildOutputSchema(task, inputSchema);
66
+
67
+ control.run(task.dump(), outputSchema);
68
+ }
69
+
70
+ /**
71
+ * @param task
72
+ * @param inputSchema
73
+ * @return
74
+ */
75
+ private Schema buildOutputSchema(PluginTask task, Schema inputSchema) {
73
76
  ImmutableList.Builder<Column> builder = ImmutableList.builder();
74
77
  int i = 0;
75
78
  for (Column inputColumn: inputSchema.getColumns()) {
@@ -84,94 +87,12 @@ public class WootheeFilterPlugin implements FilterPlugin
84
87
  builder.add(new Column(i++, task.getOutKeyVendor(), Types.STRING));
85
88
  }
86
89
  Schema outputSchema = new Schema(builder.build());
87
-
88
- control.run(task.dump(), outputSchema);
90
+ return outputSchema;
89
91
  }
90
92
 
91
93
  @Override
92
94
  public PageOutput open(TaskSource taskSource, final Schema inputSchema, final Schema outputSchema, final PageOutput output)
93
95
  {
94
- final PluginTask task = taskSource.loadTask(PluginTask.class);
95
-
96
- final List<Column> outputColumns = outputSchema.getColumns();
97
- final List<Column> inputColumns = inputSchema.getColumns();
98
- Map<String, Column> inputColumnMap = Maps.newHashMap();
99
- final Map<String, Column> wootheeColumnMap = Maps.newHashMap();
100
- for (Column column : outputColumns) {
101
- if (!inputColumns.contains(column)) {
102
- wootheeColumnMap.put(column.getName(), column);
103
- } else {
104
- inputColumnMap.put(column.getName(), column);
105
- }
106
- }
107
- final Column keyNameColumn = inputColumnMap.get(task.getKeyName());
108
-
109
- return new PageOutput() {
110
- private PageReader reader = new PageReader(inputSchema);
111
- private PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
112
-
113
- @Override
114
- public void finish() {
115
- builder.finish();
116
- }
117
-
118
- @Override
119
- public void close() {
120
- builder.close();
121
- }
122
-
123
- @Override
124
- public void add(Page page) {
125
- reader.setPage(page);
126
-
127
- while (reader.nextRecord()) {
128
- String userAgentString = reader.isNull(keyNameColumn) ? null : reader.getString(keyNameColumn);
129
- Map<String, String> ua = Classifier.parse(userAgentString);
130
- setValue(builder, ua);
131
- if (task.getFilterCategories().isPresent()) {
132
- if (task.getFilterCategories().orNull().contains(ua.get("category"))) {
133
- builder.addRecord();
134
- }
135
- } else if (task.getDropCategories().isPresent()) {
136
- if (!task.getDropCategories().orNull().contains(ua.get("category"))) {
137
- builder.addRecord();
138
- }
139
- } else {
140
- builder.addRecord();
141
- }
142
- }
143
- }
144
-
145
- /**
146
- * @param builder
147
- */
148
- private void setValue(PageBuilder builder, Map<String, String> ua) {
149
- if (task.getMergeAgentInfo()) {
150
- builder.setString(wootheeColumnMap.get(task.getOutKeyOs()), ua.get("os"));
151
- builder.setString(wootheeColumnMap.get(task.getOutKeyName()), ua.get("name"));
152
- builder.setString(wootheeColumnMap.get(task.getOutKeyCategory()), ua.get("category"));
153
- builder.setString(wootheeColumnMap.get(task.getOutKeyVersion()), ua.get("version"));
154
- builder.setString(wootheeColumnMap.get(task.getOutKeyVendor()), ua.get("vendor"));
155
- }
156
-
157
- for (Column inputColumn: inputColumns) {
158
- if (reader.isNull(inputColumn)) {
159
- builder.setNull(inputColumn);
160
- continue;
161
- }
162
- if (Types.STRING.equals(inputColumn.getType())) {
163
- builder.setString(inputColumn, reader.getString(inputColumn));
164
- } else if (Types.BOOLEAN.equals(inputColumn.getType())) {
165
- builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
166
- } else if (Types.DOUBLE.equals(inputColumn.getType())) {
167
- builder.setDouble(inputColumn, reader.getDouble(inputColumn));
168
- } else if (Types.LONG.equals(inputColumn.getType())) {
169
- builder.setLong(inputColumn, reader.getLong(inputColumn));
170
- } else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
171
- builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
172
- }
173
- }
174
- }
175
- };
96
+ return new WootheePageOutput(taskSource, inputSchema, outputSchema, output);
176
97
  }
177
98
  }
@@ -0,0 +1,113 @@
1
+ package org.embulk.filter;
2
+
3
+ import is.tagomor.woothee.Classifier;
4
+
5
+ import java.util.List;
6
+ import java.util.Map;
7
+
8
+ import org.embulk.config.TaskSource;
9
+ import org.embulk.filter.WootheeFilterPlugin.PluginTask;
10
+ import org.embulk.spi.Column;
11
+ import org.embulk.spi.Exec;
12
+ import org.embulk.spi.Page;
13
+ import org.embulk.spi.PageBuilder;
14
+ import org.embulk.spi.PageOutput;
15
+ import org.embulk.spi.PageReader;
16
+ import org.embulk.spi.Schema;
17
+ import org.embulk.spi.type.Types;
18
+
19
+ import com.google.common.collect.Maps;
20
+
21
+ public class WootheePageOutput implements PageOutput
22
+ {
23
+ private final PluginTask task;
24
+ private final List<Column> outputColumns;
25
+ private final List<Column> inputColumns;
26
+ private final Map<String, Column> wootheeColumnMap;
27
+ private final Column keyNameColumn;
28
+ private final PageReader reader;
29
+ private final PageBuilder builder;
30
+
31
+ public WootheePageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output) {
32
+ this.task = taskSource.loadTask(PluginTask.class);
33
+ this.outputColumns = outputSchema.getColumns();
34
+ this.inputColumns = inputSchema.getColumns();
35
+ Map<String, Column> inputColumnMap = Maps.newHashMap();
36
+ this.wootheeColumnMap = Maps.newHashMap();
37
+ for (Column column : outputColumns) {
38
+ if (!inputColumns.contains(column)) {
39
+ wootheeColumnMap.put(column.getName(), column);
40
+ } else {
41
+ inputColumnMap.put(column.getName(), column);
42
+ }
43
+ }
44
+ this.reader = new PageReader(inputSchema);
45
+ this.builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
46
+ this.keyNameColumn = inputColumnMap.get(task.getKeyName());
47
+ }
48
+
49
+ @Override
50
+ public void finish() {
51
+ builder.finish();
52
+ }
53
+
54
+ @Override
55
+ public void close() {
56
+ builder.close();
57
+ }
58
+
59
+ @Override
60
+ public void add(Page page) {
61
+ reader.setPage(page);
62
+
63
+ while (reader.nextRecord()) {
64
+ String userAgentString = reader.isNull(keyNameColumn) ? null : reader.getString(keyNameColumn);
65
+ Map<String, String> ua = Classifier.parse(userAgentString);
66
+ setValue(builder, ua);
67
+ if (task.getFilterCategories().isPresent()) {
68
+ if (task.getFilterCategories().orNull().contains(ua.get("category"))) {
69
+ builder.addRecord();
70
+ }
71
+ } else if (task.getDropCategories().isPresent()) {
72
+ if (!task.getDropCategories().orNull().contains(ua.get("category"))) {
73
+ builder.addRecord();
74
+ }
75
+ } else {
76
+ builder.addRecord();
77
+ }
78
+ }
79
+ }
80
+
81
+ /**
82
+ * @param builder
83
+ */
84
+ private void setValue(PageBuilder builder, Map<String, String> ua) {
85
+ if (task.getMergeAgentInfo()) {
86
+ builder.setString(wootheeColumnMap.get(task.getOutKeyOs()), ua.get("os"));
87
+ builder.setString(wootheeColumnMap.get(task.getOutKeyName()), ua.get("name"));
88
+ builder.setString(wootheeColumnMap.get(task.getOutKeyCategory()), ua.get("category"));
89
+ builder.setString(wootheeColumnMap.get(task.getOutKeyVersion()), ua.get("version"));
90
+ builder.setString(wootheeColumnMap.get(task.getOutKeyVendor()), ua.get("vendor"));
91
+ }
92
+
93
+ for (Column inputColumn: inputColumns) {
94
+ if (reader.isNull(inputColumn)) {
95
+ builder.setNull(inputColumn);
96
+ continue;
97
+ }
98
+ if (Types.STRING.equals(inputColumn.getType())) {
99
+ builder.setString(inputColumn, reader.getString(inputColumn));
100
+ } else if (Types.BOOLEAN.equals(inputColumn.getType())) {
101
+ builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
102
+ } else if (Types.DOUBLE.equals(inputColumn.getType())) {
103
+ builder.setDouble(inputColumn, reader.getDouble(inputColumn));
104
+ } else if (Types.LONG.equals(inputColumn.getType())) {
105
+ builder.setLong(inputColumn, reader.getLong(inputColumn));
106
+ } else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
107
+ builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
108
+ } else if (Types.JSON.equals(inputColumn.getType())) {
109
+ builder.setJson(inputColumn, reader.getJson(inputColumn));
110
+ }
111
+ }
112
+ }
113
+ }
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-woothee
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - toyama0919
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-27 00:00:00.000000000 Z
11
+ date: 2016-06-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
14
  requirement: !ruby/object:Gem::Requirement
21
15
  requirements:
22
16
  - - ~>
23
17
  - !ruby/object:Gem::Version
24
18
  version: '1.0'
19
+ name: bundler
25
20
  prerelease: false
26
21
  type: :development
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - '>='
24
+ - - ~>
32
25
  - !ruby/object:Gem::Version
33
- version: '10.0'
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
34
28
  requirement: !ruby/object:Gem::Requirement
35
29
  requirements:
36
30
  - - '>='
37
31
  - !ruby/object:Gem::Version
38
32
  version: '10.0'
33
+ name: rake
39
34
  prerelease: false
40
35
  type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
41
  description: Woothee filter plugin for Embulk
42
42
  email:
43
43
  - toyama0919@gmail.com
@@ -50,14 +50,17 @@ files:
50
50
  - LICENSE.txt
51
51
  - README.md
52
52
  - build.gradle
53
+ - config/checkstyle/checkstyle.xml
54
+ - config/checkstyle/default.xml
53
55
  - gradle/wrapper/gradle-wrapper.jar
54
56
  - gradle/wrapper/gradle-wrapper.properties
55
57
  - gradlew
56
58
  - gradlew.bat
57
59
  - lib/embulk/filter/woothee.rb
58
60
  - src/main/java/org/embulk/filter/WootheeFilterPlugin.java
61
+ - src/main/java/org/embulk/filter/WootheePageOutput.java
59
62
  - src/test/java/org/embulk/filter/TestWootheeFilterPlugin.java
60
- - classpath/embulk-filter-woothee-0.2.3.jar
63
+ - classpath/embulk-filter-woothee-0.3.0.jar
61
64
  - classpath/woothee-java-1.2.0.jar
62
65
  homepage: https://github.com/toyama0919/embulk-filter-woothee
63
66
  licenses: