embulk-filter-woothee 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 29b01b60845eb6e721d785cf6e3f7ba92e8417bf
4
- data.tar.gz: bbfc3a3bc6f53d60229bbcbe81e95806938a0198
3
+ metadata.gz: bf4f311a1e93643d40ce9d75f6eaacb383ab4501
4
+ data.tar.gz: a54509d8e75e0add139fd66db496719e60907b75
5
5
  SHA512:
6
- metadata.gz: 98ede3ff4cf315d83298117354d6ae000565abf5720de0ddaf14546988ba8bf8900fdfe62c45dd77f8112152fc01c5f8c0833a55b90b14e98acfd7f5897cf83e
7
- data.tar.gz: 8e450339dfa94cf18e80c33e50b963a15484526b7ce09c0f1d8aa2d41d624401ff52b7d651707deb0acb065679eb7268057b31f08e3bcd3f3dd76b44bcfcab6a
6
+ metadata.gz: 705ab844c47a63fd106a706046132280509cdcd0f88239d43be214e1ce1949613865cd68b7c90324ae79bd27af88e012b4ba342c163c95cec8d4876ad78a71bf
7
+ data.tar.gz: 843fdb0ec6028281116b3461bcfe246bb3e7a9301abebd50892d2a3e9e18744b899b0cc422651a67939fd42097e179b24dac864fb253ac7a031ba7a4c2f340f7
@@ -2,6 +2,7 @@ plugins {
2
2
  id "com.jfrog.bintray" version "1.1"
3
3
  id "com.github.jruby-gradle.base" version "0.1.5"
4
4
  id "java"
5
+ id "checkstyle"
5
6
  id "eclipse"
6
7
  }
7
8
  import com.github.jrubygradle.JRubyExec
@@ -13,14 +14,14 @@ configurations {
13
14
  provided
14
15
  }
15
16
 
16
- version = "0.2.3"
17
+ version = "0.3.0"
17
18
 
18
19
  sourceCompatibility = 1.7
19
20
  targetCompatibility = 1.7
20
21
 
21
22
  dependencies {
22
- compile "org.embulk:embulk-core:0.7.5"
23
- provided "org.embulk:embulk-core:0.7.5"
23
+ compile "org.embulk:embulk-core:0.8.9"
24
+ provided "org.embulk:embulk-core:0.8.9"
24
25
  compile 'is.tagomor.woothee:woothee-java:1.2.0'
25
26
  testCompile "junit:junit:4.+"
26
27
  }
@@ -32,6 +33,23 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
32
33
  }
33
34
  clean { delete "classpath" }
34
35
 
36
+ checkstyle {
37
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
38
+ toolVersion = '6.14.1'
39
+ }
40
+ checkstyleMain {
41
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
42
+ ignoreFailures = true
43
+ }
44
+ checkstyleTest {
45
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
46
+ ignoreFailures = true
47
+ }
48
+ task checkstyle(type: Checkstyle) {
49
+ classpath = sourceSets.main.output + sourceSets.test.output
50
+ source = sourceSets.main.allJava + sourceSets.test.allJava
51
+ }
52
+
35
53
  task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
36
54
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
37
55
  script "${project.name}.gemspec"
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
@@ -1,6 +1,6 @@
1
- #Tue Aug 11 00:26:20 PDT 2015
1
+ #Wed Jan 13 12:41:02 JST 2016
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
@@ -1,9 +1,6 @@
1
1
  package org.embulk.filter;
2
2
 
3
- import is.tagomor.woothee.Classifier;
4
-
5
3
  import java.util.List;
6
- import java.util.Map;
7
4
 
8
5
  import org.embulk.config.Config;
9
6
  import org.embulk.config.ConfigDefault;
@@ -11,18 +8,13 @@ import org.embulk.config.ConfigSource;
11
8
  import org.embulk.config.Task;
12
9
  import org.embulk.config.TaskSource;
13
10
  import org.embulk.spi.Column;
14
- import org.embulk.spi.Exec;
15
11
  import org.embulk.spi.FilterPlugin;
16
- import org.embulk.spi.Page;
17
- import org.embulk.spi.PageBuilder;
18
12
  import org.embulk.spi.PageOutput;
19
- import org.embulk.spi.PageReader;
20
13
  import org.embulk.spi.Schema;
21
14
  import org.embulk.spi.type.Types;
22
15
 
23
16
  import com.google.common.base.Optional;
24
17
  import com.google.common.collect.ImmutableList;
25
- import com.google.common.collect.Maps;
26
18
 
27
19
  public class WootheeFilterPlugin implements FilterPlugin
28
20
  {
@@ -70,6 +62,17 @@ public class WootheeFilterPlugin implements FilterPlugin
70
62
  {
71
63
  PluginTask task = config.loadConfig(PluginTask.class);
72
64
 
65
+ Schema outputSchema = buildOutputSchema(task, inputSchema);
66
+
67
+ control.run(task.dump(), outputSchema);
68
+ }
69
+
70
+ /**
71
+ * @param task
72
+ * @param inputSchema
73
+ * @return
74
+ */
75
+ private Schema buildOutputSchema(PluginTask task, Schema inputSchema) {
73
76
  ImmutableList.Builder<Column> builder = ImmutableList.builder();
74
77
  int i = 0;
75
78
  for (Column inputColumn: inputSchema.getColumns()) {
@@ -84,94 +87,12 @@ public class WootheeFilterPlugin implements FilterPlugin
84
87
  builder.add(new Column(i++, task.getOutKeyVendor(), Types.STRING));
85
88
  }
86
89
  Schema outputSchema = new Schema(builder.build());
87
-
88
- control.run(task.dump(), outputSchema);
90
+ return outputSchema;
89
91
  }
90
92
 
91
93
  @Override
92
94
  public PageOutput open(TaskSource taskSource, final Schema inputSchema, final Schema outputSchema, final PageOutput output)
93
95
  {
94
- final PluginTask task = taskSource.loadTask(PluginTask.class);
95
-
96
- final List<Column> outputColumns = outputSchema.getColumns();
97
- final List<Column> inputColumns = inputSchema.getColumns();
98
- Map<String, Column> inputColumnMap = Maps.newHashMap();
99
- final Map<String, Column> wootheeColumnMap = Maps.newHashMap();
100
- for (Column column : outputColumns) {
101
- if (!inputColumns.contains(column)) {
102
- wootheeColumnMap.put(column.getName(), column);
103
- } else {
104
- inputColumnMap.put(column.getName(), column);
105
- }
106
- }
107
- final Column keyNameColumn = inputColumnMap.get(task.getKeyName());
108
-
109
- return new PageOutput() {
110
- private PageReader reader = new PageReader(inputSchema);
111
- private PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
112
-
113
- @Override
114
- public void finish() {
115
- builder.finish();
116
- }
117
-
118
- @Override
119
- public void close() {
120
- builder.close();
121
- }
122
-
123
- @Override
124
- public void add(Page page) {
125
- reader.setPage(page);
126
-
127
- while (reader.nextRecord()) {
128
- String userAgentString = reader.isNull(keyNameColumn) ? null : reader.getString(keyNameColumn);
129
- Map<String, String> ua = Classifier.parse(userAgentString);
130
- setValue(builder, ua);
131
- if (task.getFilterCategories().isPresent()) {
132
- if (task.getFilterCategories().orNull().contains(ua.get("category"))) {
133
- builder.addRecord();
134
- }
135
- } else if (task.getDropCategories().isPresent()) {
136
- if (!task.getDropCategories().orNull().contains(ua.get("category"))) {
137
- builder.addRecord();
138
- }
139
- } else {
140
- builder.addRecord();
141
- }
142
- }
143
- }
144
-
145
- /**
146
- * @param builder
147
- */
148
- private void setValue(PageBuilder builder, Map<String, String> ua) {
149
- if (task.getMergeAgentInfo()) {
150
- builder.setString(wootheeColumnMap.get(task.getOutKeyOs()), ua.get("os"));
151
- builder.setString(wootheeColumnMap.get(task.getOutKeyName()), ua.get("name"));
152
- builder.setString(wootheeColumnMap.get(task.getOutKeyCategory()), ua.get("category"));
153
- builder.setString(wootheeColumnMap.get(task.getOutKeyVersion()), ua.get("version"));
154
- builder.setString(wootheeColumnMap.get(task.getOutKeyVendor()), ua.get("vendor"));
155
- }
156
-
157
- for (Column inputColumn: inputColumns) {
158
- if (reader.isNull(inputColumn)) {
159
- builder.setNull(inputColumn);
160
- continue;
161
- }
162
- if (Types.STRING.equals(inputColumn.getType())) {
163
- builder.setString(inputColumn, reader.getString(inputColumn));
164
- } else if (Types.BOOLEAN.equals(inputColumn.getType())) {
165
- builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
166
- } else if (Types.DOUBLE.equals(inputColumn.getType())) {
167
- builder.setDouble(inputColumn, reader.getDouble(inputColumn));
168
- } else if (Types.LONG.equals(inputColumn.getType())) {
169
- builder.setLong(inputColumn, reader.getLong(inputColumn));
170
- } else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
171
- builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
172
- }
173
- }
174
- }
175
- };
96
+ return new WootheePageOutput(taskSource, inputSchema, outputSchema, output);
176
97
  }
177
98
  }
@@ -0,0 +1,113 @@
1
+ package org.embulk.filter;
2
+
3
+ import is.tagomor.woothee.Classifier;
4
+
5
+ import java.util.List;
6
+ import java.util.Map;
7
+
8
+ import org.embulk.config.TaskSource;
9
+ import org.embulk.filter.WootheeFilterPlugin.PluginTask;
10
+ import org.embulk.spi.Column;
11
+ import org.embulk.spi.Exec;
12
+ import org.embulk.spi.Page;
13
+ import org.embulk.spi.PageBuilder;
14
+ import org.embulk.spi.PageOutput;
15
+ import org.embulk.spi.PageReader;
16
+ import org.embulk.spi.Schema;
17
+ import org.embulk.spi.type.Types;
18
+
19
+ import com.google.common.collect.Maps;
20
+
21
+ public class WootheePageOutput implements PageOutput
22
+ {
23
+ private final PluginTask task;
24
+ private final List<Column> outputColumns;
25
+ private final List<Column> inputColumns;
26
+ private final Map<String, Column> wootheeColumnMap;
27
+ private final Column keyNameColumn;
28
+ private final PageReader reader;
29
+ private final PageBuilder builder;
30
+
31
+ public WootheePageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output) {
32
+ this.task = taskSource.loadTask(PluginTask.class);
33
+ this.outputColumns = outputSchema.getColumns();
34
+ this.inputColumns = inputSchema.getColumns();
35
+ Map<String, Column> inputColumnMap = Maps.newHashMap();
36
+ this.wootheeColumnMap = Maps.newHashMap();
37
+ for (Column column : outputColumns) {
38
+ if (!inputColumns.contains(column)) {
39
+ wootheeColumnMap.put(column.getName(), column);
40
+ } else {
41
+ inputColumnMap.put(column.getName(), column);
42
+ }
43
+ }
44
+ this.reader = new PageReader(inputSchema);
45
+ this.builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
46
+ this.keyNameColumn = inputColumnMap.get(task.getKeyName());
47
+ }
48
+
49
+ @Override
50
+ public void finish() {
51
+ builder.finish();
52
+ }
53
+
54
+ @Override
55
+ public void close() {
56
+ builder.close();
57
+ }
58
+
59
+ @Override
60
+ public void add(Page page) {
61
+ reader.setPage(page);
62
+
63
+ while (reader.nextRecord()) {
64
+ String userAgentString = reader.isNull(keyNameColumn) ? null : reader.getString(keyNameColumn);
65
+ Map<String, String> ua = Classifier.parse(userAgentString);
66
+ setValue(builder, ua);
67
+ if (task.getFilterCategories().isPresent()) {
68
+ if (task.getFilterCategories().orNull().contains(ua.get("category"))) {
69
+ builder.addRecord();
70
+ }
71
+ } else if (task.getDropCategories().isPresent()) {
72
+ if (!task.getDropCategories().orNull().contains(ua.get("category"))) {
73
+ builder.addRecord();
74
+ }
75
+ } else {
76
+ builder.addRecord();
77
+ }
78
+ }
79
+ }
80
+
81
+ /**
82
+ * @param builder
83
+ */
84
+ private void setValue(PageBuilder builder, Map<String, String> ua) {
85
+ if (task.getMergeAgentInfo()) {
86
+ builder.setString(wootheeColumnMap.get(task.getOutKeyOs()), ua.get("os"));
87
+ builder.setString(wootheeColumnMap.get(task.getOutKeyName()), ua.get("name"));
88
+ builder.setString(wootheeColumnMap.get(task.getOutKeyCategory()), ua.get("category"));
89
+ builder.setString(wootheeColumnMap.get(task.getOutKeyVersion()), ua.get("version"));
90
+ builder.setString(wootheeColumnMap.get(task.getOutKeyVendor()), ua.get("vendor"));
91
+ }
92
+
93
+ for (Column inputColumn: inputColumns) {
94
+ if (reader.isNull(inputColumn)) {
95
+ builder.setNull(inputColumn);
96
+ continue;
97
+ }
98
+ if (Types.STRING.equals(inputColumn.getType())) {
99
+ builder.setString(inputColumn, reader.getString(inputColumn));
100
+ } else if (Types.BOOLEAN.equals(inputColumn.getType())) {
101
+ builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
102
+ } else if (Types.DOUBLE.equals(inputColumn.getType())) {
103
+ builder.setDouble(inputColumn, reader.getDouble(inputColumn));
104
+ } else if (Types.LONG.equals(inputColumn.getType())) {
105
+ builder.setLong(inputColumn, reader.getLong(inputColumn));
106
+ } else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
107
+ builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
108
+ } else if (Types.JSON.equals(inputColumn.getType())) {
109
+ builder.setJson(inputColumn, reader.getJson(inputColumn));
110
+ }
111
+ }
112
+ }
113
+ }
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-woothee
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - toyama0919
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-27 00:00:00.000000000 Z
11
+ date: 2016-06-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
14
  requirement: !ruby/object:Gem::Requirement
21
15
  requirements:
22
16
  - - ~>
23
17
  - !ruby/object:Gem::Version
24
18
  version: '1.0'
19
+ name: bundler
25
20
  prerelease: false
26
21
  type: :development
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - '>='
24
+ - - ~>
32
25
  - !ruby/object:Gem::Version
33
- version: '10.0'
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
34
28
  requirement: !ruby/object:Gem::Requirement
35
29
  requirements:
36
30
  - - '>='
37
31
  - !ruby/object:Gem::Version
38
32
  version: '10.0'
33
+ name: rake
39
34
  prerelease: false
40
35
  type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
41
  description: Woothee filter plugin for Embulk
42
42
  email:
43
43
  - toyama0919@gmail.com
@@ -50,14 +50,17 @@ files:
50
50
  - LICENSE.txt
51
51
  - README.md
52
52
  - build.gradle
53
+ - config/checkstyle/checkstyle.xml
54
+ - config/checkstyle/default.xml
53
55
  - gradle/wrapper/gradle-wrapper.jar
54
56
  - gradle/wrapper/gradle-wrapper.properties
55
57
  - gradlew
56
58
  - gradlew.bat
57
59
  - lib/embulk/filter/woothee.rb
58
60
  - src/main/java/org/embulk/filter/WootheeFilterPlugin.java
61
+ - src/main/java/org/embulk/filter/WootheePageOutput.java
59
62
  - src/test/java/org/embulk/filter/TestWootheeFilterPlugin.java
60
- - classpath/embulk-filter-woothee-0.2.3.jar
63
+ - classpath/embulk-filter-woothee-0.3.0.jar
61
64
  - classpath/woothee-java-1.2.0.jar
62
65
  homepage: https://github.com/toyama0919/embulk-filter-woothee
63
66
  licenses: