embulk-filter-distinct 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d599cdfb56035c6d6cb89ac29440ed549ccbecec
4
- data.tar.gz: 9f78869da19e5367ff6ecdc8281fcc634dd13edf
3
+ metadata.gz: 812453d66d083c75d383cc6df8431158d7e81551
4
+ data.tar.gz: 8f294e26ebe536ef633793cd73aded5e83e618e5
5
5
  SHA512:
6
- metadata.gz: 488c534c6fe14eb5cb63c267b96c4f725a15f5cdc608ca5ea9a7dc05a3f9f3aeb49dd620ed3b6b82e2ee4cc69cdadd4090f76b7115edb6ae47ae4388d3c27f69
7
- data.tar.gz: bed20d3550b27ab998d11a2dad42ad66a855c467c06fdce3a850de537d9f3d9b23fd21aaeeb4d7ef984c295d2534fc2d387711edac4c1e6530b391de82ca5ab8
6
+ metadata.gz: e46ac798104936c4a36badc293f9a93295f74bcbfed9ad71c337b9c5abd5087f7109a3f1ca1e657872eb51dd315d97d1a7a5999d329904df89264a18603734ff
7
+ data.tar.gz: dabc9edcf9e3593f467c0859bee7aa370b155b7492bdb957fefa3611e5b5c9a11de21ef40a363b969a0f072cd65498e0f1433b54f9043adfd63d45c0a132a394
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ 0.0.4 (2017-08-03)
2
+ ==================
3
+
4
+ - Migrate to Embulk v0.8.18
5
+ - https://github.com/civitaspo/embulk-filter-distinct/pull/4
6
+ - https://github.com/civitaspo/embulk-filter-distinct/pull/5
7
+ - Add tests
8
+ - https://github.com/civitaspo/embulk-filter-distinct/pull/8
9
+
1
10
  0.0.3 (2016-01-05)
2
11
  ==================
3
12
 
data/build.gradle CHANGED
@@ -2,6 +2,7 @@ plugins {
2
2
  id "com.jfrog.bintray" version "1.1"
3
3
  id "com.github.jruby-gradle.base" version "0.1.5"
4
4
  id "java"
5
+ id "checkstyle"
5
6
  // For test/coverage
6
7
  id "com.github.kt3k.coveralls" version "2.4.0"
7
8
  id "jacoco"
@@ -15,16 +16,15 @@ configurations {
15
16
  provided
16
17
  }
17
18
 
18
- version = "0.0.3"
19
+ version = "0.0.4"
19
20
  sourceCompatibility = 1.7
20
21
  targetCompatibility = 1.7
21
22
 
22
23
  dependencies {
23
- compile "org.embulk:embulk-core:0.7.+"
24
- provided "org.embulk:embulk-core:0.7.+"
25
- // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
24
+ compile "org.embulk:embulk-core:0.8.18"
25
+ provided "org.embulk:embulk-core:0.8.18"
26
26
  testCompile "junit:junit:4.+"
27
- testCompile "org.embulk:embulk-core:0.7.+:tests"
27
+ testCompile "org.embulk:embulk-core:0.8.18:tests"
28
28
  }
29
29
 
30
30
  jacocoTestReport {
@@ -41,6 +41,23 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
41
41
  }
42
42
  clean { delete "classpath" }
43
43
 
44
+ checkstyle {
45
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
46
+ toolVersion = '6.14.1'
47
+ }
48
+ checkstyleMain {
49
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
50
+ ignoreFailures = true
51
+ }
52
+ checkstyleTest {
53
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
54
+ ignoreFailures = true
55
+ }
56
+ task checkstyle(type: Checkstyle) {
57
+ classpath = sourceSets.main.output + sourceSets.test.output
58
+ source = sourceSets.main.allJava + sourceSets.test.allJava
59
+ }
60
+
44
61
  task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
45
62
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
46
63
  script "${project.name}.gemspec"
@@ -76,7 +93,6 @@ Gem::Specification.new do |spec|
76
93
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
77
94
  spec.require_paths = ["lib"]
78
95
 
79
- #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
80
96
  spec.add_development_dependency 'bundler', ['~> 1.0']
81
97
  spec.add_development_dependency 'rake', ['>= 10.0']
82
98
  end
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
Binary file
@@ -1,6 +1,6 @@
1
- #Tue Aug 11 00:26:20 PDT 2015
1
+ #Sun Jan 08 00:35:58 PST 2017
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
@@ -7,9 +7,6 @@ import org.embulk.spi.PageBuilder;
7
7
  import org.embulk.spi.PageReader;
8
8
  import org.slf4j.Logger;
9
9
 
10
- /**
11
- * Created by takahiro.nakayama on 12/6/15.
12
- */
13
10
  class ColumnVisitorImpl
14
11
  implements ColumnVisitor
15
12
  {
@@ -58,6 +55,17 @@ class ColumnVisitorImpl
58
55
  }
59
56
  }
60
57
 
58
+ @Override
59
+ public void jsonColumn(Column outputColumn)
60
+ {
61
+ if (pageReader.isNull(outputColumn)) {
62
+ pageBuilder.setNull(outputColumn);
63
+ }
64
+ else {
65
+ pageBuilder.setJson(outputColumn, pageReader.getJson(outputColumn));
66
+ }
67
+ }
68
+
61
69
  @Override
62
70
  public void timestampColumn(Column outputColumn) {
63
71
  if (pageReader.isNull(outputColumn)) {
@@ -17,9 +17,6 @@ import org.slf4j.Logger;
17
17
  import java.util.List;
18
18
  import java.util.Set;
19
19
 
20
- /**
21
- * Created by takahiro.nakayama on 12/6/15.
22
- */
23
20
  class DistinctFilterPageOutput
24
21
  implements PageOutput
25
22
  {
@@ -90,6 +87,9 @@ class DistinctFilterPageOutput
90
87
  else if (Types.TIMESTAMP.equals(distinctColumn.getType())) {
91
88
  builder.add(pageReader.getTimestamp(distinctColumn));
92
89
  }
90
+ else if (Types.JSON.equals(distinctColumn.getType())) {
91
+ builder.add(pageReader.getJson(distinctColumn));
92
+ }
93
93
  else {
94
94
  throw new RuntimeException("unsupported type: " + distinctColumn.getType());
95
95
  }
@@ -1,5 +1,490 @@
1
1
  package org.embulk.filter.distinct;
2
2
 
3
+ import com.google.common.collect.Lists;
4
+ import com.google.common.collect.Sets;
5
+ import org.embulk.EmbulkTestRuntime;
6
+ import org.embulk.config.ConfigLoader;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.config.TaskSource;
9
+ import org.embulk.spi.Column;
10
+ import org.embulk.spi.Page;
11
+ import org.embulk.spi.PageOutput;
12
+ import org.embulk.spi.PageTestUtils;
13
+ import org.embulk.spi.Schema;
14
+ import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
15
+ import org.embulk.spi.json.JsonParser;
16
+ import org.embulk.spi.time.Timestamp;
17
+ import org.embulk.spi.type.Type;
18
+ import org.embulk.spi.type.Types;
19
+ import org.embulk.spi.util.Pages;
20
+ import org.junit.Before;
21
+ import org.junit.Rule;
22
+ import org.junit.Test;
23
+ import org.junit.rules.ExpectedException;
24
+
25
+ import java.lang.reflect.Field;
26
+ import java.lang.reflect.Modifier;
27
+ import java.util.List;
28
+
29
+ import static org.embulk.filter.distinct.DistinctFilterPlugin.PluginTask;
30
+ import static org.embulk.spi.FilterPlugin.Control;
31
+ import static org.junit.Assert.assertEquals;
32
+ import static org.junit.Assert.assertNull;
33
+
3
34
  public class TestDistinctFilterPlugin
4
35
  {
36
+ @Rule
37
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
38
+
39
+ @Rule
40
+ public ExpectedException exception = ExpectedException.none();
41
+
42
+ private static JsonParser jsonParser = new JsonParser();
43
+ private Schema schema;
44
+ private DistinctFilterPlugin plugin;
45
+
46
+ // http://stackoverflow.com/questions/3301635/change-private-static-final-field-using-java-reflection
47
+ private static void setFinalStaticVariable(Field field, Object newValue)
48
+ throws IllegalAccessException, NoSuchFieldException
49
+ {
50
+ field.setAccessible(true);
51
+
52
+ Field modifiersField = Field.class.getDeclaredField("modifiers");
53
+ modifiersField.setAccessible(true);
54
+ modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL);
55
+
56
+ field.set(null, newValue);
57
+ }
58
+
59
+ private Schema schema(Object... nameAndTypes)
60
+ {
61
+ Schema.Builder builder = Schema.builder();
62
+ for (int i = 0; i < nameAndTypes.length; i += 2) {
63
+ builder.add((String) nameAndTypes[i], (Type) nameAndTypes[i + 1]);
64
+ }
65
+ return builder.build();
66
+ }
67
+
68
+ private ConfigSource loadConfigFromYaml(String yaml)
69
+ {
70
+ ConfigLoader loader = new ConfigLoader(runtime.getModelManager());
71
+ return loader.fromYamlString(yaml);
72
+ }
73
+
74
+ @Before
75
+ public void setupDefault()
76
+ {
77
+ schema = schema("_c0", Types.STRING, "_c1", Types.STRING);
78
+ plugin = new DistinctFilterPlugin();
79
+ }
80
+
81
+ @Before
82
+ public void resetStatic()
83
+ throws IllegalAccessException, NoSuchFieldException
84
+ {
85
+ Field set = DistinctFilterPageOutput.class.getDeclaredField("set");
86
+ setFinalStaticVariable(set, Sets.<List<Object>>newConcurrentHashSet());
87
+ }
88
+
89
+ @Test
90
+ public void testConfigure()
91
+ {
92
+ String yaml = "" +
93
+ "type: distinct\n" +
94
+ "columns: [_c0, _c1]\n";
95
+
96
+ ConfigSource config = loadConfigFromYaml(yaml);
97
+ PluginTask task = config.loadConfig(PluginTask.class);
98
+
99
+ assertEquals(Lists.newArrayList("_c0", "_c1"), task.getDistinctColumnNames());
100
+ }
101
+
102
+ @Test
103
+ public void testConfigureInjectedTask()
104
+ {
105
+ String yaml = "" +
106
+ "type: distinct\n" +
107
+ "columns: [_c0, _c1]\n";
108
+
109
+ ConfigSource config = loadConfigFromYaml(yaml);
110
+
111
+ plugin.transaction(config, schema, new Control() {
112
+ @Override
113
+ public void run(TaskSource taskSource, Schema outputSchema)
114
+ {
115
+ PluginTask task = taskSource.loadTask(PluginTask.class);
116
+
117
+ List<Column> columns = Lists.newArrayList();
118
+ columns.add(new Column(0, "_c0", Types.STRING));
119
+ columns.add(new Column(1, "_c1", Types.STRING));
120
+
121
+ assertEquals(columns, task.getDistinctColumns());
122
+ }
123
+ });
124
+ }
125
+
126
+ @Test
127
+ public void testDistinctBySingleColumn()
128
+ {
129
+ String yaml = "" +
130
+ "type: distinct\n" +
131
+ "columns: [_c0]\n";
132
+
133
+ ConfigSource config = loadConfigFromYaml(yaml);
134
+ plugin.transaction(config, schema, new Control() {
135
+ @Override
136
+ public void run(TaskSource taskSource, Schema outputSchema)
137
+ {
138
+ MockPageOutput output = new MockPageOutput();
139
+
140
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
141
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
142
+ "a", "a", // row: 1
143
+ "a", "a", // row: 2
144
+ "a", "b", // row: 3
145
+ "b", "b", // row: 4
146
+ "b", "a", // row: 5
147
+ "b", "b", // row: 6
148
+ null, "a", // row: 7
149
+ null, "b" // row: 8
150
+ )
151
+ ) {
152
+ pageOutput.add(page);
153
+ }
154
+ pageOutput.finish();
155
+ }
156
+
157
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
158
+ assertEquals(3, records.size());
159
+
160
+ Object[] record1 = records.get(0);
161
+ assertEquals("a", record1[0]);
162
+ assertEquals("a", record1[1]);
163
+
164
+ Object[] record2 = records.get(1);
165
+ assertEquals("b", record2[0]);
166
+ assertEquals("b", record2[1]);
167
+
168
+ Object[] record3 = records.get(2);
169
+ assertNull(record3[0]);
170
+ assertEquals("a", record3[1]);
171
+ }
172
+ });
173
+ }
174
+
175
+ @Test
176
+ public void testDistinctByMultipleColumns()
177
+ {
178
+ String yaml = "" +
179
+ "type: distinct\n" +
180
+ "columns: [_c0, _c1]\n";
181
+
182
+ ConfigSource config = loadConfigFromYaml(yaml);
183
+ plugin.transaction(config, schema, new Control() {
184
+ @Override
185
+ public void run(TaskSource taskSource, Schema outputSchema)
186
+ {
187
+ MockPageOutput output = new MockPageOutput();
188
+
189
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
190
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
191
+ "a", "a", // row: 1
192
+ "a", "a", // row: 2
193
+ "a", "b", // row: 3
194
+ "b", "b", // row: 4
195
+ "b", "a", // row: 5
196
+ "b", "b", // row: 6
197
+ null, "a", // row: 7
198
+ null, "b" // row: 8
199
+ )
200
+ ) {
201
+ pageOutput.add(page);
202
+ }
203
+ pageOutput.finish();
204
+ }
205
+
206
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
207
+ assertEquals(6, records.size());
208
+
209
+ Object[] record1 = records.get(0);
210
+ assertEquals("a", record1[0]);
211
+ assertEquals("a", record1[1]);
212
+
213
+ Object[] record2 = records.get(1);
214
+ assertEquals("a", record2[0]);
215
+ assertEquals("b", record2[1]);
216
+
217
+ Object[] record3 = records.get(2);
218
+ assertEquals("b", record3[0]);
219
+ assertEquals("b", record3[1]);
220
+
221
+ Object[] record4 = records.get(3);
222
+ assertEquals("b", record4[0]);
223
+ assertEquals("a", record4[1]);
224
+
225
+ Object[] record5 = records.get(4);
226
+ assertNull(record5[0]);
227
+ assertEquals("a", record5[1]);
228
+
229
+ Object[] record6 = records.get(5);
230
+ assertNull(record6[0]);
231
+ assertEquals("b", record6[1]);
232
+ }
233
+ });
234
+ }
235
+
236
+ @Test
237
+ public void testDistinctByLongColumn()
238
+ {
239
+ schema = schema("_c0", Types.LONG, "_c1", Types.STRING);
240
+ String yaml = "" +
241
+ "type: distinct\n" +
242
+ "columns: [_c0]\n";
243
+
244
+ ConfigSource config = loadConfigFromYaml(yaml);
245
+ plugin.transaction(config, schema, new Control() {
246
+ @Override
247
+ public void run(TaskSource taskSource, Schema outputSchema)
248
+ {
249
+ MockPageOutput output = new MockPageOutput();
250
+
251
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
252
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
253
+ 1L, "a", // row: 1
254
+ 1L, "a", // row: 2
255
+ 1L, "b", // row: 3
256
+ 2L, "b", // row: 4
257
+ 2L, "a", // row: 5
258
+ 2L, "b", // row: 6
259
+ null, "a", // row: 7
260
+ null, "b" // row: 8
261
+ )
262
+ ) {
263
+ pageOutput.add(page);
264
+ }
265
+ pageOutput.finish();
266
+ }
267
+
268
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
269
+ assertEquals(3, records.size());
270
+
271
+ Object[] record1 = records.get(0);
272
+ assertEquals(1L, record1[0]);
273
+ assertEquals("a", record1[1]);
274
+
275
+ Object[] record2 = records.get(1);
276
+ assertEquals(2L, record2[0]);
277
+ assertEquals("b", record2[1]);
278
+
279
+ Object[] record3 = records.get(2);
280
+ assertNull(record3[0]);
281
+ assertEquals("a", record3[1]);
282
+ }
283
+ });
284
+ }
285
+
286
+ @Test
287
+ public void testDistinctByDoubleColumn()
288
+ {
289
+ schema = schema("_c0", Types.DOUBLE, "_c1", Types.STRING);
290
+ String yaml = "" +
291
+ "type: distinct\n" +
292
+ "columns: [_c0]\n";
293
+
294
+ ConfigSource config = loadConfigFromYaml(yaml);
295
+ plugin.transaction(config, schema, new Control() {
296
+ @Override
297
+ public void run(TaskSource taskSource, Schema outputSchema)
298
+ {
299
+ MockPageOutput output = new MockPageOutput();
300
+
301
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
302
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
303
+ 1.1, "a", // row: 1
304
+ 1.1, "a", // row: 2
305
+ 1.1, "b", // row: 3
306
+ 2.2, "b", // row: 4
307
+ 2.2, "a", // row: 5
308
+ 2.2, "b", // row: 6
309
+ null, "a", // row: 7
310
+ null, "b" // row: 8
311
+ )
312
+ ) {
313
+ pageOutput.add(page);
314
+ }
315
+ pageOutput.finish();
316
+ }
317
+
318
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
319
+ assertEquals(3, records.size());
320
+
321
+ Object[] record1 = records.get(0);
322
+ assertEquals(1.1, record1[0]);
323
+ assertEquals("a", record1[1]);
324
+
325
+ Object[] record2 = records.get(1);
326
+ assertEquals(2.2, record2[0]);
327
+ assertEquals("b", record2[1]);
328
+
329
+ Object[] record3 = records.get(2);
330
+ assertNull(record3[0]);
331
+ assertEquals("a", record3[1]);
332
+ }
333
+ });
334
+ }
335
+
336
+ @Test
337
+ public void testDistinctByBooleanColumn()
338
+ {
339
+ schema = schema("_c0", Types.BOOLEAN, "_c1", Types.STRING);
340
+ String yaml = "" +
341
+ "type: distinct\n" +
342
+ "columns: [_c0]\n";
343
+
344
+ ConfigSource config = loadConfigFromYaml(yaml);
345
+ plugin.transaction(config, schema, new Control() {
346
+ @Override
347
+ public void run(TaskSource taskSource, Schema outputSchema)
348
+ {
349
+ MockPageOutput output = new MockPageOutput();
350
+
351
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
352
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
353
+ true, "a", // row: 1
354
+ true, "a", // row: 2
355
+ true, "b", // row: 3
356
+ false, "b", // row: 4
357
+ false, "a", // row: 5
358
+ false, "b", // row: 6
359
+ null, "a", // row: 7
360
+ null, "b" // row: 8
361
+ )
362
+ ) {
363
+ pageOutput.add(page);
364
+ }
365
+ pageOutput.finish();
366
+ }
367
+
368
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
369
+ assertEquals(3, records.size());
370
+
371
+ Object[] record1 = records.get(0);
372
+ assertEquals(true, record1[0]);
373
+ assertEquals("a", record1[1]);
374
+
375
+ Object[] record2 = records.get(1);
376
+ assertEquals(false, record2[0]);
377
+ assertEquals("b", record2[1]);
378
+
379
+ Object[] record3 = records.get(2);
380
+ assertNull(record3[0]);
381
+ assertEquals("a", record3[1]);
382
+ }
383
+ });
384
+ }
385
+
386
+
387
+ @Test
388
+ public void testDistinctByTimestampColumn()
389
+ {
390
+ schema = schema("_c0", Types.TIMESTAMP, "_c1", Types.STRING);
391
+ String yaml = "" +
392
+ "type: distinct\n" +
393
+ "columns: [_c0]\n";
394
+
395
+ ConfigSource config = loadConfigFromYaml(yaml);
396
+ plugin.transaction(config, schema, new Control() {
397
+ @Override
398
+ public void run(TaskSource taskSource, Schema outputSchema)
399
+ {
400
+ MockPageOutput output = new MockPageOutput();
401
+
402
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
403
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
404
+ Timestamp.ofEpochSecond(1), "a", // row: 1
405
+ Timestamp.ofEpochSecond(1), "a", // row: 2
406
+ Timestamp.ofEpochSecond(1), "b", // row: 3
407
+ Timestamp.ofEpochSecond(2), "b", // row: 4
408
+ Timestamp.ofEpochSecond(2), "a", // row: 5
409
+ Timestamp.ofEpochSecond(2), "b", // row: 6
410
+ null, "a", // row: 7
411
+ null, "b" // row: 8
412
+ )
413
+ ) {
414
+ pageOutput.add(page);
415
+ }
416
+ pageOutput.finish();
417
+ }
418
+
419
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
420
+ assertEquals(3, records.size());
421
+
422
+ Object[] record1 = records.get(0);
423
+ assertEquals(Timestamp.ofEpochSecond(1), record1[0]);
424
+ assertEquals("a", record1[1]);
425
+
426
+ Object[] record2 = records.get(1);
427
+ assertEquals(Timestamp.ofEpochSecond(2), record2[0]);
428
+ assertEquals("b", record2[1]);
429
+
430
+ Object[] record3 = records.get(2);
431
+ assertNull(record3[0]);
432
+ assertEquals("a", record3[1]);
433
+ }
434
+ });
435
+ }
436
+
437
+
438
+ @Test
439
+ public void testDistinctByJsonColumn()
440
+ {
441
+ schema = schema("_c0", Types.JSON, "_c1", Types.STRING);
442
+ String yaml = "" +
443
+ "type: distinct\n" +
444
+ "columns: [_c0]\n";
445
+
446
+ final String json1 = "{\"a\":1,\"b\":\"b\"}";
447
+ final String json2 = "{\"a\":2,\"b\":\"b\"}";
448
+
449
+ ConfigSource config = loadConfigFromYaml(yaml);
450
+ plugin.transaction(config, schema, new Control() {
451
+ @Override
452
+ public void run(TaskSource taskSource, Schema outputSchema)
453
+ {
454
+ MockPageOutput output = new MockPageOutput();
455
+
456
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
457
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
458
+ jsonParser.parse(json1), "a", // row: 1
459
+ jsonParser.parse(json1), "a", // row: 2
460
+ jsonParser.parse(json1), "b", // row: 3
461
+ jsonParser.parse(json2), "b", // row: 4
462
+ jsonParser.parse(json2), "a", // row: 5
463
+ jsonParser.parse(json2), "b", // row: 6
464
+ null, "a", // row: 7
465
+ null, "b" // row: 8
466
+ )
467
+ ) {
468
+ pageOutput.add(page);
469
+ }
470
+ pageOutput.finish();
471
+ }
472
+
473
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
474
+ assertEquals(3, records.size());
475
+
476
+ Object[] record1 = records.get(0);
477
+ assertEquals(jsonParser.parse(json1), record1[0]);
478
+ assertEquals("a", record1[1]);
479
+
480
+ Object[] record2 = records.get(1);
481
+ assertEquals(jsonParser.parse(json2), record2[0]);
482
+ assertEquals("b", record2[1]);
483
+
484
+ Object[] record3 = records.get(2);
485
+ assertNull(record3[0]);
486
+ assertEquals("a", record3[1]);
487
+ }
488
+ });
489
+ }
5
490
  }
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-distinct
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-05 00:00:00.000000000 Z
11
+ date: 2017-08-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
14
  requirement: !ruby/object:Gem::Requirement
21
15
  requirements:
22
16
  - - ~>
23
17
  - !ruby/object:Gem::Version
24
18
  version: '1.0'
19
+ name: bundler
25
20
  prerelease: false
26
21
  type: :development
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - '>='
24
+ - - ~>
32
25
  - !ruby/object:Gem::Version
33
- version: '10.0'
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
34
28
  requirement: !ruby/object:Gem::Requirement
35
29
  requirements:
36
30
  - - '>='
37
31
  - !ruby/object:Gem::Version
38
32
  version: '10.0'
33
+ name: rake
39
34
  prerelease: false
40
35
  type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
41
  description: Distinct
42
42
  email:
43
43
  - civitaspo@gmail.com
@@ -50,6 +50,8 @@ files:
50
50
  - LICENSE.txt
51
51
  - README.md
52
52
  - build.gradle
53
+ - config/checkstyle/checkstyle.xml
54
+ - config/checkstyle/default.xml
53
55
  - example/config.yml
54
56
  - example/data.csv
55
57
  - gradle/wrapper/gradle-wrapper.jar
@@ -61,7 +63,7 @@ files:
61
63
  - src/main/java/org/embulk/filter/distinct/DistinctFilterPageOutput.java
62
64
  - src/main/java/org/embulk/filter/distinct/DistinctFilterPlugin.java
63
65
  - src/test/java/org/embulk/filter/distinct/TestDistinctFilterPlugin.java
64
- - classpath/embulk-filter-distinct-0.0.3.jar
66
+ - classpath/embulk-filter-distinct-0.0.4.jar
65
67
  homepage: https://github.com/civitaspo/embulk-filter-distinct
66
68
  licenses:
67
69
  - MIT