embulk-filter-distinct 0.0.3 → 0.0.4

This diff shows the differences between two publicly released versions of a package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d599cdfb56035c6d6cb89ac29440ed549ccbecec
4
- data.tar.gz: 9f78869da19e5367ff6ecdc8281fcc634dd13edf
3
+ metadata.gz: 812453d66d083c75d383cc6df8431158d7e81551
4
+ data.tar.gz: 8f294e26ebe536ef633793cd73aded5e83e618e5
5
5
  SHA512:
6
- metadata.gz: 488c534c6fe14eb5cb63c267b96c4f725a15f5cdc608ca5ea9a7dc05a3f9f3aeb49dd620ed3b6b82e2ee4cc69cdadd4090f76b7115edb6ae47ae4388d3c27f69
7
- data.tar.gz: bed20d3550b27ab998d11a2dad42ad66a855c467c06fdce3a850de537d9f3d9b23fd21aaeeb4d7ef984c295d2534fc2d387711edac4c1e6530b391de82ca5ab8
6
+ metadata.gz: e46ac798104936c4a36badc293f9a93295f74bcbfed9ad71c337b9c5abd5087f7109a3f1ca1e657872eb51dd315d97d1a7a5999d329904df89264a18603734ff
7
+ data.tar.gz: dabc9edcf9e3593f467c0859bee7aa370b155b7492bdb957fefa3611e5b5c9a11de21ef40a363b969a0f072cd65498e0f1433b54f9043adfd63d45c0a132a394
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ 0.0.4 (2017-08-03)
2
+ ==================
3
+
4
+ - Migrate to Embulk v0.8.18
5
+ - https://github.com/civitaspo/embulk-filter-distinct/pull/4
6
+ - https://github.com/civitaspo/embulk-filter-distinct/pull/5
7
+ - Add tests
8
+ - https://github.com/civitaspo/embulk-filter-distinct/pull/8
9
+
1
10
  0.0.3 (2016-01-05)
2
11
  ==================
3
12
 
data/build.gradle CHANGED
@@ -2,6 +2,7 @@ plugins {
2
2
  id "com.jfrog.bintray" version "1.1"
3
3
  id "com.github.jruby-gradle.base" version "0.1.5"
4
4
  id "java"
5
+ id "checkstyle"
5
6
  // For test/coverage
6
7
  id "com.github.kt3k.coveralls" version "2.4.0"
7
8
  id "jacoco"
@@ -15,16 +16,15 @@ configurations {
15
16
  provided
16
17
  }
17
18
 
18
- version = "0.0.3"
19
+ version = "0.0.4"
19
20
  sourceCompatibility = 1.7
20
21
  targetCompatibility = 1.7
21
22
 
22
23
  dependencies {
23
- compile "org.embulk:embulk-core:0.7.+"
24
- provided "org.embulk:embulk-core:0.7.+"
25
- // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
24
+ compile "org.embulk:embulk-core:0.8.18"
25
+ provided "org.embulk:embulk-core:0.8.18"
26
26
  testCompile "junit:junit:4.+"
27
- testCompile "org.embulk:embulk-core:0.7.+:tests"
27
+ testCompile "org.embulk:embulk-core:0.8.18:tests"
28
28
  }
29
29
 
30
30
  jacocoTestReport {
@@ -41,6 +41,23 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
41
41
  }
42
42
  clean { delete "classpath" }
43
43
 
44
+ checkstyle {
45
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
46
+ toolVersion = '6.14.1'
47
+ }
48
+ checkstyleMain {
49
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
50
+ ignoreFailures = true
51
+ }
52
+ checkstyleTest {
53
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
54
+ ignoreFailures = true
55
+ }
56
+ task checkstyle(type: Checkstyle) {
57
+ classpath = sourceSets.main.output + sourceSets.test.output
58
+ source = sourceSets.main.allJava + sourceSets.test.allJava
59
+ }
60
+
44
61
  task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
45
62
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
46
63
  script "${project.name}.gemspec"
@@ -76,7 +93,6 @@ Gem::Specification.new do |spec|
76
93
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
77
94
  spec.require_paths = ["lib"]
78
95
 
79
- #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
80
96
  spec.add_development_dependency 'bundler', ['~> 1.0']
81
97
  spec.add_development_dependency 'rake', ['>= 10.0']
82
98
  end
data/config/checkstyle/checkstyle.xml ADDED
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
data/config/checkstyle/default.xml ADDED
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
Binary file
@@ -1,6 +1,6 @@
1
- #Tue Aug 11 00:26:20 PDT 2015
1
+ #Sun Jan 08 00:35:58 PST 2017
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
@@ -7,9 +7,6 @@ import org.embulk.spi.PageBuilder;
7
7
  import org.embulk.spi.PageReader;
8
8
  import org.slf4j.Logger;
9
9
 
10
- /**
11
- * Created by takahiro.nakayama on 12/6/15.
12
- */
13
10
  class ColumnVisitorImpl
14
11
  implements ColumnVisitor
15
12
  {
@@ -58,6 +55,17 @@ class ColumnVisitorImpl
58
55
  }
59
56
  }
60
57
 
58
+ @Override
59
+ public void jsonColumn(Column outputColumn)
60
+ {
61
+ if (pageReader.isNull(outputColumn)) {
62
+ pageBuilder.setNull(outputColumn);
63
+ }
64
+ else {
65
+ pageBuilder.setJson(outputColumn, pageReader.getJson(outputColumn));
66
+ }
67
+ }
68
+
61
69
  @Override
62
70
  public void timestampColumn(Column outputColumn) {
63
71
  if (pageReader.isNull(outputColumn)) {
data/src/main/java/org/embulk/filter/distinct/DistinctFilterPageOutput.java CHANGED
@@ -17,9 +17,6 @@ import org.slf4j.Logger;
17
17
  import java.util.List;
18
18
  import java.util.Set;
19
19
 
20
- /**
21
- * Created by takahiro.nakayama on 12/6/15.
22
- */
23
20
  class DistinctFilterPageOutput
24
21
  implements PageOutput
25
22
  {
@@ -90,6 +87,9 @@ class DistinctFilterPageOutput
90
87
  else if (Types.TIMESTAMP.equals(distinctColumn.getType())) {
91
88
  builder.add(pageReader.getTimestamp(distinctColumn));
92
89
  }
90
+ else if (Types.JSON.equals(distinctColumn.getType())) {
91
+ builder.add(pageReader.getJson(distinctColumn));
92
+ }
93
93
  else {
94
94
  throw new RuntimeException("unsupported type: " + distinctColumn.getType());
95
95
  }
data/src/test/java/org/embulk/filter/distinct/TestDistinctFilterPlugin.java CHANGED
@@ -1,5 +1,490 @@
1
1
  package org.embulk.filter.distinct;
2
2
 
3
+ import com.google.common.collect.Lists;
4
+ import com.google.common.collect.Sets;
5
+ import org.embulk.EmbulkTestRuntime;
6
+ import org.embulk.config.ConfigLoader;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.config.TaskSource;
9
+ import org.embulk.spi.Column;
10
+ import org.embulk.spi.Page;
11
+ import org.embulk.spi.PageOutput;
12
+ import org.embulk.spi.PageTestUtils;
13
+ import org.embulk.spi.Schema;
14
+ import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
15
+ import org.embulk.spi.json.JsonParser;
16
+ import org.embulk.spi.time.Timestamp;
17
+ import org.embulk.spi.type.Type;
18
+ import org.embulk.spi.type.Types;
19
+ import org.embulk.spi.util.Pages;
20
+ import org.junit.Before;
21
+ import org.junit.Rule;
22
+ import org.junit.Test;
23
+ import org.junit.rules.ExpectedException;
24
+
25
+ import java.lang.reflect.Field;
26
+ import java.lang.reflect.Modifier;
27
+ import java.util.List;
28
+
29
+ import static org.embulk.filter.distinct.DistinctFilterPlugin.PluginTask;
30
+ import static org.embulk.spi.FilterPlugin.Control;
31
+ import static org.junit.Assert.assertEquals;
32
+ import static org.junit.Assert.assertNull;
33
+
3
34
  public class TestDistinctFilterPlugin
4
35
  {
36
+ @Rule
37
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
38
+
39
+ @Rule
40
+ public ExpectedException exception = ExpectedException.none();
41
+
42
+ private static JsonParser jsonParser = new JsonParser();
43
+ private Schema schema;
44
+ private DistinctFilterPlugin plugin;
45
+
46
+ // http://stackoverflow.com/questions/3301635/change-private-static-final-field-using-java-reflection
47
+ private static void setFinalStaticVariable(Field field, Object newValue)
48
+ throws IllegalAccessException, NoSuchFieldException
49
+ {
50
+ field.setAccessible(true);
51
+
52
+ Field modifiersField = Field.class.getDeclaredField("modifiers");
53
+ modifiersField.setAccessible(true);
54
+ modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL);
55
+
56
+ field.set(null, newValue);
57
+ }
58
+
59
+ private Schema schema(Object... nameAndTypes)
60
+ {
61
+ Schema.Builder builder = Schema.builder();
62
+ for (int i = 0; i < nameAndTypes.length; i += 2) {
63
+ builder.add((String) nameAndTypes[i], (Type) nameAndTypes[i + 1]);
64
+ }
65
+ return builder.build();
66
+ }
67
+
68
+ private ConfigSource loadConfigFromYaml(String yaml)
69
+ {
70
+ ConfigLoader loader = new ConfigLoader(runtime.getModelManager());
71
+ return loader.fromYamlString(yaml);
72
+ }
73
+
74
+ @Before
75
+ public void setupDefault()
76
+ {
77
+ schema = schema("_c0", Types.STRING, "_c1", Types.STRING);
78
+ plugin = new DistinctFilterPlugin();
79
+ }
80
+
81
+ @Before
82
+ public void resetStatic()
83
+ throws IllegalAccessException, NoSuchFieldException
84
+ {
85
+ Field set = DistinctFilterPageOutput.class.getDeclaredField("set");
86
+ setFinalStaticVariable(set, Sets.<List<Object>>newConcurrentHashSet());
87
+ }
88
+
89
+ @Test
90
+ public void testConfigure()
91
+ {
92
+ String yaml = "" +
93
+ "type: distinct\n" +
94
+ "columns: [_c0, _c1]\n";
95
+
96
+ ConfigSource config = loadConfigFromYaml(yaml);
97
+ PluginTask task = config.loadConfig(PluginTask.class);
98
+
99
+ assertEquals(Lists.newArrayList("_c0", "_c1"), task.getDistinctColumnNames());
100
+ }
101
+
102
+ @Test
103
+ public void testConfigureInjectedTask()
104
+ {
105
+ String yaml = "" +
106
+ "type: distinct\n" +
107
+ "columns: [_c0, _c1]\n";
108
+
109
+ ConfigSource config = loadConfigFromYaml(yaml);
110
+
111
+ plugin.transaction(config, schema, new Control() {
112
+ @Override
113
+ public void run(TaskSource taskSource, Schema outputSchema)
114
+ {
115
+ PluginTask task = taskSource.loadTask(PluginTask.class);
116
+
117
+ List<Column> columns = Lists.newArrayList();
118
+ columns.add(new Column(0, "_c0", Types.STRING));
119
+ columns.add(new Column(1, "_c1", Types.STRING));
120
+
121
+ assertEquals(columns, task.getDistinctColumns());
122
+ }
123
+ });
124
+ }
125
+
126
+ @Test
127
+ public void testDistinctBySingleColumn()
128
+ {
129
+ String yaml = "" +
130
+ "type: distinct\n" +
131
+ "columns: [_c0]\n";
132
+
133
+ ConfigSource config = loadConfigFromYaml(yaml);
134
+ plugin.transaction(config, schema, new Control() {
135
+ @Override
136
+ public void run(TaskSource taskSource, Schema outputSchema)
137
+ {
138
+ MockPageOutput output = new MockPageOutput();
139
+
140
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
141
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
142
+ "a", "a", // row: 1
143
+ "a", "a", // row: 2
144
+ "a", "b", // row: 3
145
+ "b", "b", // row: 4
146
+ "b", "a", // row: 5
147
+ "b", "b", // row: 6
148
+ null, "a", // row: 7
149
+ null, "b" // row: 8
150
+ )
151
+ ) {
152
+ pageOutput.add(page);
153
+ }
154
+ pageOutput.finish();
155
+ }
156
+
157
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
158
+ assertEquals(3, records.size());
159
+
160
+ Object[] record1 = records.get(0);
161
+ assertEquals("a", record1[0]);
162
+ assertEquals("a", record1[1]);
163
+
164
+ Object[] record2 = records.get(1);
165
+ assertEquals("b", record2[0]);
166
+ assertEquals("b", record2[1]);
167
+
168
+ Object[] record3 = records.get(2);
169
+ assertNull(record3[0]);
170
+ assertEquals("a", record3[1]);
171
+ }
172
+ });
173
+ }
174
+
175
+ @Test
176
+ public void testDistinctByMultipleColumns()
177
+ {
178
+ String yaml = "" +
179
+ "type: distinct\n" +
180
+ "columns: [_c0, _c1]\n";
181
+
182
+ ConfigSource config = loadConfigFromYaml(yaml);
183
+ plugin.transaction(config, schema, new Control() {
184
+ @Override
185
+ public void run(TaskSource taskSource, Schema outputSchema)
186
+ {
187
+ MockPageOutput output = new MockPageOutput();
188
+
189
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
190
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
191
+ "a", "a", // row: 1
192
+ "a", "a", // row: 2
193
+ "a", "b", // row: 3
194
+ "b", "b", // row: 4
195
+ "b", "a", // row: 5
196
+ "b", "b", // row: 6
197
+ null, "a", // row: 7
198
+ null, "b" // row: 8
199
+ )
200
+ ) {
201
+ pageOutput.add(page);
202
+ }
203
+ pageOutput.finish();
204
+ }
205
+
206
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
207
+ assertEquals(6, records.size());
208
+
209
+ Object[] record1 = records.get(0);
210
+ assertEquals("a", record1[0]);
211
+ assertEquals("a", record1[1]);
212
+
213
+ Object[] record2 = records.get(1);
214
+ assertEquals("a", record2[0]);
215
+ assertEquals("b", record2[1]);
216
+
217
+ Object[] record3 = records.get(2);
218
+ assertEquals("b", record3[0]);
219
+ assertEquals("b", record3[1]);
220
+
221
+ Object[] record4 = records.get(3);
222
+ assertEquals("b", record4[0]);
223
+ assertEquals("a", record4[1]);
224
+
225
+ Object[] record5 = records.get(4);
226
+ assertNull(record5[0]);
227
+ assertEquals("a", record5[1]);
228
+
229
+ Object[] record6 = records.get(5);
230
+ assertNull(record6[0]);
231
+ assertEquals("b", record6[1]);
232
+ }
233
+ });
234
+ }
235
+
236
+ @Test
237
+ public void testDistinctByLongColumn()
238
+ {
239
+ schema = schema("_c0", Types.LONG, "_c1", Types.STRING);
240
+ String yaml = "" +
241
+ "type: distinct\n" +
242
+ "columns: [_c0]\n";
243
+
244
+ ConfigSource config = loadConfigFromYaml(yaml);
245
+ plugin.transaction(config, schema, new Control() {
246
+ @Override
247
+ public void run(TaskSource taskSource, Schema outputSchema)
248
+ {
249
+ MockPageOutput output = new MockPageOutput();
250
+
251
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
252
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
253
+ 1L, "a", // row: 1
254
+ 1L, "a", // row: 2
255
+ 1L, "b", // row: 3
256
+ 2L, "b", // row: 4
257
+ 2L, "a", // row: 5
258
+ 2L, "b", // row: 6
259
+ null, "a", // row: 7
260
+ null, "b" // row: 8
261
+ )
262
+ ) {
263
+ pageOutput.add(page);
264
+ }
265
+ pageOutput.finish();
266
+ }
267
+
268
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
269
+ assertEquals(3, records.size());
270
+
271
+ Object[] record1 = records.get(0);
272
+ assertEquals(1L, record1[0]);
273
+ assertEquals("a", record1[1]);
274
+
275
+ Object[] record2 = records.get(1);
276
+ assertEquals(2L, record2[0]);
277
+ assertEquals("b", record2[1]);
278
+
279
+ Object[] record3 = records.get(2);
280
+ assertNull(record3[0]);
281
+ assertEquals("a", record3[1]);
282
+ }
283
+ });
284
+ }
285
+
286
+ @Test
287
+ public void testDistinctByDoubleColumn()
288
+ {
289
+ schema = schema("_c0", Types.DOUBLE, "_c1", Types.STRING);
290
+ String yaml = "" +
291
+ "type: distinct\n" +
292
+ "columns: [_c0]\n";
293
+
294
+ ConfigSource config = loadConfigFromYaml(yaml);
295
+ plugin.transaction(config, schema, new Control() {
296
+ @Override
297
+ public void run(TaskSource taskSource, Schema outputSchema)
298
+ {
299
+ MockPageOutput output = new MockPageOutput();
300
+
301
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
302
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
303
+ 1.1, "a", // row: 1
304
+ 1.1, "a", // row: 2
305
+ 1.1, "b", // row: 3
306
+ 2.2, "b", // row: 4
307
+ 2.2, "a", // row: 5
308
+ 2.2, "b", // row: 6
309
+ null, "a", // row: 7
310
+ null, "b" // row: 8
311
+ )
312
+ ) {
313
+ pageOutput.add(page);
314
+ }
315
+ pageOutput.finish();
316
+ }
317
+
318
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
319
+ assertEquals(3, records.size());
320
+
321
+ Object[] record1 = records.get(0);
322
+ assertEquals(1.1, record1[0]);
323
+ assertEquals("a", record1[1]);
324
+
325
+ Object[] record2 = records.get(1);
326
+ assertEquals(2.2, record2[0]);
327
+ assertEquals("b", record2[1]);
328
+
329
+ Object[] record3 = records.get(2);
330
+ assertNull(record3[0]);
331
+ assertEquals("a", record3[1]);
332
+ }
333
+ });
334
+ }
335
+
336
+ @Test
337
+ public void testDistinctByBooleanColumn()
338
+ {
339
+ schema = schema("_c0", Types.BOOLEAN, "_c1", Types.STRING);
340
+ String yaml = "" +
341
+ "type: distinct\n" +
342
+ "columns: [_c0]\n";
343
+
344
+ ConfigSource config = loadConfigFromYaml(yaml);
345
+ plugin.transaction(config, schema, new Control() {
346
+ @Override
347
+ public void run(TaskSource taskSource, Schema outputSchema)
348
+ {
349
+ MockPageOutput output = new MockPageOutput();
350
+
351
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
352
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
353
+ true, "a", // row: 1
354
+ true, "a", // row: 2
355
+ true, "b", // row: 3
356
+ false, "b", // row: 4
357
+ false, "a", // row: 5
358
+ false, "b", // row: 6
359
+ null, "a", // row: 7
360
+ null, "b" // row: 8
361
+ )
362
+ ) {
363
+ pageOutput.add(page);
364
+ }
365
+ pageOutput.finish();
366
+ }
367
+
368
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
369
+ assertEquals(3, records.size());
370
+
371
+ Object[] record1 = records.get(0);
372
+ assertEquals(true, record1[0]);
373
+ assertEquals("a", record1[1]);
374
+
375
+ Object[] record2 = records.get(1);
376
+ assertEquals(false, record2[0]);
377
+ assertEquals("b", record2[1]);
378
+
379
+ Object[] record3 = records.get(2);
380
+ assertNull(record3[0]);
381
+ assertEquals("a", record3[1]);
382
+ }
383
+ });
384
+ }
385
+
386
+
387
+ @Test
388
+ public void testDistinctByTimestampColumn()
389
+ {
390
+ schema = schema("_c0", Types.TIMESTAMP, "_c1", Types.STRING);
391
+ String yaml = "" +
392
+ "type: distinct\n" +
393
+ "columns: [_c0]\n";
394
+
395
+ ConfigSource config = loadConfigFromYaml(yaml);
396
+ plugin.transaction(config, schema, new Control() {
397
+ @Override
398
+ public void run(TaskSource taskSource, Schema outputSchema)
399
+ {
400
+ MockPageOutput output = new MockPageOutput();
401
+
402
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
403
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
404
+ Timestamp.ofEpochSecond(1), "a", // row: 1
405
+ Timestamp.ofEpochSecond(1), "a", // row: 2
406
+ Timestamp.ofEpochSecond(1), "b", // row: 3
407
+ Timestamp.ofEpochSecond(2), "b", // row: 4
408
+ Timestamp.ofEpochSecond(2), "a", // row: 5
409
+ Timestamp.ofEpochSecond(2), "b", // row: 6
410
+ null, "a", // row: 7
411
+ null, "b" // row: 8
412
+ )
413
+ ) {
414
+ pageOutput.add(page);
415
+ }
416
+ pageOutput.finish();
417
+ }
418
+
419
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
420
+ assertEquals(3, records.size());
421
+
422
+ Object[] record1 = records.get(0);
423
+ assertEquals(Timestamp.ofEpochSecond(1), record1[0]);
424
+ assertEquals("a", record1[1]);
425
+
426
+ Object[] record2 = records.get(1);
427
+ assertEquals(Timestamp.ofEpochSecond(2), record2[0]);
428
+ assertEquals("b", record2[1]);
429
+
430
+ Object[] record3 = records.get(2);
431
+ assertNull(record3[0]);
432
+ assertEquals("a", record3[1]);
433
+ }
434
+ });
435
+ }
436
+
437
+
438
+ @Test
439
+ public void testDistinctByJsonColumn()
440
+ {
441
+ schema = schema("_c0", Types.JSON, "_c1", Types.STRING);
442
+ String yaml = "" +
443
+ "type: distinct\n" +
444
+ "columns: [_c0]\n";
445
+
446
+ final String json1 = "{\"a\":1,\"b\":\"b\"}";
447
+ final String json2 = "{\"a\":2,\"b\":\"b\"}";
448
+
449
+ ConfigSource config = loadConfigFromYaml(yaml);
450
+ plugin.transaction(config, schema, new Control() {
451
+ @Override
452
+ public void run(TaskSource taskSource, Schema outputSchema)
453
+ {
454
+ MockPageOutput output = new MockPageOutput();
455
+
456
+ try (PageOutput pageOutput = plugin.open(taskSource, schema, outputSchema, output)) {
457
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
458
+ jsonParser.parse(json1), "a", // row: 1
459
+ jsonParser.parse(json1), "a", // row: 2
460
+ jsonParser.parse(json1), "b", // row: 3
461
+ jsonParser.parse(json2), "b", // row: 4
462
+ jsonParser.parse(json2), "a", // row: 5
463
+ jsonParser.parse(json2), "b", // row: 6
464
+ null, "a", // row: 7
465
+ null, "b" // row: 8
466
+ )
467
+ ) {
468
+ pageOutput.add(page);
469
+ }
470
+ pageOutput.finish();
471
+ }
472
+
473
+ List<Object[]> records = Pages.toObjects(outputSchema, output.pages);
474
+ assertEquals(3, records.size());
475
+
476
+ Object[] record1 = records.get(0);
477
+ assertEquals(jsonParser.parse(json1), record1[0]);
478
+ assertEquals("a", record1[1]);
479
+
480
+ Object[] record2 = records.get(1);
481
+ assertEquals(jsonParser.parse(json2), record2[0]);
482
+ assertEquals("b", record2[1]);
483
+
484
+ Object[] record3 = records.get(2);
485
+ assertNull(record3[0]);
486
+ assertEquals("a", record3[1]);
487
+ }
488
+ });
489
+ }
5
490
  }
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-distinct
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-05 00:00:00.000000000 Z
11
+ date: 2017-08-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
14
  requirement: !ruby/object:Gem::Requirement
21
15
  requirements:
22
16
  - - ~>
23
17
  - !ruby/object:Gem::Version
24
18
  version: '1.0'
19
+ name: bundler
25
20
  prerelease: false
26
21
  type: :development
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - '>='
24
+ - - ~>
32
25
  - !ruby/object:Gem::Version
33
- version: '10.0'
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
34
28
  requirement: !ruby/object:Gem::Requirement
35
29
  requirements:
36
30
  - - '>='
37
31
  - !ruby/object:Gem::Version
38
32
  version: '10.0'
33
+ name: rake
39
34
  prerelease: false
40
35
  type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
41
  description: Distinct
42
42
  email:
43
43
  - civitaspo@gmail.com
@@ -50,6 +50,8 @@ files:
50
50
  - LICENSE.txt
51
51
  - README.md
52
52
  - build.gradle
53
+ - config/checkstyle/checkstyle.xml
54
+ - config/checkstyle/default.xml
53
55
  - example/config.yml
54
56
  - example/data.csv
55
57
  - gradle/wrapper/gradle-wrapper.jar
@@ -61,7 +63,7 @@ files:
61
63
  - src/main/java/org/embulk/filter/distinct/DistinctFilterPageOutput.java
62
64
  - src/main/java/org/embulk/filter/distinct/DistinctFilterPlugin.java
63
65
  - src/test/java/org/embulk/filter/distinct/TestDistinctFilterPlugin.java
64
- - classpath/embulk-filter-distinct-0.0.3.jar
66
+ - classpath/embulk-filter-distinct-0.0.4.jar
65
67
  homepage: https://github.com/civitaspo/embulk-filter-distinct
66
68
  licenses:
67
69
  - MIT