embulk-filter-distinct 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d599cdfb56035c6d6cb89ac29440ed549ccbecec
|
4
|
+
data.tar.gz: 9f78869da19e5367ff6ecdc8281fcc634dd13edf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 488c534c6fe14eb5cb63c267b96c4f725a15f5cdc608ca5ea9a7dc05a3f9f3aeb49dd620ed3b6b82e2ee4cc69cdadd4090f76b7115edb6ae47ae4388d3c27f69
|
7
|
+
data.tar.gz: bed20d3550b27ab998d11a2dad42ad66a855c467c06fdce3a850de537d9f3d9b23fd21aaeeb4d7ef984c295d2534fc2d387711edac4c1e6530b391de82ca5ab8
|
data/CHANGELOG.md
CHANGED
data/build.gradle
CHANGED
@@ -1,9 +1,7 @@
|
|
1
1
|
package org.embulk.filter.distinct;
|
2
2
|
|
3
3
|
import com.google.common.base.Optional;
|
4
|
-
import com.google.common.base.Strings;
|
5
4
|
import com.google.common.collect.ImmutableList;
|
6
|
-
import com.google.common.collect.ObjectArrays;
|
7
5
|
import com.google.common.collect.Sets;
|
8
6
|
import org.embulk.filter.distinct.DistinctFilterPlugin.PluginTask;
|
9
7
|
import org.embulk.spi.Column;
|
@@ -22,20 +20,20 @@ import java.util.Set;
|
|
22
20
|
/**
|
23
21
|
* Created by takahiro.nakayama on 12/6/15.
|
24
22
|
*/
|
25
|
-
class FilteredPageOutput
|
23
|
+
class DistinctFilterPageOutput
|
26
24
|
implements PageOutput
|
27
25
|
{
|
28
|
-
private final static Logger logger = Exec.getLogger(FilteredPageOutput.class);
|
26
|
+
private final static Logger logger = Exec.getLogger(DistinctFilterPageOutput.class);
|
29
27
|
private final PageReader pageReader;
|
30
28
|
private final PageBuilder pageBuilder;
|
31
29
|
private final ColumnVisitorImpl visitor;
|
32
30
|
private final Schema outputSchema;
|
33
31
|
private final List<Column> distinctColumns;
|
34
32
|
|
35
|
-
private final static Set<List<Object>>
|
33
|
+
private final static Set<List<Object>> set = Sets.newConcurrentHashSet();
|
36
34
|
|
37
|
-
|
38
|
-
|
35
|
+
DistinctFilterPageOutput(PluginTask task, Schema inputSchema,
|
36
|
+
Schema outputSchema, PageOutput pageOutput)
|
39
37
|
{
|
40
38
|
this.pageReader = new PageReader(inputSchema);
|
41
39
|
this.pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, pageOutput);
|
@@ -100,12 +98,12 @@ class FilteredPageOutput
|
|
100
98
|
return builder.build();
|
101
99
|
}
|
102
100
|
|
103
|
-
private boolean isDistinct(List<Object>
|
104
|
-
if (
|
101
|
+
private boolean isDistinct(List<Object> values) {
|
102
|
+
if (set.add(values)) {
|
105
103
|
return true;
|
106
104
|
}
|
107
105
|
else {
|
108
|
-
logger.debug("Duplicated
|
106
|
+
logger.debug("Duplicated values: {}", values);
|
109
107
|
return false;
|
110
108
|
}
|
111
109
|
}
|
@@ -2,7 +2,6 @@ package org.embulk.filter.distinct;
|
|
2
2
|
|
3
3
|
import com.google.common.collect.ImmutableList;
|
4
4
|
import org.embulk.config.Config;
|
5
|
-
import org.embulk.config.ConfigDefault;
|
6
5
|
import org.embulk.config.ConfigException;
|
7
6
|
import org.embulk.config.ConfigInject;
|
8
7
|
import org.embulk.config.ConfigSource;
|
@@ -35,7 +34,7 @@ public class DistinctFilterPlugin
|
|
35
34
|
|
36
35
|
@Override
|
37
36
|
public void transaction(ConfigSource config, Schema inputSchema,
|
38
|
-
|
37
|
+
FilterPlugin.Control control)
|
39
38
|
{
|
40
39
|
PluginTask task = config.loadConfig(PluginTask.class);
|
41
40
|
|
@@ -59,8 +58,8 @@ public class DistinctFilterPlugin
|
|
59
58
|
final Schema outputSchema, final PageOutput output)
|
60
59
|
{
|
61
60
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
62
|
-
return new
|
63
|
-
|
61
|
+
return new DistinctFilterPageOutput(task, inputSchema,
|
62
|
+
outputSchema, output);
|
64
63
|
}
|
65
64
|
|
66
65
|
private List<Column> convertNameToColumn(Schema inputSchema, List<String> columnNames)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-distinct
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -58,10 +58,10 @@ files:
|
|
58
58
|
- gradlew.bat
|
59
59
|
- lib/embulk/filter/distinct.rb
|
60
60
|
- src/main/java/org/embulk/filter/distinct/ColumnVisitorImpl.java
|
61
|
+
- src/main/java/org/embulk/filter/distinct/DistinctFilterPageOutput.java
|
61
62
|
- src/main/java/org/embulk/filter/distinct/DistinctFilterPlugin.java
|
62
|
-
- src/main/java/org/embulk/filter/distinct/FilteredPageOutput.java
|
63
63
|
- src/test/java/org/embulk/filter/distinct/TestDistinctFilterPlugin.java
|
64
|
-
- classpath/embulk-filter-distinct-0.0.2.jar
|
64
|
+
- classpath/embulk-filter-distinct-0.0.3.jar
|
65
65
|
homepage: https://github.com/civitaspo/embulk-filter-distinct
|
66
66
|
licenses:
|
67
67
|
- MIT
|