embulk-filter-distinct 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d599cdfb56035c6d6cb89ac29440ed549ccbecec
|
4
|
+
data.tar.gz: 9f78869da19e5367ff6ecdc8281fcc634dd13edf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 488c534c6fe14eb5cb63c267b96c4f725a15f5cdc608ca5ea9a7dc05a3f9f3aeb49dd620ed3b6b82e2ee4cc69cdadd4090f76b7115edb6ae47ae4388d3c27f69
|
7
|
+
data.tar.gz: bed20d3550b27ab998d11a2dad42ad66a855c467c06fdce3a850de537d9f3d9b23fd21aaeeb4d7ef984c295d2534fc2d387711edac4c1e6530b391de82ca5ab8
|
data/CHANGELOG.md
CHANGED
data/build.gradle
CHANGED
@@ -1,9 +1,7 @@
|
|
1
1
|
package org.embulk.filter.distinct;
|
2
2
|
|
3
3
|
import com.google.common.base.Optional;
|
4
|
-
import com.google.common.base.Strings;
|
5
4
|
import com.google.common.collect.ImmutableList;
|
6
|
-
import com.google.common.collect.ObjectArrays;
|
7
5
|
import com.google.common.collect.Sets;
|
8
6
|
import org.embulk.filter.distinct.DistinctFilterPlugin.PluginTask;
|
9
7
|
import org.embulk.spi.Column;
|
@@ -22,20 +20,20 @@ import java.util.Set;
|
|
22
20
|
/**
|
23
21
|
* Created by takahiro.nakayama on 12/6/15.
|
24
22
|
*/
|
25
|
-
class FilteredPageOutput
|
23
|
+
class DistinctFilterPageOutput
|
26
24
|
implements PageOutput
|
27
25
|
{
|
28
|
-
private final static Logger logger = Exec.getLogger(FilteredPageOutput.class);
|
26
|
+
private final static Logger logger = Exec.getLogger(DistinctFilterPageOutput.class);
|
29
27
|
private final PageReader pageReader;
|
30
28
|
private final PageBuilder pageBuilder;
|
31
29
|
private final ColumnVisitorImpl visitor;
|
32
30
|
private final Schema outputSchema;
|
33
31
|
private final List<Column> distinctColumns;
|
34
32
|
|
35
|
-
private final static Set<List<Object>>
|
33
|
+
private final static Set<List<Object>> set = Sets.newConcurrentHashSet();
|
36
34
|
|
37
|
-
|
38
|
-
|
35
|
+
DistinctFilterPageOutput(PluginTask task, Schema inputSchema,
|
36
|
+
Schema outputSchema, PageOutput pageOutput)
|
39
37
|
{
|
40
38
|
this.pageReader = new PageReader(inputSchema);
|
41
39
|
this.pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, pageOutput);
|
@@ -100,12 +98,12 @@ class FilteredPageOutput
|
|
100
98
|
return builder.build();
|
101
99
|
}
|
102
100
|
|
103
|
-
private boolean isDistinct(List<Object>
|
104
|
-
if (
|
101
|
+
private boolean isDistinct(List<Object> values) {
|
102
|
+
if (set.add(values)) {
|
105
103
|
return true;
|
106
104
|
}
|
107
105
|
else {
|
108
|
-
logger.debug("Duplicated
|
106
|
+
logger.debug("Duplicated values: {}", values);
|
109
107
|
return false;
|
110
108
|
}
|
111
109
|
}
|
@@ -2,7 +2,6 @@ package org.embulk.filter.distinct;
|
|
2
2
|
|
3
3
|
import com.google.common.collect.ImmutableList;
|
4
4
|
import org.embulk.config.Config;
|
5
|
-
import org.embulk.config.ConfigDefault;
|
6
5
|
import org.embulk.config.ConfigException;
|
7
6
|
import org.embulk.config.ConfigInject;
|
8
7
|
import org.embulk.config.ConfigSource;
|
@@ -35,7 +34,7 @@ public class DistinctFilterPlugin
|
|
35
34
|
|
36
35
|
@Override
|
37
36
|
public void transaction(ConfigSource config, Schema inputSchema,
|
38
|
-
|
37
|
+
FilterPlugin.Control control)
|
39
38
|
{
|
40
39
|
PluginTask task = config.loadConfig(PluginTask.class);
|
41
40
|
|
@@ -59,8 +58,8 @@ public class DistinctFilterPlugin
|
|
59
58
|
final Schema outputSchema, final PageOutput output)
|
60
59
|
{
|
61
60
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
62
|
-
return new
|
63
|
-
|
61
|
+
return new DistinctFilterPageOutput(task, inputSchema,
|
62
|
+
outputSchema, output);
|
64
63
|
}
|
65
64
|
|
66
65
|
private List<Column> convertNameToColumn(Schema inputSchema, List<String> columnNames)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-distinct
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -58,10 +58,10 @@ files:
|
|
58
58
|
- gradlew.bat
|
59
59
|
- lib/embulk/filter/distinct.rb
|
60
60
|
- src/main/java/org/embulk/filter/distinct/ColumnVisitorImpl.java
|
61
|
+
- src/main/java/org/embulk/filter/distinct/DistinctFilterPageOutput.java
|
61
62
|
- src/main/java/org/embulk/filter/distinct/DistinctFilterPlugin.java
|
62
|
-
- src/main/java/org/embulk/filter/distinct/FilteredPageOutput.java
|
63
63
|
- src/test/java/org/embulk/filter/distinct/TestDistinctFilterPlugin.java
|
64
|
-
- classpath/embulk-filter-distinct-0.0.2.jar
|
64
|
+
- classpath/embulk-filter-distinct-0.0.3.jar
|
65
65
|
homepage: https://github.com/civitaspo/embulk-filter-distinct
|
66
66
|
licenses:
|
67
67
|
- MIT
|