embulk-filter-distinct 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/filter/distinct/FilteredPageOutput.java +33 -21
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2a22e5f7f1bec26311291809343774c9d5563abb
|
4
|
+
data.tar.gz: a299a75dd6efa46f3051b6f4c6b8a0ce5dc661a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b33112403b386df0fffc317b8690bac18cbb96bd6c8b9a72903c113dd87a7ba7b5b3b466342e355834a69bde0c8a9198c61d5ce1393279b06c648e2b07baf215
|
7
|
+
data.tar.gz: 440db0f0e64bc4b9216317cad7f1a5f87af9fefb0d86ef206d9cd6c1a3e4a8d893c2d071b82c96c385f8edb4b482cf4cba062d3ca9928a5b634a7048ee6ab1d1
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
0.0.2 (2015-12-09)
|
2
|
+
==================
|
3
|
+
|
4
|
+
- Fix a bug: when the distinct key includes null, this plugin did not guarantee the distinctness.
|
5
|
+
- Add debug log: the filtered key like `Duplicated key: [value1, value2, value3]`
|
6
|
+
|
1
7
|
0.0.1 (2015-12-08)
|
2
8
|
==================
|
3
9
|
|
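data/build.gradle
CHANGED
[diff body missing from this extract: +1 -1]
data/src/main/java/org/embulk/filter/distinct/FilteredPageOutput.java
CHANGED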
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.filter.distinct;
|
2
2
|
|
3
|
+
import com.google.common.base.Optional;
|
3
4
|
import com.google.common.base.Strings;
|
4
5
|
import com.google.common.collect.ImmutableList;
|
5
6
|
import com.google.common.collect.ObjectArrays;
|
@@ -49,7 +50,7 @@ class FilteredPageOutput
|
|
49
50
|
pageReader.setPage(page);
|
50
51
|
|
51
52
|
while (pageReader.nextRecord()) {
|
52
|
-
if (
|
53
|
+
if (isDistinct(getCurrentValues())) {
|
53
54
|
outputSchema.visitColumns(visitor);
|
54
55
|
pageBuilder.addRecord();
|
55
56
|
}
|
@@ -69,32 +70,43 @@ class FilteredPageOutput
|
|
69
70
|
pageBuilder.close();
|
70
71
|
}
|
71
72
|
|
72
|
-
private List<Object>
|
73
|
+
private List<Object> getCurrentValues()
|
73
74
|
{
|
74
75
|
ImmutableList.Builder<Object> builder = ImmutableList.builder();
|
75
76
|
for (Column distinctColumn : distinctColumns) {
|
76
|
-
if (
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
77
|
+
if (pageReader.isNull(distinctColumn)) {
|
78
|
+
builder.add(Optional.absent());
|
79
|
+
}
|
80
|
+
else if (Types.BOOLEAN.equals(distinctColumn.getType())) {
|
81
|
+
builder.add(pageReader.getBoolean(distinctColumn));
|
82
|
+
}
|
83
|
+
else if (Types.DOUBLE.equals(distinctColumn.getType())) {
|
84
|
+
builder.add(pageReader.getDouble(distinctColumn));
|
85
|
+
}
|
86
|
+
else if (Types.LONG.equals(distinctColumn.getType())) {
|
87
|
+
builder.add(pageReader.getLong(distinctColumn));
|
88
|
+
}
|
89
|
+
else if (Types.STRING.equals(distinctColumn.getType())) {
|
90
|
+
builder.add(pageReader.getString(distinctColumn));
|
91
|
+
}
|
92
|
+
else if (Types.TIMESTAMP.equals(distinctColumn.getType())) {
|
93
|
+
builder.add(pageReader.getTimestamp(distinctColumn));
|
94
|
+
}
|
95
|
+
else {
|
96
|
+
throw new RuntimeException("unsupported type: " + distinctColumn.getType());
|
95
97
|
}
|
96
98
|
}
|
97
99
|
|
98
100
|
return builder.build();
|
99
101
|
}
|
102
|
+
|
103
|
+
private boolean isDistinct(List<Object> key) {
|
104
|
+
if (filter.add(key)) {
|
105
|
+
return true;
|
106
|
+
}
|
107
|
+
else {
|
108
|
+
logger.debug("Duplicated key: {}", key);
|
109
|
+
return false;
|
110
|
+
}
|
111
|
+
}
|
100
112
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-distinct
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-08 00:00:00.000000000 Z
|
11
|
+
date: 2015-12-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -61,7 +61,7 @@ files:
|
|
61
61
|
- src/main/java/org/embulk/filter/distinct/DistinctFilterPlugin.java
|
62
62
|
- src/main/java/org/embulk/filter/distinct/FilteredPageOutput.java
|
63
63
|
- src/test/java/org/embulk/filter/distinct/TestDistinctFilterPlugin.java
|
64
|
-
- classpath/embulk-filter-distinct-0.0.1.jar
|
64
|
+
- classpath/embulk-filter-distinct-0.0.2.jar
|
65
65
|
homepage: https://github.com/civitaspo/embulk-filter-distinct
|
66
66
|
licenses:
|
67
67
|
- MIT
|