embulk-filter-distinct 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/filter/distinct/FilteredPageOutput.java +33 -21
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2a22e5f7f1bec26311291809343774c9d5563abb
|
4
|
+
data.tar.gz: a299a75dd6efa46f3051b6f4c6b8a0ce5dc661a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b33112403b386df0fffc317b8690bac18cbb96bd6c8b9a72903c113dd87a7ba7b5b3b466342e355834a69bde0c8a9198c61d5ce1393279b06c648e2b07baf215
|
7
|
+
data.tar.gz: 440db0f0e64bc4b9216317cad7f1a5f87af9fefb0d86ef206d9cd6c1a3e4a8d893c2d071b82c96c385f8edb4b482cf4cba062d3ca9928a5b634a7048ee6ab1d1
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
0.0.2 (2015-12-09)
|
2
|
+
==================
|
3
|
+
|
4
|
+
- Fix a bug: when the distinct key includes null, this plugin did not guarantee the distinctness.
|
5
|
+
- Add debug log: the filtered key like `Duplicated key: [value1, value2, value3]`
|
6
|
+
|
1
7
|
0.0.1 (2015-12-08)
|
2
8
|
==================
|
3
9
|
|
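data/build.gradle
CHANGED
(The +1 -1 diff body for this file is missing from this capture; the hunks below belong to the next file.)
data/src/main/java/org/embulk/filter/distinct/FilteredPageOutput.java
CHANGED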
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.filter.distinct;
|
2
2
|
|
3
|
+
import com.google.common.base.Optional;
|
3
4
|
import com.google.common.base.Strings;
|
4
5
|
import com.google.common.collect.ImmutableList;
|
5
6
|
import com.google.common.collect.ObjectArrays;
|
@@ -49,7 +50,7 @@ class FilteredPageOutput
|
|
49
50
|
pageReader.setPage(page);
|
50
51
|
|
51
52
|
while (pageReader.nextRecord()) {
|
52
|
-
if (
|
53
|
+
if (isDistinct(getCurrentValues())) {
|
53
54
|
outputSchema.visitColumns(visitor);
|
54
55
|
pageBuilder.addRecord();
|
55
56
|
}
|
@@ -69,32 +70,43 @@ class FilteredPageOutput
|
|
69
70
|
pageBuilder.close();
|
70
71
|
}
|
71
72
|
|
72
|
-
private List<Object>
|
73
|
+
private List<Object> getCurrentValues()
|
73
74
|
{
|
74
75
|
ImmutableList.Builder<Object> builder = ImmutableList.builder();
|
75
76
|
for (Column distinctColumn : distinctColumns) {
|
76
|
-
if (
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
77
|
+
if (pageReader.isNull(distinctColumn)) {
|
78
|
+
builder.add(Optional.absent());
|
79
|
+
}
|
80
|
+
else if (Types.BOOLEAN.equals(distinctColumn.getType())) {
|
81
|
+
builder.add(pageReader.getBoolean(distinctColumn));
|
82
|
+
}
|
83
|
+
else if (Types.DOUBLE.equals(distinctColumn.getType())) {
|
84
|
+
builder.add(pageReader.getDouble(distinctColumn));
|
85
|
+
}
|
86
|
+
else if (Types.LONG.equals(distinctColumn.getType())) {
|
87
|
+
builder.add(pageReader.getLong(distinctColumn));
|
88
|
+
}
|
89
|
+
else if (Types.STRING.equals(distinctColumn.getType())) {
|
90
|
+
builder.add(pageReader.getString(distinctColumn));
|
91
|
+
}
|
92
|
+
else if (Types.TIMESTAMP.equals(distinctColumn.getType())) {
|
93
|
+
builder.add(pageReader.getTimestamp(distinctColumn));
|
94
|
+
}
|
95
|
+
else {
|
96
|
+
throw new RuntimeException("unsupported type: " + distinctColumn.getType());
|
95
97
|
}
|
96
98
|
}
|
97
99
|
|
98
100
|
return builder.build();
|
99
101
|
}
|
102
|
+
|
103
|
+
private boolean isDistinct(List<Object> key) {
|
104
|
+
if (filter.add(key)) {
|
105
|
+
return true;
|
106
|
+
}
|
107
|
+
else {
|
108
|
+
logger.debug("Duplicated key: {}", key);
|
109
|
+
return false;
|
110
|
+
}
|
111
|
+
}
|
100
112
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-distinct
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-08 00:00:00.000000000 Z
|
11
|
+
date: 2015-12-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -61,7 +61,7 @@ files:
|
|
61
61
|
- src/main/java/org/embulk/filter/distinct/DistinctFilterPlugin.java
|
62
62
|
- src/main/java/org/embulk/filter/distinct/FilteredPageOutput.java
|
63
63
|
- src/test/java/org/embulk/filter/distinct/TestDistinctFilterPlugin.java
|
64
|
-
- classpath/embulk-filter-distinct-0.0.1.jar
|
64
|
+
- classpath/embulk-filter-distinct-0.0.2.jar
|
65
65
|
homepage: https://github.com/civitaspo/embulk-filter-distinct
|
66
66
|
licenses:
|
67
67
|
- MIT
|