embulk-filter-distinct 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5c300a014b0ed10c8ef0c71f17d1f7acde8a0f6e
4
- data.tar.gz: 25680ed19adfddd53bb7cadaf4d538c9838ea53e
3
+ metadata.gz: 2a22e5f7f1bec26311291809343774c9d5563abb
4
+ data.tar.gz: a299a75dd6efa46f3051b6f4c6b8a0ce5dc661a5
5
5
  SHA512:
6
- metadata.gz: 5f2ea1a43abe07442ec07c415536da6aa0202c89f73d5d28cfb53f44daf2eb12dc36e1d66376078a0f19adc13bd15f52cf9487db34d4500a27dd28730cd9036f
7
- data.tar.gz: 061ab68e1a192c0e8df94625061b0a0f5be974fc7fc555dafabd58795496c3faf03f6a0c4890346f3c9b9c3025a33fef0c9ab232568386745e5616a968672f7c
6
+ metadata.gz: b33112403b386df0fffc317b8690bac18cbb96bd6c8b9a72903c113dd87a7ba7b5b3b466342e355834a69bde0c8a9198c61d5ce1393279b06c648e2b07baf215
7
+ data.tar.gz: 440db0f0e64bc4b9216317cad7f1a5f87af9fefb0d86ef206d9cd6c1a3e4a8d893c2d071b82c96c385f8edb4b482cf4cba062d3ca9928a5b634a7048ee6ab1d1
@@ -1,3 +1,9 @@
1
+ 0.0.2 (2015-12-09)
2
+ ==================
3
+
4
+ - Fix a bug: when a distinct key contained null, the plugin did not guarantee distinctness.
5
+ - Add debug log: each filtered (duplicated) key is logged, e.g. `Duplicated key: [value1, value2, value3]`
6
+
1
7
  0.0.1 (2015-12-08)
2
8
  ==================
3
9
 
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.0.1"
18
+ version = "0.0.2"
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
21
21
 
@@ -1,5 +1,6 @@
1
1
  package org.embulk.filter.distinct;
2
2
 
3
+ import com.google.common.base.Optional;
3
4
  import com.google.common.base.Strings;
4
5
  import com.google.common.collect.ImmutableList;
5
6
  import com.google.common.collect.ObjectArrays;
@@ -49,7 +50,7 @@ class FilteredPageOutput
49
50
  pageReader.setPage(page);
50
51
 
51
52
  while (pageReader.nextRecord()) {
52
- if (filter.add(getCurrentDistinctKey())) {
53
+ if (isDistinct(getCurrentValues())) {
53
54
  outputSchema.visitColumns(visitor);
54
55
  pageBuilder.addRecord();
55
56
  }
@@ -69,32 +70,43 @@ class FilteredPageOutput
69
70
  pageBuilder.close();
70
71
  }
71
72
 
72
- private List<Object> getCurrentDistinctKey()
73
+ private List<Object> getCurrentValues()
73
74
  {
74
75
  ImmutableList.Builder<Object> builder = ImmutableList.builder();
75
76
  for (Column distinctColumn : distinctColumns) {
76
- if (!pageReader.isNull(distinctColumn)) {
77
- if (Types.BOOLEAN.equals(distinctColumn.getType())) {
78
- builder.add(pageReader.getBoolean(distinctColumn));
79
- }
80
- else if (Types.DOUBLE.equals(distinctColumn.getType())) {
81
- builder.add(pageReader.getDouble(distinctColumn));
82
- }
83
- else if (Types.LONG.equals(distinctColumn.getType())) {
84
- builder.add(pageReader.getLong(distinctColumn));
85
- }
86
- else if (Types.STRING.equals(distinctColumn.getType())) {
87
- builder.add(pageReader.getString(distinctColumn));
88
- }
89
- else if (Types.TIMESTAMP.equals(distinctColumn.getType())) {
90
- builder.add(pageReader.getTimestamp(distinctColumn));
91
- }
92
- else {
93
- throw new RuntimeException("unsupported type: " + distinctColumn.getType());
94
- }
77
+ if (pageReader.isNull(distinctColumn)) {
78
+ builder.add(Optional.absent());
79
+ }
80
+ else if (Types.BOOLEAN.equals(distinctColumn.getType())) {
81
+ builder.add(pageReader.getBoolean(distinctColumn));
82
+ }
83
+ else if (Types.DOUBLE.equals(distinctColumn.getType())) {
84
+ builder.add(pageReader.getDouble(distinctColumn));
85
+ }
86
+ else if (Types.LONG.equals(distinctColumn.getType())) {
87
+ builder.add(pageReader.getLong(distinctColumn));
88
+ }
89
+ else if (Types.STRING.equals(distinctColumn.getType())) {
90
+ builder.add(pageReader.getString(distinctColumn));
91
+ }
92
+ else if (Types.TIMESTAMP.equals(distinctColumn.getType())) {
93
+ builder.add(pageReader.getTimestamp(distinctColumn));
94
+ }
95
+ else {
96
+ throw new RuntimeException("unsupported type: " + distinctColumn.getType());
95
97
  }
96
98
  }
97
99
 
98
100
  return builder.build();
99
101
  }
102
+
103
+ private boolean isDistinct(List<Object> key) {
104
+ if (filter.add(key)) {
105
+ return true;
106
+ }
107
+ else {
108
+ logger.debug("Duplicated key: {}", key);
109
+ return false;
110
+ }
111
+ }
100
112
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-distinct
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-08 00:00:00.000000000 Z
11
+ date: 2015-12-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -61,7 +61,7 @@ files:
61
61
  - src/main/java/org/embulk/filter/distinct/DistinctFilterPlugin.java
62
62
  - src/main/java/org/embulk/filter/distinct/FilteredPageOutput.java
63
63
  - src/test/java/org/embulk/filter/distinct/TestDistinctFilterPlugin.java
64
- - classpath/embulk-filter-distinct-0.0.1.jar
64
+ - classpath/embulk-filter-distinct-0.0.2.jar
65
65
  homepage: https://github.com/civitaspo/embulk-filter-distinct
66
66
  licenses:
67
67
  - MIT