embulk-filter-column 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +13 -2
- data/build.gradle +3 -3
- data/example.yml +2 -0
- data/src/main/java/org/embulk/filter/ColumnFilterPlugin.java +85 -26
- data/src/main/java/org/embulk/filter/column/ColumnConfig.java +8 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4b22fc2b07962eb1295a87ad76ecf7d373e8244
|
4
|
+
data.tar.gz: 4c81bf7c05f93f544c883adf165751108a62c7bd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc307a1b813dfd435d8d94c60f44886410546dd1065170218cc8c331930b940d578ed99a92f38f705566b73dc2c08878456762a3924cb8b5adf39e8144bdbd83
|
7
|
+
data.tar.gz: a304d053cc7cf08b562afc84dc663909114b6c73da262fb4be2b82b9c58f17975014ad0d59e3a4d383f30e6f636fae4873f96a704f65dbf962f684eb84eebb74
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -7,6 +7,10 @@ A filter plugin for Embulk to filter out columns
|
|
7
7
|
- **columns**: columns (array of hash, required)
|
8
8
|
- **name**: name of column
|
9
9
|
- **default**: default value used if input is null
|
10
|
+
- **format**: special option for timestamp columns; specifies the format used to parse the `default` value (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
|
11
|
+
- **timezone**: special option for timestamp columns; specifies the timezone used to parse the `default` value (string, default is `UTC`)
|
12
|
+
|
13
|
+
NOTE: column type is automatically retrieved from input data (inputSchema)
|
10
14
|
|
11
15
|
## Example
|
12
16
|
|
@@ -14,11 +18,12 @@ A filter plugin for Embulk to filter out columns
|
|
14
18
|
filters:
|
15
19
|
- type: column
|
16
20
|
columns:
|
21
|
+
- {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
|
17
22
|
- {name: id}
|
18
|
-
- {name: name, default:
|
23
|
+
- {name: name, default: "foo"}
|
19
24
|
```
|
20
25
|
|
21
|
-
reduces columns to only `id
|
26
|
+
This reduces the output to only the `time`, `id`, and `name` columns.
|
22
27
|
|
23
28
|
## Development
|
24
29
|
|
@@ -29,6 +34,12 @@ $ ./gradlew classpath
|
|
29
34
|
$ embulk run -I lib example.yml
|
30
35
|
```
|
31
36
|
|
37
|
+
Run test:
|
38
|
+
|
39
|
+
```
|
40
|
+
$ ./gradlew test
|
41
|
+
```
|
42
|
+
|
32
43
|
Release gem:
|
33
44
|
|
34
45
|
```
|
data/build.gradle
CHANGED
@@ -12,11 +12,11 @@ configurations {
|
|
12
12
|
provided
|
13
13
|
}
|
14
14
|
|
15
|
-
version = "0.1.4"
|
15
|
+
version = "0.1.5"
|
16
16
|
|
17
17
|
dependencies {
|
18
|
-
compile "org.embulk:embulk-core:0.6.
|
19
|
-
provided "org.embulk:embulk-core:0.6.
|
18
|
+
compile "org.embulk:embulk-core:0.6.16"
|
19
|
+
provided "org.embulk:embulk-core:0.6.16"
|
20
20
|
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
|
21
21
|
testCompile "junit:junit:4.+"
|
22
22
|
}
|
data/example.yml
CHANGED
@@ -16,12 +16,14 @@ in:
|
|
16
16
|
skip_header_lines: 1
|
17
17
|
comment_line_marker: '#'
|
18
18
|
columns:
|
19
|
+
- {name: time, type: timestamp, format: "%Y-%m-%d"}
|
19
20
|
- {name: id, type: long}
|
20
21
|
- {name: name, type: string}
|
21
22
|
- {name: score, type: double}
|
22
23
|
filters:
|
23
24
|
- type: column
|
24
25
|
columns:
|
26
|
+
- {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
|
25
27
|
- {name: name, default: "foo"}
|
26
28
|
- {name: foo, default: 1}
|
27
29
|
- {name: id}
|
@@ -1,5 +1,10 @@
|
|
1
1
|
package org.embulk.filter;
|
2
2
|
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.HashMap;
|
5
|
+
import com.google.common.collect.ImmutableList;
|
6
|
+
import org.slf4j.Logger;
|
7
|
+
|
3
8
|
import org.embulk.config.Config;
|
4
9
|
import org.embulk.config.ConfigDefault;
|
5
10
|
import org.embulk.config.ConfigDiff;
|
@@ -7,11 +12,12 @@ import org.embulk.config.ConfigSource;
|
|
7
12
|
import org.embulk.config.Task;
|
8
13
|
import org.embulk.config.TaskSource;
|
9
14
|
|
10
|
-
import java.util.List;
|
11
|
-
import java.util.HashMap;
|
12
15
|
import org.embulk.spi.type.Type;
|
16
|
+
import org.embulk.spi.type.BooleanType;
|
17
|
+
import org.embulk.spi.type.LongType;
|
18
|
+
import org.embulk.spi.type.DoubleType;
|
19
|
+
import org.embulk.spi.type.StringType;
|
13
20
|
import org.embulk.spi.type.TimestampType;
|
14
|
-
import com.google.common.collect.ImmutableList;
|
15
21
|
|
16
22
|
import org.embulk.spi.FilterPlugin;
|
17
23
|
import org.embulk.spi.Exec;
|
@@ -25,9 +31,16 @@ import org.embulk.spi.Column;
|
|
25
31
|
import org.embulk.spi.ColumnVisitor;
|
26
32
|
import org.embulk.filter.column.ColumnConfig; // note: different with spi.ColumnConfig
|
27
33
|
|
34
|
+
import org.joda.time.DateTimeZone;
|
35
|
+
import org.embulk.spi.time.Timestamp;
|
36
|
+
import org.embulk.spi.time.TimestampParser;
|
37
|
+
import org.embulk.spi.time.TimestampParseException;
|
38
|
+
import com.google.common.base.Throwables;
|
39
|
+
|
28
40
|
public class ColumnFilterPlugin implements FilterPlugin
|
29
41
|
{
|
30
|
-
public interface PluginTask
|
42
|
+
public interface PluginTask
|
43
|
+
extends Task, TimestampParser.Task
|
31
44
|
{
|
32
45
|
@Config("columns")
|
33
46
|
public List<ColumnConfig> getColumns();
|
@@ -58,12 +71,20 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
58
71
|
control.run(task.dump(), outputSchema);
|
59
72
|
}
|
60
73
|
|
74
|
+
private final Logger log;
|
75
|
+
|
76
|
+
public ColumnFilterPlugin()
|
77
|
+
{
|
78
|
+
log = Exec.getLogger(ColumnFilterPlugin.class);
|
79
|
+
}
|
80
|
+
|
61
81
|
@Override
|
62
82
|
public PageOutput open(TaskSource taskSource, Schema inputSchema,
|
63
83
|
Schema outputSchema, PageOutput output)
|
64
84
|
{
|
65
85
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
66
86
|
|
87
|
+
// Map outputColumn => inputColumn
|
67
88
|
HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
|
68
89
|
for (Column outputColumn: outputSchema.getColumns()) {
|
69
90
|
for (Column inputColumn: inputSchema.getColumns()) {
|
@@ -74,12 +95,52 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
74
95
|
}
|
75
96
|
}
|
76
97
|
|
77
|
-
|
98
|
+
// Map outputColumn => default value if present
|
99
|
+
HashMap<Column, Object> outputDefaultMap = new HashMap<Column, Object>();
|
78
100
|
for (Column outputColumn: outputSchema.getColumns()) {
|
101
|
+
Type columnType = outputColumn.getType();
|
102
|
+
|
79
103
|
for (ColumnConfig columnConfig : task.getColumns()) {
|
80
104
|
if (columnConfig.getName().equals(outputColumn.getName())) {
|
81
|
-
|
82
|
-
|
105
|
+
|
106
|
+
if (columnType instanceof BooleanType) {
|
107
|
+
if (columnConfig.getDefault().isPresent()) {
|
108
|
+
Boolean default_value = (Boolean)columnConfig.getDefault().get();
|
109
|
+
outputDefaultMap.put(outputColumn, default_value);
|
110
|
+
}
|
111
|
+
}
|
112
|
+
else if (columnType instanceof LongType) {
|
113
|
+
if (columnConfig.getDefault().isPresent()) {
|
114
|
+
Long default_value = new Long(columnConfig.getDefault().get().toString());
|
115
|
+
outputDefaultMap.put(outputColumn, default_value);
|
116
|
+
}
|
117
|
+
}
|
118
|
+
else if (columnType instanceof DoubleType) {
|
119
|
+
if (columnConfig.getDefault().isPresent()) {
|
120
|
+
Double default_value = new Double(columnConfig.getDefault().get().toString());
|
121
|
+
outputDefaultMap.put(outputColumn, default_value);
|
122
|
+
}
|
123
|
+
}
|
124
|
+
else if (columnType instanceof StringType) {
|
125
|
+
if (columnConfig.getDefault().isPresent()) {
|
126
|
+
String default_value = (String)columnConfig.getDefault().get();
|
127
|
+
outputDefaultMap.put(outputColumn, default_value);
|
128
|
+
}
|
129
|
+
}
|
130
|
+
else if (columnType instanceof TimestampType) {
|
131
|
+
if (columnConfig.getDefault().isPresent()) {
|
132
|
+
String time = (String)columnConfig.getDefault().get();
|
133
|
+
String format = (String)columnConfig.getFormat().get();
|
134
|
+
DateTimeZone timezone = DateTimeZone.forID((String)columnConfig.getTimezone().get());
|
135
|
+
TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
|
136
|
+
try {
|
137
|
+
Timestamp default_value = parser.parse(time);
|
138
|
+
outputDefaultMap.put(outputColumn, default_value);
|
139
|
+
} catch(TimestampParseException ex) {
|
140
|
+
throw Throwables.propagate(ex);
|
141
|
+
}
|
142
|
+
}
|
143
|
+
}
|
83
144
|
}
|
84
145
|
}
|
85
146
|
}
|
@@ -120,11 +181,9 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
120
181
|
public void booleanColumn(Column outputColumn) {
|
121
182
|
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
122
183
|
if (pageReader.isNull(inputColumn)) {
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
boolean default_value = ((Boolean)columnConfig.getDefault().get()).booleanValue();
|
127
|
-
pageBuilder.setBoolean(outputColumn, default_value);
|
184
|
+
Boolean default_value = (Boolean)outputDefaultMap.get(outputColumn);
|
185
|
+
if (default_value != null) {
|
186
|
+
pageBuilder.setBoolean(outputColumn, default_value.booleanValue());
|
128
187
|
} else {
|
129
188
|
pageBuilder.setNull(outputColumn);
|
130
189
|
}
|
@@ -137,11 +196,9 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
137
196
|
public void longColumn(Column outputColumn) {
|
138
197
|
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
139
198
|
if (pageReader.isNull(inputColumn)) {
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
long default_value = ((Integer)columnConfig.getDefault().get()).longValue();
|
144
|
-
pageBuilder.setLong(outputColumn, default_value);
|
199
|
+
Long default_value = (Long)outputDefaultMap.get(outputColumn);
|
200
|
+
if (default_value != null) {
|
201
|
+
pageBuilder.setLong(outputColumn, default_value.longValue());
|
145
202
|
} else {
|
146
203
|
pageBuilder.setNull(outputColumn);
|
147
204
|
}
|
@@ -154,10 +211,9 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
154
211
|
public void doubleColumn(Column outputColumn) {
|
155
212
|
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
156
213
|
if (pageReader.isNull(inputColumn)) {
|
157
|
-
|
158
|
-
if (
|
159
|
-
|
160
|
-
pageBuilder.setDouble(outputColumn, default_value);
|
214
|
+
Double default_value = (Double)outputDefaultMap.get(outputColumn);
|
215
|
+
if (default_value != null) {
|
216
|
+
pageBuilder.setDouble(outputColumn, default_value.doubleValue());
|
161
217
|
} else {
|
162
218
|
pageBuilder.setNull(outputColumn);
|
163
219
|
}
|
@@ -170,9 +226,8 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
170
226
|
public void stringColumn(Column outputColumn) {
|
171
227
|
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
172
228
|
if (pageReader.isNull(inputColumn)) {
|
173
|
-
|
174
|
-
if (
|
175
|
-
String default_value = (String)columnConfig.getDefault().get();
|
229
|
+
String default_value = (String)outputDefaultMap.get(outputColumn);
|
230
|
+
if (default_value != null) {
|
176
231
|
pageBuilder.setString(outputColumn, default_value);
|
177
232
|
} else {
|
178
233
|
pageBuilder.setNull(outputColumn);
|
@@ -186,8 +241,12 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
186
241
|
public void timestampColumn(Column outputColumn) {
|
187
242
|
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
188
243
|
if (pageReader.isNull(inputColumn)) {
|
189
|
-
|
190
|
-
|
244
|
+
Timestamp default_value = (Timestamp)outputDefaultMap.get(outputColumn);
|
245
|
+
if (default_value != null) {
|
246
|
+
pageBuilder.setTimestamp(outputColumn, default_value);
|
247
|
+
} else {
|
248
|
+
pageBuilder.setNull(outputColumn);
|
249
|
+
}
|
191
250
|
} else {
|
192
251
|
pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
|
193
252
|
}
|
@@ -13,4 +13,12 @@ public interface ColumnConfig extends Task
|
|
13
13
|
@Config("default")
|
14
14
|
@ConfigDefault("null")
|
15
15
|
public Optional<Object> getDefault();
|
16
|
+
|
17
|
+
@Config("format")
|
18
|
+
@ConfigDefault("\"%Y-%m-%d %H:%M:%S.%N %z\"")
|
19
|
+
public Optional<String> getFormat();
|
20
|
+
|
21
|
+
@Config("timezone")
|
22
|
+
@ConfigDefault("\"UTC\"")
|
23
|
+
public Optional<String> getTimezone();
|
16
24
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-07-
|
11
|
+
date: 2015-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -60,7 +60,7 @@ files:
|
|
60
60
|
- src/main/java/org/embulk/filter/ColumnFilterPlugin.java
|
61
61
|
- src/main/java/org/embulk/filter/column/ColumnConfig.java
|
62
62
|
- src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
|
63
|
-
- classpath/embulk-filter-column-0.1.4.jar
|
63
|
+
- classpath/embulk-filter-column-0.1.5.jar
|
64
64
|
homepage: https://github.com/sonots/embulk-filter-column
|
65
65
|
licenses:
|
66
66
|
- MIT
|