embulk-filter-column 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/README.md +10 -8
- data/build.gradle +1 -1
- data/example/add_columns.yml +4 -1
- data/src/main/java/org/embulk/filter/ColumnFilterPlugin.java +22 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9337f13b7cc9bc47527a688296647de4193fd51c
|
4
|
+
data.tar.gz: b0a272b270b293c95c05dcd800c384aea0ad43df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e61f1a5005e3d066d21664ece6e2c1385fc8f9f955b869f26bc6ac0c0c6beab2de91238fe5eb0448b67e9c97e42bc4cc1fcf4b6a12b3af5d831082e6fc2f7930
|
7
|
+
data.tar.gz: a0921c3670203b94d148339e59f016dcf5caccd949307369b917743d5a99458c21bee252de16e4506cfec94e0e958e37a1d3229b2548f3b16870c3efab2fcd52
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -10,18 +10,18 @@ A filter plugin for Embulk to filter out columns
|
|
10
10
|
- **name**: name of column (required)
|
11
11
|
- **type**: type of column (required to add)
|
12
12
|
- **default**: default value used if input is null (required to add)
|
13
|
-
- **format**: special option for timestamp column, specify the format of the default timestamp (string, default is
|
14
|
-
- **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `
|
13
|
+
- **format**: special option for timestamp columns; specifies the format of the default timestamp (string, defaults to the value of `default_timestamp_format`)
|
14
|
+
- **timezone**: special option for timestamp columns; specifies the timezone of the default timestamp (string, defaults to the value of `default_timezone`)
|
15
15
|
- **add_columns**: columns to add (array of hash)
|
16
16
|
- **name**: name of column (required)
|
17
17
|
- **type**: type of column (required)
|
18
18
|
- **default**: value of column (required)
|
19
|
-
- **format**: special option for timestamp column, specify the format of the default timestamp (string, default is
|
20
|
-
- **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `
|
19
|
+
- **format**: special option for timestamp columns; specifies the format of the default timestamp (string, defaults to the value of `default_timestamp_format`)
|
20
|
+
- **timezone**: special option for timestamp columns; specifies the timezone of the default timestamp (string, defaults to the value of `default_timezone`)
|
21
21
|
- **drop_columns**: columns to drop (array of hash)
|
22
22
|
- **name**: name of column (required)
|
23
|
-
|
24
|
-
|
23
|
+
- **default_timestamp_format**: default timestamp format for timestamp columns (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
|
24
|
+
- **default_timezone**: default timezone for timestamp columns (string, default is `UTC`)
|
25
25
|
|
26
26
|
## Example (columns)
|
27
27
|
|
@@ -51,6 +51,8 @@ reduces columns to only `time`, `id`, and `key` columns as:
|
|
51
51
|
2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE
|
52
52
|
```
|
53
53
|
|
54
|
+
Note that the types of these columns are taken automatically from the input schema (`inputSchema`).
|
55
|
+
|
54
56
|
## Example (add_columns)
|
55
57
|
|
56
58
|
Say input.csv is as follows:
|
@@ -66,7 +68,7 @@ time,id,key,score
|
|
66
68
|
filters:
|
67
69
|
- type: column
|
68
70
|
add_columns:
|
69
|
-
- {key: d, default: "2015-07-13", format: "%Y-%m-%d"}
|
71
|
+
- {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d"}
|
70
72
|
```
|
71
73
|
|
72
74
|
add `d` column as:
|
@@ -96,7 +98,7 @@ filters:
|
|
96
98
|
- {key: id}
|
97
99
|
```
|
98
100
|
|
99
|
-
|
101
|
+
drop `time` and `id` columns as:
|
100
102
|
|
101
103
|
```
|
102
104
|
Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370
|
data/build.gradle
CHANGED
data/example/add_columns.yml
CHANGED
@@ -15,8 +15,11 @@ in:
|
|
15
15
|
- {name: score, type: double}
|
16
16
|
filters:
|
17
17
|
- type: column
|
18
|
+
default_timezone: "Asia/Tokyo"
|
19
|
+
default_timestamp_format: "%Y-%m-%d"
|
18
20
|
add_columns:
|
19
21
|
- {name: foo, type: long, default: 1 }
|
20
|
-
- {name: d, type: timestamp, default: "2015-07-13"
|
22
|
+
- {name: d, type: timestamp, default: "2015-07-13" }
|
23
|
+
- {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S" }
|
21
24
|
out:
|
22
25
|
type: stdout
|
@@ -40,7 +40,6 @@ import com.google.common.base.Throwables;
|
|
40
40
|
import org.embulk.config.Config;
|
41
41
|
import org.embulk.config.ConfigDefault;
|
42
42
|
import com.google.common.base.Optional;
|
43
|
-
import org.jruby.embed.ScriptingContainer;
|
44
43
|
import org.embulk.spi.SchemaConfigException;
|
45
44
|
|
46
45
|
public class ColumnFilterPlugin implements FilterPlugin
|
@@ -66,12 +65,12 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
66
65
|
public Optional<Object> getDefault();
|
67
66
|
|
68
67
|
@Config("format")
|
69
|
-
@ConfigDefault("
|
68
|
+
@ConfigDefault("null")
|
70
69
|
public Optional<String> getFormat();
|
71
70
|
|
72
71
|
@Config("timezone")
|
73
|
-
@ConfigDefault("
|
74
|
-
public Optional<
|
72
|
+
@ConfigDefault("null")
|
73
|
+
public Optional<DateTimeZone> getTimeZone();
|
75
74
|
}
|
76
75
|
|
77
76
|
public interface PluginTask extends Task, TimestampParser.Task
|
@@ -87,6 +86,8 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
87
86
|
@Config("drop_columns")
|
88
87
|
@ConfigDefault("[]")
|
89
88
|
public List<ColumnConfig> getDropColumns();
|
89
|
+
|
90
|
+
// default_timestamp_format and default_timezone are inherited from TimestampParser.Task
|
90
91
|
}
|
91
92
|
|
92
93
|
@Override
|
@@ -183,7 +184,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
183
184
|
return null;
|
184
185
|
}
|
185
186
|
|
186
|
-
private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs,
|
187
|
+
private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, PluginTask task) {
|
187
188
|
for (ColumnConfig columnConfig : columnConfigs) {
|
188
189
|
if (columnConfig.getName().equals(name)) {
|
189
190
|
if (type instanceof BooleanType) {
|
@@ -208,10 +209,20 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
208
209
|
}
|
209
210
|
else if (type instanceof TimestampType) {
|
210
211
|
if (columnConfig.getDefault().isPresent()) {
|
211
|
-
String time
|
212
|
-
String format
|
213
|
-
|
214
|
-
|
212
|
+
String time = (String)columnConfig.getDefault().get();
|
213
|
+
String format = null;
|
214
|
+
if (columnConfig.getFormat().isPresent()) {
|
215
|
+
format = columnConfig.getFormat().get();
|
216
|
+
} else {
|
217
|
+
format = task.getDefaultTimestampFormat();
|
218
|
+
}
|
219
|
+
DateTimeZone timezone = null;
|
220
|
+
if (columnConfig.getTimeZone().isPresent()) {
|
221
|
+
timezone = columnConfig.getTimeZone().get();
|
222
|
+
} else {
|
223
|
+
timezone = task.getDefaultTimeZone();
|
224
|
+
}
|
225
|
+
TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
|
215
226
|
try {
|
216
227
|
Timestamp default_value = parser.parse(time);
|
217
228
|
return default_value;
|
@@ -245,9 +256,9 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
245
256
|
String name = outputColumn.getName();
|
246
257
|
Type type = outputColumn.getType();
|
247
258
|
|
248
|
-
Object default_value = getDefault(name, type, task.getColumns(), task
|
259
|
+
Object default_value = getDefault(name, type, task.getColumns(), task);
|
249
260
|
if (default_value == null) {
|
250
|
-
default_value = getDefault(name, type, task.getAddColumns(), task
|
261
|
+
default_value = getDefault(name, type, task.getAddColumns(), task);
|
251
262
|
}
|
252
263
|
if (default_value != null) {
|
253
264
|
outputDefaultMap.put(outputColumn, default_value);
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
@@ -61,7 +61,7 @@ files:
|
|
61
61
|
- lib/embulk/filter/column.rb
|
62
62
|
- src/main/java/org/embulk/filter/ColumnFilterPlugin.java
|
63
63
|
- src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
|
64
|
-
- classpath/embulk-filter-column-0.2.0.jar
|
64
|
+
- classpath/embulk-filter-column-0.3.0.jar
|
65
65
|
homepage: https://github.com/sonots/embulk-filter-column
|
66
66
|
licenses:
|
67
67
|
- MIT
|