embulk-filter-column 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/README.md +10 -8
- data/build.gradle +1 -1
- data/example/add_columns.yml +4 -1
- data/src/main/java/org/embulk/filter/ColumnFilterPlugin.java +22 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9337f13b7cc9bc47527a688296647de4193fd51c
|
4
|
+
data.tar.gz: b0a272b270b293c95c05dcd800c384aea0ad43df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e61f1a5005e3d066d21664ece6e2c1385fc8f9f955b869f26bc6ac0c0c6beab2de91238fe5eb0448b67e9c97e42bc4cc1fcf4b6a12b3af5d831082e6fc2f7930
|
7
|
+
data.tar.gz: a0921c3670203b94d148339e59f016dcf5caccd949307369b917743d5a99458c21bee252de16e4506cfec94e0e958e37a1d3229b2548f3b16870c3efab2fcd52
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -10,18 +10,18 @@ A filter plugin for Embulk to filter out columns
|
|
10
10
|
- **name**: name of column (required)
|
11
11
|
- **type**: type of column (required to add)
|
12
12
|
- **default**: default value used if input is null (required to add)
|
13
|
-
- **format**: special option for timestamp column, specify the format of the default timestamp (string, default is
|
14
|
-
- **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `
|
13
|
+
- **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `default_timestamp_format`)
|
14
|
+
- **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `default_timezone`)
|
15
15
|
- **add_columns**: columns to add (array of hash)
|
16
16
|
- **name**: name of column (required)
|
17
17
|
- **type**: type of column (required)
|
18
18
|
- **default**: value of column (required)
|
19
|
-
- **format**: special option for timestamp column, specify the format of the default timestamp (string, default is
|
20
|
-
- **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `
|
19
|
+
- **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `default_timestamp_format`)
|
20
|
+
- **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `default_timezone`)
|
21
21
|
- **drop_columns**: columns to drop (array of hash)
|
22
22
|
- **name**: name of column (required)
|
23
|
-
|
24
|
-
|
23
|
+
- **default_timestamp_format**: default timestamp format for timestamp columns (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
|
24
|
+
- **default_timezone**: default timezone for timestamp columns (string, default is `UTC`)
|
25
25
|
|
26
26
|
## Example (columns)
|
27
27
|
|
@@ -51,6 +51,8 @@ reduces columns to only `time`, `id`, and `key` columns as:
|
|
51
51
|
2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE
|
52
52
|
```
|
53
53
|
|
54
|
+
Note that column types are automatically retrieved from input data (inputSchema).
|
55
|
+
|
54
56
|
## Example (add_columns)
|
55
57
|
|
56
58
|
Say input.csv is as follows:
|
@@ -66,7 +68,7 @@ time,id,key,score
|
|
66
68
|
filters:
|
67
69
|
- type: column
|
68
70
|
add_columns:
|
69
|
-
- {key: d, default: "2015-07-13", format: "%Y-%m-%d"}
|
71
|
+
- {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d"}
|
70
72
|
```
|
71
73
|
|
72
74
|
add `d` column as:
|
@@ -96,7 +98,7 @@ filters:
|
|
96
98
|
- {key: id}
|
97
99
|
```
|
98
100
|
|
99
|
-
|
101
|
+
drop `time` and `id` columns as:
|
100
102
|
|
101
103
|
```
|
102
104
|
Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370
|
data/build.gradle
CHANGED
data/example/add_columns.yml
CHANGED
@@ -15,8 +15,11 @@ in:
|
|
15
15
|
- {name: score, type: double}
|
16
16
|
filters:
|
17
17
|
- type: column
|
18
|
+
default_timezone: "Asia/Tokyo"
|
19
|
+
default_timestamp_format: "%Y-%m-%d"
|
18
20
|
add_columns:
|
19
21
|
- {name: foo, type: long, default: 1 }
|
20
|
-
- {name: d, type: timestamp, default: "2015-07-13"
|
22
|
+
- {name: d, type: timestamp, default: "2015-07-13" }
|
23
|
+
- {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S" }
|
21
24
|
out:
|
22
25
|
type: stdout
|
@@ -40,7 +40,6 @@ import com.google.common.base.Throwables;
|
|
40
40
|
import org.embulk.config.Config;
|
41
41
|
import org.embulk.config.ConfigDefault;
|
42
42
|
import com.google.common.base.Optional;
|
43
|
-
import org.jruby.embed.ScriptingContainer;
|
44
43
|
import org.embulk.spi.SchemaConfigException;
|
45
44
|
|
46
45
|
public class ColumnFilterPlugin implements FilterPlugin
|
@@ -66,12 +65,12 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
66
65
|
public Optional<Object> getDefault();
|
67
66
|
|
68
67
|
@Config("format")
|
69
|
-
@ConfigDefault("
|
68
|
+
@ConfigDefault("null")
|
70
69
|
public Optional<String> getFormat();
|
71
70
|
|
72
71
|
@Config("timezone")
|
73
|
-
@ConfigDefault("
|
74
|
-
public Optional<
|
72
|
+
@ConfigDefault("null")
|
73
|
+
public Optional<DateTimeZone> getTimeZone();
|
75
74
|
}
|
76
75
|
|
77
76
|
public interface PluginTask extends Task, TimestampParser.Task
|
@@ -87,6 +86,8 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
87
86
|
@Config("drop_columns")
|
88
87
|
@ConfigDefault("[]")
|
89
88
|
public List<ColumnConfig> getDropColumns();
|
89
|
+
|
90
|
+
// default_timestamp_format and default_timezone are inherited from TimestampParser.Task
|
90
91
|
}
|
91
92
|
|
92
93
|
@Override
|
@@ -183,7 +184,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
183
184
|
return null;
|
184
185
|
}
|
185
186
|
|
186
|
-
private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs,
|
187
|
+
private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, PluginTask task) {
|
187
188
|
for (ColumnConfig columnConfig : columnConfigs) {
|
188
189
|
if (columnConfig.getName().equals(name)) {
|
189
190
|
if (type instanceof BooleanType) {
|
@@ -208,10 +209,20 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
208
209
|
}
|
209
210
|
else if (type instanceof TimestampType) {
|
210
211
|
if (columnConfig.getDefault().isPresent()) {
|
211
|
-
String time
|
212
|
-
String format
|
213
|
-
|
214
|
-
|
212
|
+
String time = (String)columnConfig.getDefault().get();
|
213
|
+
String format = null;
|
214
|
+
if (columnConfig.getFormat().isPresent()) {
|
215
|
+
format = columnConfig.getFormat().get();
|
216
|
+
} else {
|
217
|
+
format = task.getDefaultTimestampFormat();
|
218
|
+
}
|
219
|
+
DateTimeZone timezone = null;
|
220
|
+
if (columnConfig.getTimeZone().isPresent()) {
|
221
|
+
timezone = columnConfig.getTimeZone().get();
|
222
|
+
} else {
|
223
|
+
timezone = task.getDefaultTimeZone();
|
224
|
+
}
|
225
|
+
TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
|
215
226
|
try {
|
216
227
|
Timestamp default_value = parser.parse(time);
|
217
228
|
return default_value;
|
@@ -245,9 +256,9 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
245
256
|
String name = outputColumn.getName();
|
246
257
|
Type type = outputColumn.getType();
|
247
258
|
|
248
|
-
Object default_value = getDefault(name, type, task.getColumns(), task
|
259
|
+
Object default_value = getDefault(name, type, task.getColumns(), task);
|
249
260
|
if (default_value == null) {
|
250
|
-
default_value = getDefault(name, type, task.getAddColumns(), task
|
261
|
+
default_value = getDefault(name, type, task.getAddColumns(), task);
|
251
262
|
}
|
252
263
|
if (default_value != null) {
|
253
264
|
outputDefaultMap.put(outputColumn, default_value);
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
@@ -61,7 +61,7 @@ files:
|
|
61
61
|
- lib/embulk/filter/column.rb
|
62
62
|
- src/main/java/org/embulk/filter/ColumnFilterPlugin.java
|
63
63
|
- src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
|
64
|
-
- classpath/embulk-filter-column-0.
|
64
|
+
- classpath/embulk-filter-column-0.3.0.jar
|
65
65
|
homepage: https://github.com/sonots/embulk-filter-column
|
66
66
|
licenses:
|
67
67
|
- MIT
|