embulk-filter-column 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ad887d441f728829ef11a8f5e6d6e76c5abb7ebe
4
- data.tar.gz: 091933497cb175efdeeb0ecef88c2654dc9dd3cd
3
+ metadata.gz: 9337f13b7cc9bc47527a688296647de4193fd51c
4
+ data.tar.gz: b0a272b270b293c95c05dcd800c384aea0ad43df
5
5
  SHA512:
6
- metadata.gz: 7a0ca14d8629cff148e580cf4f62f312fa22dbb0c6caf7fc35dea67cb4b8f699ac21f987dee17b0a497cc7310c0076613cbe1c46bc3dd9e5912c928db56101ca
7
- data.tar.gz: 6c12d777e927becf2fd95fee004b9c021dd2b239db818e6bc539424db2386f62fd91aac4d4dc6ae8759ec6da88b96e1e95781c203ed62dbd63dacbf9a9dbad51
6
+ metadata.gz: e61f1a5005e3d066d21664ece6e2c1385fc8f9f955b869f26bc6ac0c0c6beab2de91238fe5eb0448b67e9c97e42bc4cc1fcf4b6a12b3af5d831082e6fc2f7930
7
+ data.tar.gz: a0921c3670203b94d148339e59f016dcf5caccd949307369b917743d5a99458c21bee252de16e4506cfec94e0e958e37a1d3229b2548f3b16870c3efab2fcd52
data/CHANGELOG.md CHANGED
@@ -1,4 +1,11 @@
1
- # 0.2.0
1
+ # 0.3.0 (2015-10-27)
2
+
3
+ Enhancements:
4
+
5
+ * Add `default_timestamp_format` option
6
+ * Add `default_timezone` option
7
+
8
+ # 0.2.0 (2015-10-27)
2
9
 
3
10
  Enhancements:
4
11
 
data/README.md CHANGED
@@ -10,18 +10,18 @@ A filter plugin for Embulk to filter out columns
10
10
  - **name**: name of column (required)
11
11
  - **type**: type of column (required to add)
12
12
  - **default**: default value used if input is null (required to add)
13
- - **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `%Y-%m-%d %H:%M:%S.%N %z`, required to add)
14
- - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `UTC`, required to add)
13
+ - **format**: special option for timestamp column, specify the format of the default timestamp (string, defaults to the value of `default_timestamp_format`)
14
+ - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, defaults to the value of `default_timezone`)
15
15
  - **add_columns**: columns to add (array of hash)
16
16
  - **name**: name of column (required)
17
17
  - **type**: type of column (required)
18
18
  - **default**: value of column (required)
19
- - **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
20
- - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `UTC`)
19
+ - **format**: special option for timestamp column, specify the format of the default timestamp (string, defaults to the value of `default_timestamp_format`)
20
+ - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, defaults to the value of `default_timezone`)
21
21
  - **drop_columns**: columns to drop (array of hash)
22
22
  - **name**: name of column (required)
23
-
24
- NOTE: column type is automatically retrieved from input data (inputSchema)
23
+ - **default_timestamp_format**: default timestamp format for timestamp columns (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
24
+ - **default_timezone**: default timezone for timestamp columns (string, default is `UTC`)
25
25
 
26
26
  ## Example (columns)
27
27
 
@@ -51,6 +51,8 @@ reduces columns to only `time`, `id`, and `key` columns as:
51
51
  2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE
52
52
  ```
53
53
 
54
+ Note that column types are automatically retrieved from input data (inputSchema).
55
+
54
56
  ## Example (add_columns)
55
57
 
56
58
  Say input.csv is as follows:
@@ -66,7 +68,7 @@ time,id,key,score
66
68
  filters:
67
69
  - type: column
68
70
  add_columns:
69
- - {key: d, default: "2015-07-13", format: "%Y-%m-%d"}
71
+ - {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d"}
70
72
  ```
71
73
 
72
74
  add `d` column as:
@@ -96,7 +98,7 @@ filters:
96
98
  - {key: id}
97
99
  ```
98
100
 
99
- add `time` and `id` columns as:
101
+ drop `time` and `id` columns as:
100
102
 
101
103
  ```
102
104
  Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370
data/build.gradle CHANGED
@@ -12,7 +12,7 @@ configurations {
12
12
  provided
13
13
  }
14
14
 
15
- version = "0.2.0"
15
+ version = "0.3.0"
16
16
  sourceCompatibility = 1.7
17
17
  targetCompatibility = 1.7
18
18
 
@@ -15,8 +15,11 @@ in:
15
15
  - {name: score, type: double}
16
16
  filters:
17
17
  - type: column
18
+ default_timezone: "Asia/Tokyo"
19
+ default_timestamp_format: "%Y-%m-%d"
18
20
  add_columns:
19
21
  - {name: foo, type: long, default: 1 }
20
- - {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d" }
22
+ - {name: d, type: timestamp, default: "2015-07-13" }
23
+ - {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S" }
21
24
  out:
22
25
  type: stdout
@@ -40,7 +40,6 @@ import com.google.common.base.Throwables;
40
40
  import org.embulk.config.Config;
41
41
  import org.embulk.config.ConfigDefault;
42
42
  import com.google.common.base.Optional;
43
- import org.jruby.embed.ScriptingContainer;
44
43
  import org.embulk.spi.SchemaConfigException;
45
44
 
46
45
  public class ColumnFilterPlugin implements FilterPlugin
@@ -66,12 +65,12 @@ public class ColumnFilterPlugin implements FilterPlugin
66
65
  public Optional<Object> getDefault();
67
66
 
68
67
  @Config("format")
69
- @ConfigDefault("\"%Y-%m-%d %H:%M:%S.%N %z\"")
68
+ @ConfigDefault("null")
70
69
  public Optional<String> getFormat();
71
70
 
72
71
  @Config("timezone")
73
- @ConfigDefault("\"UTC\"")
74
- public Optional<String> getTimezone();
72
+ @ConfigDefault("null")
73
+ public Optional<DateTimeZone> getTimeZone();
75
74
  }
76
75
 
77
76
  public interface PluginTask extends Task, TimestampParser.Task
@@ -87,6 +86,8 @@ public class ColumnFilterPlugin implements FilterPlugin
87
86
  @Config("drop_columns")
88
87
  @ConfigDefault("[]")
89
88
  public List<ColumnConfig> getDropColumns();
89
+
90
+ // See TimestampParser.Task for default_timestamp_format and default_timezone
90
91
  }
91
92
 
92
93
  @Override
@@ -183,7 +184,7 @@ public class ColumnFilterPlugin implements FilterPlugin
183
184
  return null;
184
185
  }
185
186
 
186
- private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, ScriptingContainer jruby) {
187
+ private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, PluginTask task) {
187
188
  for (ColumnConfig columnConfig : columnConfigs) {
188
189
  if (columnConfig.getName().equals(name)) {
189
190
  if (type instanceof BooleanType) {
@@ -208,10 +209,20 @@ public class ColumnFilterPlugin implements FilterPlugin
208
209
  }
209
210
  else if (type instanceof TimestampType) {
210
211
  if (columnConfig.getDefault().isPresent()) {
211
- String time = (String)columnConfig.getDefault().get();
212
- String format = (String)columnConfig.getFormat().get();
213
- DateTimeZone timezone = DateTimeZone.forID((String)columnConfig.getTimezone().get());
214
- TimestampParser parser = new TimestampParser(jruby, format, timezone);
212
+ String time = (String)columnConfig.getDefault().get();
213
+ String format = null;
214
+ if (columnConfig.getFormat().isPresent()) {
215
+ format = columnConfig.getFormat().get();
216
+ } else {
217
+ format = task.getDefaultTimestampFormat();
218
+ }
219
+ DateTimeZone timezone = null;
220
+ if (columnConfig.getTimeZone().isPresent()) {
221
+ timezone = columnConfig.getTimeZone().get();
222
+ } else {
223
+ timezone = task.getDefaultTimeZone();
224
+ }
225
+ TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
215
226
  try {
216
227
  Timestamp default_value = parser.parse(time);
217
228
  return default_value;
@@ -245,9 +256,9 @@ public class ColumnFilterPlugin implements FilterPlugin
245
256
  String name = outputColumn.getName();
246
257
  Type type = outputColumn.getType();
247
258
 
248
- Object default_value = getDefault(name, type, task.getColumns(), task.getJRuby());
259
+ Object default_value = getDefault(name, type, task.getColumns(), task);
249
260
  if (default_value == null) {
250
- default_value = getDefault(name, type, task.getAddColumns(), task.getJRuby());
261
+ default_value = getDefault(name, type, task.getAddColumns(), task);
251
262
  }
252
263
  if (default_value != null) {
253
264
  outputDefaultMap.put(outputColumn, default_value);
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
@@ -61,7 +61,7 @@ files:
61
61
  - lib/embulk/filter/column.rb
62
62
  - src/main/java/org/embulk/filter/ColumnFilterPlugin.java
63
63
  - src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
64
- - classpath/embulk-filter-column-0.2.0.jar
64
+ - classpath/embulk-filter-column-0.3.0.jar
65
65
  homepage: https://github.com/sonots/embulk-filter-column
66
66
  licenses:
67
67
  - MIT