embulk-filter-column 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ad887d441f728829ef11a8f5e6d6e76c5abb7ebe
4
- data.tar.gz: 091933497cb175efdeeb0ecef88c2654dc9dd3cd
3
+ metadata.gz: 9337f13b7cc9bc47527a688296647de4193fd51c
4
+ data.tar.gz: b0a272b270b293c95c05dcd800c384aea0ad43df
5
5
  SHA512:
6
- metadata.gz: 7a0ca14d8629cff148e580cf4f62f312fa22dbb0c6caf7fc35dea67cb4b8f699ac21f987dee17b0a497cc7310c0076613cbe1c46bc3dd9e5912c928db56101ca
7
- data.tar.gz: 6c12d777e927becf2fd95fee004b9c021dd2b239db818e6bc539424db2386f62fd91aac4d4dc6ae8759ec6da88b96e1e95781c203ed62dbd63dacbf9a9dbad51
6
+ metadata.gz: e61f1a5005e3d066d21664ece6e2c1385fc8f9f955b869f26bc6ac0c0c6beab2de91238fe5eb0448b67e9c97e42bc4cc1fcf4b6a12b3af5d831082e6fc2f7930
7
+ data.tar.gz: a0921c3670203b94d148339e59f016dcf5caccd949307369b917743d5a99458c21bee252de16e4506cfec94e0e958e37a1d3229b2548f3b16870c3efab2fcd52
data/CHANGELOG.md CHANGED
@@ -1,4 +1,11 @@
1
- # 0.2.0
1
+ # 0.3.0 (2015-10-27)
2
+
3
+ Enhancements:
4
+
5
+ * Add `default_timestamp_format` option
6
+ * Add `default_timezone` option
7
+
8
+ # 0.2.0 (2015-10-27)
2
9
 
3
10
  Enhancements:
4
11
 
data/README.md CHANGED
@@ -10,18 +10,18 @@ A filter plugin for Embulk to filter out columns
10
10
  - **name**: name of column (required)
11
11
  - **type**: type of column (required to add)
12
12
  - **default**: default value used if input is null (required to add)
13
- - **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `%Y-%m-%d %H:%M:%S.%N %z`, required to add)
14
- - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `UTC`, required to add)
13
+ - **format**: special option for timestamp column, specify the format of the default timestamp (string, defaults to the value of `default_timestamp_format`)
14
+ - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, defaults to the value of `default_timezone`)
15
15
  - **add_columns**: columns to add (array of hash)
16
16
  - **name**: name of column (required)
17
17
  - **type**: type of column (required)
18
18
  - **default**: value of column (required)
19
- - **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
20
- - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `UTC`)
19
+ - **format**: special option for timestamp column, specify the format of the default timestamp (string, defaults to the value of `default_timestamp_format`)
20
+ - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, defaults to the value of `default_timezone`)
21
21
  - **drop_columns**: columns to drop (array of hash)
22
22
  - **name**: name of column (required)
23
-
24
- NOTE: column type is automatically retrieved from input data (inputSchema)
23
+ - **default_timestamp_format**: default timestamp format for timestamp columns (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
24
+ - **default_timezone**: default timezone for timestamp columns (string, default is `UTC`)
25
25
 
26
26
  ## Example (columns)
27
27
 
@@ -51,6 +51,8 @@ reduces columns to only `time`, `id`, and `key` columns as:
51
51
  2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE
52
52
  ```
53
53
 
54
+ Note that column types are automatically retrieved from input data (inputSchema).
55
+
54
56
  ## Example (add_columns)
55
57
 
56
58
  Say input.csv is as follows:
@@ -66,7 +68,7 @@ time,id,key,score
66
68
  filters:
67
69
  - type: column
68
70
  add_columns:
69
- - {key: d, default: "2015-07-13", format: "%Y-%m-%d"}
71
+ - {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d"}
70
72
  ```
71
73
 
72
74
  add `d` column as:
@@ -96,7 +98,7 @@ filters:
96
98
  - {key: id}
97
99
  ```
98
100
 
99
- add `time` and `id` columns as:
101
+ drop `time` and `id` columns as:
100
102
 
101
103
  ```
102
104
  Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370
data/build.gradle CHANGED
@@ -12,7 +12,7 @@ configurations {
12
12
  provided
13
13
  }
14
14
 
15
- version = "0.2.0"
15
+ version = "0.3.0"
16
16
  sourceCompatibility = 1.7
17
17
  targetCompatibility = 1.7
18
18
 
@@ -15,8 +15,11 @@ in:
15
15
  - {name: score, type: double}
16
16
  filters:
17
17
  - type: column
18
+ default_timezone: "Asia/Tokyo"
19
+ default_timestamp_format: "%Y-%m-%d"
18
20
  add_columns:
19
21
  - {name: foo, type: long, default: 1 }
20
- - {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d" }
22
+ - {name: d, type: timestamp, default: "2015-07-13" }
23
+ - {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S" }
21
24
  out:
22
25
  type: stdout
@@ -40,7 +40,6 @@ import com.google.common.base.Throwables;
40
40
  import org.embulk.config.Config;
41
41
  import org.embulk.config.ConfigDefault;
42
42
  import com.google.common.base.Optional;
43
- import org.jruby.embed.ScriptingContainer;
44
43
  import org.embulk.spi.SchemaConfigException;
45
44
 
46
45
  public class ColumnFilterPlugin implements FilterPlugin
@@ -66,12 +65,12 @@ public class ColumnFilterPlugin implements FilterPlugin
66
65
  public Optional<Object> getDefault();
67
66
 
68
67
  @Config("format")
69
- @ConfigDefault("\"%Y-%m-%d %H:%M:%S.%N %z\"")
68
+ @ConfigDefault("null")
70
69
  public Optional<String> getFormat();
71
70
 
72
71
  @Config("timezone")
73
- @ConfigDefault("\"UTC\"")
74
- public Optional<String> getTimezone();
72
+ @ConfigDefault("null")
73
+ public Optional<DateTimeZone> getTimeZone();
75
74
  }
76
75
 
77
76
  public interface PluginTask extends Task, TimestampParser.Task
@@ -87,6 +86,8 @@ public class ColumnFilterPlugin implements FilterPlugin
87
86
  @Config("drop_columns")
88
87
  @ConfigDefault("[]")
89
88
  public List<ColumnConfig> getDropColumns();
89
+
90
+ // See TimestampParser.Task (extended by this interface) for the default_timestamp_format and default_timezone options
90
91
  }
91
92
 
92
93
  @Override
@@ -183,7 +184,7 @@ public class ColumnFilterPlugin implements FilterPlugin
183
184
  return null;
184
185
  }
185
186
 
186
- private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, ScriptingContainer jruby) {
187
+ private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, PluginTask task) {
187
188
  for (ColumnConfig columnConfig : columnConfigs) {
188
189
  if (columnConfig.getName().equals(name)) {
189
190
  if (type instanceof BooleanType) {
@@ -208,10 +209,20 @@ public class ColumnFilterPlugin implements FilterPlugin
208
209
  }
209
210
  else if (type instanceof TimestampType) {
210
211
  if (columnConfig.getDefault().isPresent()) {
211
- String time = (String)columnConfig.getDefault().get();
212
- String format = (String)columnConfig.getFormat().get();
213
- DateTimeZone timezone = DateTimeZone.forID((String)columnConfig.getTimezone().get());
214
- TimestampParser parser = new TimestampParser(jruby, format, timezone);
212
+ String time = (String)columnConfig.getDefault().get();
213
+ String format = null;
214
+ if (columnConfig.getFormat().isPresent()) {
215
+ format = columnConfig.getFormat().get();
216
+ } else {
217
+ format = task.getDefaultTimestampFormat();
218
+ }
219
+ DateTimeZone timezone = null;
220
+ if (columnConfig.getTimeZone().isPresent()) {
221
+ timezone = columnConfig.getTimeZone().get();
222
+ } else {
223
+ timezone = task.getDefaultTimeZone();
224
+ }
225
+ TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
215
226
  try {
216
227
  Timestamp default_value = parser.parse(time);
217
228
  return default_value;
@@ -245,9 +256,9 @@ public class ColumnFilterPlugin implements FilterPlugin
245
256
  String name = outputColumn.getName();
246
257
  Type type = outputColumn.getType();
247
258
 
248
- Object default_value = getDefault(name, type, task.getColumns(), task.getJRuby());
259
+ Object default_value = getDefault(name, type, task.getColumns(), task);
249
260
  if (default_value == null) {
250
- default_value = getDefault(name, type, task.getAddColumns(), task.getJRuby());
261
+ default_value = getDefault(name, type, task.getAddColumns(), task);
251
262
  }
252
263
  if (default_value != null) {
253
264
  outputDefaultMap.put(outputColumn, default_value);
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
@@ -61,7 +61,7 @@ files:
61
61
  - lib/embulk/filter/column.rb
62
62
  - src/main/java/org/embulk/filter/ColumnFilterPlugin.java
63
63
  - src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
64
- - classpath/embulk-filter-column-0.2.0.jar
64
+ - classpath/embulk-filter-column-0.3.0.jar
65
65
  homepage: https://github.com/sonots/embulk-filter-column
66
66
  licenses:
67
67
  - MIT