embulk-filter-column 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e3f1644fcdf2eee67ab2101e46eb5f546b9c373
4
- data.tar.gz: 825008e7b6b9c4b1c3ce5da34e42fd055b627148
3
+ metadata.gz: d4b22fc2b07962eb1295a87ad76ecf7d373e8244
4
+ data.tar.gz: 4c81bf7c05f93f544c883adf165751108a62c7bd
5
5
  SHA512:
6
- metadata.gz: 4823b5a3e833c0ae0ae6302b7043027ce7e9b8740910c9b1753d312540a01d22e32dfe42b7af65cdb5227173d988255336bcc73d2bc1a4239ca6044d62c164c6
7
- data.tar.gz: 78fa9b4a98d493a292e65e7e3387db08d997683937dfd594a69d45aecc8304d129ded9e3f3dfa409d6591323452a1dcd00389a30f0419e7794276145bffaa9c0
6
+ metadata.gz: cc307a1b813dfd435d8d94c60f44886410546dd1065170218cc8c331930b940d578ed99a92f38f705566b73dc2c08878456762a3924cb8b5adf39e8144bdbd83
7
+ data.tar.gz: a304d053cc7cf08b562afc84dc663909114b6c73da262fb4be2b82b9c58f17975014ad0d59e3a4d383f30e6f636fae4873f96a704f65dbf962f684eb84eebb74
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.1.5
2
+
3
+ Enhancements:
4
+
5
+ * Support timestamp default
6
+
1
7
  # 0.1.4
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -7,6 +7,10 @@ A filter plugin for Embulk to filter out columns
7
7
  - **columns**: columns (array of hash, required)
8
8
  - **name**: name of column
9
9
  - **default**: default value used if input is null
10
+ - **format**: option for timestamp columns only; the format used to parse the `default` value (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
11
+ - **timezone**: option for timestamp columns only; the timezone used to parse the `default` value (string, default is `UTC`)
12
+
13
+ NOTE: column type is automatically retrieved from input data (inputSchema)
10
14
 
11
15
  ## Example
12
16
 
@@ -14,11 +18,12 @@ A filter plugin for Embulk to filter out columns
14
18
  filters:
15
19
  - type: column
16
20
  columns:
21
+ - {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
17
22
  - {name: id}
18
- - {name: name, default: 'foo'}
23
+ - {name: name, default: "foo"}
19
24
  ```
20
25
 
21
- reduces columns to only `id` and `name` columns.
26
+ reduces columns to only `time`, `id`, and `name` columns.
22
27
 
23
28
  ## Development
24
29
 
@@ -29,6 +34,12 @@ $ ./gradlew classpath
29
34
  $ embulk run -I lib example.yml
30
35
  ```
31
36
 
37
+ Run test:
38
+
39
+ ```
40
+ $ ./gradlew test
41
+ ```
42
+
32
43
  Release gem:
33
44
 
34
45
  ```
data/build.gradle CHANGED
@@ -12,11 +12,11 @@ configurations {
12
12
  provided
13
13
  }
14
14
 
15
- version = "0.1.4"
15
+ version = "0.1.5"
16
16
 
17
17
  dependencies {
18
- compile "org.embulk:embulk-core:0.6.12"
19
- provided "org.embulk:embulk-core:0.6.12"
18
+ compile "org.embulk:embulk-core:0.6.16"
19
+ provided "org.embulk:embulk-core:0.6.16"
20
20
  // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
21
21
  testCompile "junit:junit:4.+"
22
22
  }
data/example.yml CHANGED
@@ -16,12 +16,14 @@ in:
16
16
  skip_header_lines: 1
17
17
  comment_line_marker: '#'
18
18
  columns:
19
+ - {name: time, type: timestamp, format: "%Y-%m-%d"}
19
20
  - {name: id, type: long}
20
21
  - {name: name, type: string}
21
22
  - {name: score, type: double}
22
23
  filters:
23
24
  - type: column
24
25
  columns:
26
+ - {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
25
27
  - {name: name, default: "foo"}
26
28
  - {name: foo, default: 1}
27
29
  - {name: id}
@@ -1,5 +1,10 @@
1
1
  package org.embulk.filter;
2
2
 
3
+ import java.util.List;
4
+ import java.util.HashMap;
5
+ import com.google.common.collect.ImmutableList;
6
+ import org.slf4j.Logger;
7
+
3
8
  import org.embulk.config.Config;
4
9
  import org.embulk.config.ConfigDefault;
5
10
  import org.embulk.config.ConfigDiff;
@@ -7,11 +12,12 @@ import org.embulk.config.ConfigSource;
7
12
  import org.embulk.config.Task;
8
13
  import org.embulk.config.TaskSource;
9
14
 
10
- import java.util.List;
11
- import java.util.HashMap;
12
15
  import org.embulk.spi.type.Type;
16
+ import org.embulk.spi.type.BooleanType;
17
+ import org.embulk.spi.type.LongType;
18
+ import org.embulk.spi.type.DoubleType;
19
+ import org.embulk.spi.type.StringType;
13
20
  import org.embulk.spi.type.TimestampType;
14
- import com.google.common.collect.ImmutableList;
15
21
 
16
22
  import org.embulk.spi.FilterPlugin;
17
23
  import org.embulk.spi.Exec;
@@ -25,9 +31,16 @@ import org.embulk.spi.Column;
25
31
  import org.embulk.spi.ColumnVisitor;
26
32
  import org.embulk.filter.column.ColumnConfig; // note: different with spi.ColumnConfig
27
33
 
34
+ import org.joda.time.DateTimeZone;
35
+ import org.embulk.spi.time.Timestamp;
36
+ import org.embulk.spi.time.TimestampParser;
37
+ import org.embulk.spi.time.TimestampParseException;
38
+ import com.google.common.base.Throwables;
39
+
28
40
  public class ColumnFilterPlugin implements FilterPlugin
29
41
  {
30
- public interface PluginTask extends Task
42
+ public interface PluginTask
43
+ extends Task, TimestampParser.Task
31
44
  {
32
45
  @Config("columns")
33
46
  public List<ColumnConfig> getColumns();
@@ -58,12 +71,20 @@ public class ColumnFilterPlugin implements FilterPlugin
58
71
  control.run(task.dump(), outputSchema);
59
72
  }
60
73
 
74
+ private final Logger log;
75
+
76
+ public ColumnFilterPlugin()
77
+ {
78
+ log = Exec.getLogger(ColumnFilterPlugin.class);
79
+ }
80
+
61
81
  @Override
62
82
  public PageOutput open(TaskSource taskSource, Schema inputSchema,
63
83
  Schema outputSchema, PageOutput output)
64
84
  {
65
85
  PluginTask task = taskSource.loadTask(PluginTask.class);
66
86
 
87
+ // Map outputColumn => inputColumn
67
88
  HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
68
89
  for (Column outputColumn: outputSchema.getColumns()) {
69
90
  for (Column inputColumn: inputSchema.getColumns()) {
@@ -74,12 +95,52 @@ public class ColumnFilterPlugin implements FilterPlugin
74
95
  }
75
96
  }
76
97
 
77
- HashMap<Column, ColumnConfig> outputColumnConfigMap = new HashMap<Column, ColumnConfig>();
98
+ // Map outputColumn => default value if present
99
+ HashMap<Column, Object> outputDefaultMap = new HashMap<Column, Object>();
78
100
  for (Column outputColumn: outputSchema.getColumns()) {
101
+ Type columnType = outputColumn.getType();
102
+
79
103
  for (ColumnConfig columnConfig : task.getColumns()) {
80
104
  if (columnConfig.getName().equals(outputColumn.getName())) {
81
- outputColumnConfigMap.put(outputColumn, columnConfig);
82
- break;
105
+
106
+ if (columnType instanceof BooleanType) {
107
+ if (columnConfig.getDefault().isPresent()) {
108
+ Boolean default_value = (Boolean)columnConfig.getDefault().get();
109
+ outputDefaultMap.put(outputColumn, default_value);
110
+ }
111
+ }
112
+ else if (columnType instanceof LongType) {
113
+ if (columnConfig.getDefault().isPresent()) {
114
+ Long default_value = new Long(columnConfig.getDefault().get().toString());
115
+ outputDefaultMap.put(outputColumn, default_value);
116
+ }
117
+ }
118
+ else if (columnType instanceof DoubleType) {
119
+ if (columnConfig.getDefault().isPresent()) {
120
+ Double default_value = new Double(columnConfig.getDefault().get().toString());
121
+ outputDefaultMap.put(outputColumn, default_value);
122
+ }
123
+ }
124
+ else if (columnType instanceof StringType) {
125
+ if (columnConfig.getDefault().isPresent()) {
126
+ String default_value = (String)columnConfig.getDefault().get();
127
+ outputDefaultMap.put(outputColumn, default_value);
128
+ }
129
+ }
130
+ else if (columnType instanceof TimestampType) {
131
+ if (columnConfig.getDefault().isPresent()) {
132
+ String time = (String)columnConfig.getDefault().get();
133
+ String format = (String)columnConfig.getFormat().get();
134
+ DateTimeZone timezone = DateTimeZone.forID((String)columnConfig.getTimezone().get());
135
+ TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
136
+ try {
137
+ Timestamp default_value = parser.parse(time);
138
+ outputDefaultMap.put(outputColumn, default_value);
139
+ } catch(TimestampParseException ex) {
140
+ throw Throwables.propagate(ex);
141
+ }
142
+ }
143
+ }
83
144
  }
84
145
  }
85
146
  }
@@ -120,11 +181,9 @@ public class ColumnFilterPlugin implements FilterPlugin
120
181
  public void booleanColumn(Column outputColumn) {
121
182
  Column inputColumn = outputInputColumnMap.get(outputColumn);
122
183
  if (pageReader.isNull(inputColumn)) {
123
- ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
124
- // ToDo: speed up
125
- if (columnConfig.getDefault().isPresent()) {
126
- boolean default_value = ((Boolean)columnConfig.getDefault().get()).booleanValue();
127
- pageBuilder.setBoolean(outputColumn, default_value);
184
+ Boolean default_value = (Boolean)outputDefaultMap.get(outputColumn);
185
+ if (default_value != null) {
186
+ pageBuilder.setBoolean(outputColumn, default_value.booleanValue());
128
187
  } else {
129
188
  pageBuilder.setNull(outputColumn);
130
189
  }
@@ -137,11 +196,9 @@ public class ColumnFilterPlugin implements FilterPlugin
137
196
  public void longColumn(Column outputColumn) {
138
197
  Column inputColumn = outputInputColumnMap.get(outputColumn);
139
198
  if (pageReader.isNull(inputColumn)) {
140
- ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
141
- // ToDo: speed up
142
- if (columnConfig.getDefault().isPresent()) {
143
- long default_value = ((Integer)columnConfig.getDefault().get()).longValue();
144
- pageBuilder.setLong(outputColumn, default_value);
199
+ Long default_value = (Long)outputDefaultMap.get(outputColumn);
200
+ if (default_value != null) {
201
+ pageBuilder.setLong(outputColumn, default_value.longValue());
145
202
  } else {
146
203
  pageBuilder.setNull(outputColumn);
147
204
  }
@@ -154,10 +211,9 @@ public class ColumnFilterPlugin implements FilterPlugin
154
211
  public void doubleColumn(Column outputColumn) {
155
212
  Column inputColumn = outputInputColumnMap.get(outputColumn);
156
213
  if (pageReader.isNull(inputColumn)) {
157
- ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
158
- if (columnConfig.getDefault().isPresent()) {
159
- double default_value = ((Double)columnConfig.getDefault().get()).doubleValue();
160
- pageBuilder.setDouble(outputColumn, default_value);
214
+ Double default_value = (Double)outputDefaultMap.get(outputColumn);
215
+ if (default_value != null) {
216
+ pageBuilder.setDouble(outputColumn, default_value.doubleValue());
161
217
  } else {
162
218
  pageBuilder.setNull(outputColumn);
163
219
  }
@@ -170,9 +226,8 @@ public class ColumnFilterPlugin implements FilterPlugin
170
226
  public void stringColumn(Column outputColumn) {
171
227
  Column inputColumn = outputInputColumnMap.get(outputColumn);
172
228
  if (pageReader.isNull(inputColumn)) {
173
- ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
174
- if (columnConfig.getDefault().isPresent()) {
175
- String default_value = (String)columnConfig.getDefault().get();
229
+ String default_value = (String)outputDefaultMap.get(outputColumn);
230
+ if (default_value != null) {
176
231
  pageBuilder.setString(outputColumn, default_value);
177
232
  } else {
178
233
  pageBuilder.setNull(outputColumn);
@@ -186,8 +241,12 @@ public class ColumnFilterPlugin implements FilterPlugin
186
241
  public void timestampColumn(Column outputColumn) {
187
242
  Column inputColumn = outputInputColumnMap.get(outputColumn);
188
243
  if (pageReader.isNull(inputColumn)) {
189
- // ToDo: default for timestamp
190
- pageBuilder.setNull(outputColumn);
244
+ Timestamp default_value = (Timestamp)outputDefaultMap.get(outputColumn);
245
+ if (default_value != null) {
246
+ pageBuilder.setTimestamp(outputColumn, default_value);
247
+ } else {
248
+ pageBuilder.setNull(outputColumn);
249
+ }
191
250
  } else {
192
251
  pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
193
252
  }
@@ -13,4 +13,12 @@ public interface ColumnConfig extends Task
13
13
  @Config("default")
14
14
  @ConfigDefault("null")
15
15
  public Optional<Object> getDefault();
16
+
17
+ @Config("format")
18
+ @ConfigDefault("\"%Y-%m-%d %H:%M:%S.%N %z\"")
19
+ public Optional<String> getFormat();
20
+
21
+ @Config("timezone")
22
+ @ConfigDefault("\"UTC\"")
23
+ public Optional<String> getTimezone();
16
24
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-09 00:00:00.000000000 Z
11
+ date: 2015-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -60,7 +60,7 @@ files:
60
60
  - src/main/java/org/embulk/filter/ColumnFilterPlugin.java
61
61
  - src/main/java/org/embulk/filter/column/ColumnConfig.java
62
62
  - src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
63
- - classpath/embulk-filter-column-0.1.4.jar
63
+ - classpath/embulk-filter-column-0.1.5.jar
64
64
  homepage: https://github.com/sonots/embulk-filter-column
65
65
  licenses:
66
66
  - MIT