embulk-filter-column 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e3f1644fcdf2eee67ab2101e46eb5f546b9c373
4
- data.tar.gz: 825008e7b6b9c4b1c3ce5da34e42fd055b627148
3
+ metadata.gz: d4b22fc2b07962eb1295a87ad76ecf7d373e8244
4
+ data.tar.gz: 4c81bf7c05f93f544c883adf165751108a62c7bd
5
5
  SHA512:
6
- metadata.gz: 4823b5a3e833c0ae0ae6302b7043027ce7e9b8740910c9b1753d312540a01d22e32dfe42b7af65cdb5227173d988255336bcc73d2bc1a4239ca6044d62c164c6
7
- data.tar.gz: 78fa9b4a98d493a292e65e7e3387db08d997683937dfd594a69d45aecc8304d129ded9e3f3dfa409d6591323452a1dcd00389a30f0419e7794276145bffaa9c0
6
+ metadata.gz: cc307a1b813dfd435d8d94c60f44886410546dd1065170218cc8c331930b940d578ed99a92f38f705566b73dc2c08878456762a3924cb8b5adf39e8144bdbd83
7
+ data.tar.gz: a304d053cc7cf08b562afc84dc663909114b6c73da262fb4be2b82b9c58f17975014ad0d59e3a4d383f30e6f636fae4873f96a704f65dbf962f684eb84eebb74
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.1.5
2
+
3
+ Enhancements:
4
+
5
+ * Support timestamp default
6
+
1
7
  # 0.1.4
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -7,6 +7,10 @@ A filter plugin for Embulk to filter out columns
7
7
  - **columns**: columns (array of hash, required)
8
8
  - **name**: name of column
9
9
  - **default**: default value used if input is null
10
+ - **format**: option for timestamp columns only; the format used to parse the timestamp `default` value (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
11
+ - **timezone**: option for timestamp columns only; the timezone used to parse the timestamp `default` value (string, default is `UTC`)
12
+
13
+ NOTE: column type is automatically retrieved from input data (inputSchema)
10
14
 
11
15
  ## Example
12
16
 
@@ -14,11 +18,12 @@ A filter plugin for Embulk to filter out columns
14
18
  filters:
15
19
  - type: column
16
20
  columns:
21
+ - {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
17
22
  - {name: id}
18
- - {name: name, default: 'foo'}
23
+ - {name: name, default: "foo"}
19
24
  ```
20
25
 
21
- reduces columns to only `id` and `name` columns.
26
+ reduces columns to only `time`, `id`, and `name` columns.
22
27
 
23
28
  ## Development
24
29
 
@@ -29,6 +34,12 @@ $ ./gradlew classpath
29
34
  $ embulk run -I lib example.yml
30
35
  ```
31
36
 
37
+ Run test:
38
+
39
+ ```
40
+ $ ./gradlew test
41
+ ```
42
+
32
43
  Release gem:
33
44
 
34
45
  ```
data/build.gradle CHANGED
@@ -12,11 +12,11 @@ configurations {
12
12
  provided
13
13
  }
14
14
 
15
- version = "0.1.4"
15
+ version = "0.1.5"
16
16
 
17
17
  dependencies {
18
- compile "org.embulk:embulk-core:0.6.12"
19
- provided "org.embulk:embulk-core:0.6.12"
18
+ compile "org.embulk:embulk-core:0.6.16"
19
+ provided "org.embulk:embulk-core:0.6.16"
20
20
  // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
21
21
  testCompile "junit:junit:4.+"
22
22
  }
data/example.yml CHANGED
@@ -16,12 +16,14 @@ in:
16
16
  skip_header_lines: 1
17
17
  comment_line_marker: '#'
18
18
  columns:
19
+ - {name: time, type: timestamp, format: "%Y-%m-%d"}
19
20
  - {name: id, type: long}
20
21
  - {name: name, type: string}
21
22
  - {name: score, type: double}
22
23
  filters:
23
24
  - type: column
24
25
  columns:
26
+ - {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
25
27
  - {name: name, default: "foo"}
26
28
  - {name: foo, default: 1}
27
29
  - {name: id}
@@ -1,5 +1,10 @@
1
1
  package org.embulk.filter;
2
2
 
3
+ import java.util.List;
4
+ import java.util.HashMap;
5
+ import com.google.common.collect.ImmutableList;
6
+ import org.slf4j.Logger;
7
+
3
8
  import org.embulk.config.Config;
4
9
  import org.embulk.config.ConfigDefault;
5
10
  import org.embulk.config.ConfigDiff;
@@ -7,11 +12,12 @@ import org.embulk.config.ConfigSource;
7
12
  import org.embulk.config.Task;
8
13
  import org.embulk.config.TaskSource;
9
14
 
10
- import java.util.List;
11
- import java.util.HashMap;
12
15
  import org.embulk.spi.type.Type;
16
+ import org.embulk.spi.type.BooleanType;
17
+ import org.embulk.spi.type.LongType;
18
+ import org.embulk.spi.type.DoubleType;
19
+ import org.embulk.spi.type.StringType;
13
20
  import org.embulk.spi.type.TimestampType;
14
- import com.google.common.collect.ImmutableList;
15
21
 
16
22
  import org.embulk.spi.FilterPlugin;
17
23
  import org.embulk.spi.Exec;
@@ -25,9 +31,16 @@ import org.embulk.spi.Column;
25
31
  import org.embulk.spi.ColumnVisitor;
26
32
  import org.embulk.filter.column.ColumnConfig; // note: different from spi.ColumnConfig
27
33
 
34
+ import org.joda.time.DateTimeZone;
35
+ import org.embulk.spi.time.Timestamp;
36
+ import org.embulk.spi.time.TimestampParser;
37
+ import org.embulk.spi.time.TimestampParseException;
38
+ import com.google.common.base.Throwables;
39
+
28
40
  public class ColumnFilterPlugin implements FilterPlugin
29
41
  {
30
- public interface PluginTask extends Task
42
+ public interface PluginTask
43
+ extends Task, TimestampParser.Task
31
44
  {
32
45
  @Config("columns")
33
46
  public List<ColumnConfig> getColumns();
@@ -58,12 +71,20 @@ public class ColumnFilterPlugin implements FilterPlugin
58
71
  control.run(task.dump(), outputSchema);
59
72
  }
60
73
 
74
+ private final Logger log;
75
+
76
+ public ColumnFilterPlugin()
77
+ {
78
+ log = Exec.getLogger(ColumnFilterPlugin.class);
79
+ }
80
+
61
81
  @Override
62
82
  public PageOutput open(TaskSource taskSource, Schema inputSchema,
63
83
  Schema outputSchema, PageOutput output)
64
84
  {
65
85
  PluginTask task = taskSource.loadTask(PluginTask.class);
66
86
 
87
+ // Map outputColumn => inputColumn
67
88
  HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
68
89
  for (Column outputColumn: outputSchema.getColumns()) {
69
90
  for (Column inputColumn: inputSchema.getColumns()) {
@@ -74,12 +95,52 @@ public class ColumnFilterPlugin implements FilterPlugin
74
95
  }
75
96
  }
76
97
 
77
- HashMap<Column, ColumnConfig> outputColumnConfigMap = new HashMap<Column, ColumnConfig>();
98
+ // Map outputColumn => default value if present
99
+ HashMap<Column, Object> outputDefaultMap = new HashMap<Column, Object>();
78
100
  for (Column outputColumn: outputSchema.getColumns()) {
101
+ Type columnType = outputColumn.getType();
102
+
79
103
  for (ColumnConfig columnConfig : task.getColumns()) {
80
104
  if (columnConfig.getName().equals(outputColumn.getName())) {
81
- outputColumnConfigMap.put(outputColumn, columnConfig);
82
- break;
105
+
106
+ if (columnType instanceof BooleanType) {
107
+ if (columnConfig.getDefault().isPresent()) {
108
+ Boolean default_value = (Boolean)columnConfig.getDefault().get();
109
+ outputDefaultMap.put(outputColumn, default_value);
110
+ }
111
+ }
112
+ else if (columnType instanceof LongType) {
113
+ if (columnConfig.getDefault().isPresent()) {
114
+ Long default_value = new Long(columnConfig.getDefault().get().toString());
115
+ outputDefaultMap.put(outputColumn, default_value);
116
+ }
117
+ }
118
+ else if (columnType instanceof DoubleType) {
119
+ if (columnConfig.getDefault().isPresent()) {
120
+ Double default_value = new Double(columnConfig.getDefault().get().toString());
121
+ outputDefaultMap.put(outputColumn, default_value);
122
+ }
123
+ }
124
+ else if (columnType instanceof StringType) {
125
+ if (columnConfig.getDefault().isPresent()) {
126
+ String default_value = (String)columnConfig.getDefault().get();
127
+ outputDefaultMap.put(outputColumn, default_value);
128
+ }
129
+ }
130
+ else if (columnType instanceof TimestampType) {
131
+ if (columnConfig.getDefault().isPresent()) {
132
+ String time = (String)columnConfig.getDefault().get();
133
+ String format = (String)columnConfig.getFormat().get();
134
+ DateTimeZone timezone = DateTimeZone.forID((String)columnConfig.getTimezone().get());
135
+ TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
136
+ try {
137
+ Timestamp default_value = parser.parse(time);
138
+ outputDefaultMap.put(outputColumn, default_value);
139
+ } catch(TimestampParseException ex) {
140
+ throw Throwables.propagate(ex);
141
+ }
142
+ }
143
+ }
83
144
  }
84
145
  }
85
146
  }
@@ -120,11 +181,9 @@ public class ColumnFilterPlugin implements FilterPlugin
120
181
  public void booleanColumn(Column outputColumn) {
121
182
  Column inputColumn = outputInputColumnMap.get(outputColumn);
122
183
  if (pageReader.isNull(inputColumn)) {
123
- ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
124
- // ToDo: speed up
125
- if (columnConfig.getDefault().isPresent()) {
126
- boolean default_value = ((Boolean)columnConfig.getDefault().get()).booleanValue();
127
- pageBuilder.setBoolean(outputColumn, default_value);
184
+ Boolean default_value = (Boolean)outputDefaultMap.get(outputColumn);
185
+ if (default_value != null) {
186
+ pageBuilder.setBoolean(outputColumn, default_value.booleanValue());
128
187
  } else {
129
188
  pageBuilder.setNull(outputColumn);
130
189
  }
@@ -137,11 +196,9 @@ public class ColumnFilterPlugin implements FilterPlugin
137
196
  public void longColumn(Column outputColumn) {
138
197
  Column inputColumn = outputInputColumnMap.get(outputColumn);
139
198
  if (pageReader.isNull(inputColumn)) {
140
- ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
141
- // ToDo: speed up
142
- if (columnConfig.getDefault().isPresent()) {
143
- long default_value = ((Integer)columnConfig.getDefault().get()).longValue();
144
- pageBuilder.setLong(outputColumn, default_value);
199
+ Long default_value = (Long)outputDefaultMap.get(outputColumn);
200
+ if (default_value != null) {
201
+ pageBuilder.setLong(outputColumn, default_value.longValue());
145
202
  } else {
146
203
  pageBuilder.setNull(outputColumn);
147
204
  }
@@ -154,10 +211,9 @@ public class ColumnFilterPlugin implements FilterPlugin
154
211
  public void doubleColumn(Column outputColumn) {
155
212
  Column inputColumn = outputInputColumnMap.get(outputColumn);
156
213
  if (pageReader.isNull(inputColumn)) {
157
- ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
158
- if (columnConfig.getDefault().isPresent()) {
159
- double default_value = ((Double)columnConfig.getDefault().get()).doubleValue();
160
- pageBuilder.setDouble(outputColumn, default_value);
214
+ Double default_value = (Double)outputDefaultMap.get(outputColumn);
215
+ if (default_value != null) {
216
+ pageBuilder.setDouble(outputColumn, default_value.doubleValue());
161
217
  } else {
162
218
  pageBuilder.setNull(outputColumn);
163
219
  }
@@ -170,9 +226,8 @@ public class ColumnFilterPlugin implements FilterPlugin
170
226
  public void stringColumn(Column outputColumn) {
171
227
  Column inputColumn = outputInputColumnMap.get(outputColumn);
172
228
  if (pageReader.isNull(inputColumn)) {
173
- ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
174
- if (columnConfig.getDefault().isPresent()) {
175
- String default_value = (String)columnConfig.getDefault().get();
229
+ String default_value = (String)outputDefaultMap.get(outputColumn);
230
+ if (default_value != null) {
176
231
  pageBuilder.setString(outputColumn, default_value);
177
232
  } else {
178
233
  pageBuilder.setNull(outputColumn);
@@ -186,8 +241,12 @@ public class ColumnFilterPlugin implements FilterPlugin
186
241
  public void timestampColumn(Column outputColumn) {
187
242
  Column inputColumn = outputInputColumnMap.get(outputColumn);
188
243
  if (pageReader.isNull(inputColumn)) {
189
- // ToDo: default for timestamp
190
- pageBuilder.setNull(outputColumn);
244
+ Timestamp default_value = (Timestamp)outputDefaultMap.get(outputColumn);
245
+ if (default_value != null) {
246
+ pageBuilder.setTimestamp(outputColumn, default_value);
247
+ } else {
248
+ pageBuilder.setNull(outputColumn);
249
+ }
191
250
  } else {
192
251
  pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
193
252
  }
@@ -13,4 +13,12 @@ public interface ColumnConfig extends Task
13
13
  @Config("default")
14
14
  @ConfigDefault("null")
15
15
  public Optional<Object> getDefault();
16
+
17
+ @Config("format")
18
+ @ConfigDefault("\"%Y-%m-%d %H:%M:%S.%N %z\"")
19
+ public Optional<String> getFormat();
20
+
21
+ @Config("timezone")
22
+ @ConfigDefault("\"UTC\"")
23
+ public Optional<String> getTimezone();
16
24
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-09 00:00:00.000000000 Z
11
+ date: 2015-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -60,7 +60,7 @@ files:
60
60
  - src/main/java/org/embulk/filter/ColumnFilterPlugin.java
61
61
  - src/main/java/org/embulk/filter/column/ColumnConfig.java
62
62
  - src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
63
- - classpath/embulk-filter-column-0.1.4.jar
63
+ - classpath/embulk-filter-column-0.1.5.jar
64
64
  homepage: https://github.com/sonots/embulk-filter-column
65
65
  licenses:
66
66
  - MIT