embulk-filter-column 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 46fc561880caa7fda6f2cdd9c191a473f0e1a146
4
- data.tar.gz: 14027774ddf37f4782c8b775203e4bec80bce490
3
+ metadata.gz: ad887d441f728829ef11a8f5e6d6e76c5abb7ebe
4
+ data.tar.gz: 091933497cb175efdeeb0ecef88c2654dc9dd3cd
5
5
  SHA512:
6
- metadata.gz: 9197bbed4fd0ffaa6196b464a8da5fa11be8cc48bc30519ae0d54900a3e06356f1ed9221c1f902b132958be82a0d4a640d15a5d688bccdaff88d0358954fdf3e
7
- data.tar.gz: 311d261b18da80495c695f695a2d15b898d18ed7acc70e100d51eb859b686324c6795bfc18c3eefe9ea5a1f7c9fe443d3a4d588cf23db4d9cc1324913232e77a
6
+ metadata.gz: 7a0ca14d8629cff148e580cf4f62f312fa22dbb0c6caf7fc35dea67cb4b8f699ac21f987dee17b0a497cc7310c0076613cbe1c46bc3dd9e5912c928db56101ca
7
+ data.tar.gz: 6c12d777e927becf2fd95fee004b9c021dd2b239db818e6bc539424db2386f62fd91aac4d4dc6ae8759ec6da88b96e1e95781c203ed62dbd63dacbf9a9dbad51
data/.travis.yml CHANGED
@@ -1,6 +1,7 @@
1
1
  language: java
2
2
  jdk:
3
+ - openjdk7
4
+ - oraclejdk7
3
5
  - oraclejdk8
4
6
  script:
5
7
  - ./gradlew test
6
-
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ # 0.2.0
2
+
3
+ Enhancements:
4
+
5
+ * Add `add_columns` option
6
+ * Add `drop_columns` option
7
+
1
8
  # 0.1.6
2
9
 
3
10
  Enhancements:
data/README.md CHANGED
@@ -6,26 +6,103 @@ A filter plugin for Embulk to filter out columns
6
6
 
7
7
  ## Configuration
8
8
 
9
- - **columns**: columns (array of hash, required)
10
- - **name**: name of column
11
- - **default**: default value used if input is null
9
+ - **columns**: columns to retain (array of hash)
10
+ - **name**: name of column (required)
11
+ - **type**: type of column (required to add)
12
+ - **default**: default value used if input is null (required to add)
13
+ - **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `%Y-%m-%d %H:%M:%S.%N %z`, required to add)
14
+ - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `UTC`, required to add)
15
+ - **add_columns**: columns to add (array of hash)
16
+ - **name**: name of column (required)
17
+ - **type**: type of column (required)
18
+ - **default**: value of column (required)
12
19
  - **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
13
20
  - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `UTC`)
21
+ - **drop_columns**: columns to drop (array of hash)
22
+ - **name**: name of column (required)
14
23
 
15
24
  NOTE: column type is automatically retrieved from input data (inputSchema)
16
25
 
17
- ## Example
26
+ ## Example (columns)
27
+
28
+ Say input.csv is as follows:
29
+
30
+ ```
31
+ time,id,key,score
32
+ 2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370
33
+ 2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962
34
+ 2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323
35
+ ```
18
36
 
19
37
  ```yaml
20
38
  filters:
21
39
  - type: column
22
40
  columns:
23
- - {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
24
- - {name: id}
25
- - {name: name, default: "foo"}
41
+ - {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
42
+ - {name: id}
43
+ - {name: key, default: "foo"}
44
+ ```
45
+
46
+ reduces columns to only `time`, `id`, and `key` columns as:
47
+
48
+ ```
49
+ 2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY
50
+ 2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ
51
+ 2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE
52
+ ```
53
+
54
+ ## Example (add_columns)
55
+
56
+ Say input.csv is as follows:
57
+
58
+ ```
59
+ time,id,key,score
60
+ 2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370
61
+ 2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962
62
+ 2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323
63
+ ```
64
+
65
+ ```yaml
66
+ filters:
67
+ - type: column
68
+ add_columns:
69
+ - {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d"}
26
70
  ```
27
71
 
28
- reduces columns to only `time`, `id`, and `name` columns.
72
+ adds `d` column as:
73
+
74
+ ```
75
+ 2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370,2015-07-13
76
+ 2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962,2015-07-13
77
+ 2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323,2015-07-13
78
+ ```
79
+
80
+ ## Example (drop_columns)
81
+
82
+ Say input.csv is as follows:
83
+
84
+ ```
85
+ time,id,key,score
86
+ 2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370
87
+ 2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962
88
+ 2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323
89
+ ```
90
+
91
+ ```yaml
92
+ filters:
93
+ - type: column
94
+ drop_columns:
95
+ - {name: time}
96
+ - {name: id}
97
+ ```
98
+
99
+ drops `time` and `id` columns as:
100
+
101
+ ```
102
+ Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370
103
+ VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962
104
+ C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323
105
+ ```
29
106
 
30
107
  ## ToDo
31
108
 
data/build.gradle CHANGED
@@ -12,13 +12,13 @@ configurations {
12
12
  provided
13
13
  }
14
14
 
15
- version = "0.1.6"
15
+ version = "0.2.0"
16
16
  sourceCompatibility = 1.7
17
17
  targetCompatibility = 1.7
18
18
 
19
19
  dependencies {
20
- compile "org.embulk:embulk-core:0.6.16"
21
- provided "org.embulk:embulk-core:0.6.16"
20
+ compile "org.embulk:embulk-core:0.7.0"
21
+ provided "org.embulk:embulk-core:0.7.0"
22
22
  // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
23
23
  testCompile "junit:junit:4.+"
24
24
  }
@@ -0,0 +1,22 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/example.csv
4
+ parser:
5
+ type: csv
6
+ charset: UTF-8
7
+ newline: CRLF
8
+ null_string: 'NULL'
9
+ skip_header_lines: 1
10
+ comment_line_marker: '#'
11
+ columns:
12
+ - {name: time, type: timestamp, format: "%Y-%m-%d"}
13
+ - {name: id, type: long}
14
+ - {name: name, type: string}
15
+ - {name: score, type: double}
16
+ filters:
17
+ - type: column
18
+ add_columns:
19
+ - {name: foo, type: long, default: 1 }
20
+ - {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d" }
21
+ out:
22
+ type: stdout
@@ -7,7 +7,7 @@
7
7
  # score: integer
8
8
  in:
9
9
  type: file
10
- path_prefix: example.csv
10
+ path_prefix: example/example.csv
11
11
  parser:
12
12
  type: csv
13
13
  charset: UTF-8
@@ -25,7 +25,7 @@ filters:
25
25
  columns:
26
26
  - {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
27
27
  - {name: name, default: "foo"}
28
- - {name: foo, default: 1}
28
+ - {name: foo, default: 1, type: long}
29
29
  - {name: id}
30
30
  out:
31
31
  type: stdout
@@ -0,0 +1,22 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/example.csv
4
+ parser:
5
+ type: csv
6
+ charset: UTF-8
7
+ newline: CRLF
8
+ null_string: 'NULL'
9
+ skip_header_lines: 1
10
+ comment_line_marker: '#'
11
+ columns:
12
+ - {name: time, type: timestamp, format: "%Y-%m-%d"}
13
+ - {name: id, type: long}
14
+ - {name: name, type: string}
15
+ - {name: score, type: double}
16
+ filters:
17
+ - type: column
18
+ drop_columns:
19
+ - {name: time }
20
+ - {name: id }
21
+ out:
22
+ type: stdout
Binary file
@@ -1,6 +1,6 @@
1
- #Wed Feb 04 13:46:12 PST 2015
1
+ #Mon Aug 10 13:48:48 UTC 2015
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.2.1-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
@@ -9,6 +9,7 @@ import org.embulk.config.Config;
9
9
  import org.embulk.config.ConfigDefault;
10
10
  import org.embulk.config.ConfigDiff;
11
11
  import org.embulk.config.ConfigSource;
12
+ import org.embulk.config.ConfigException;
12
13
  import org.embulk.config.Task;
13
14
  import org.embulk.config.TaskSource;
14
15
 
@@ -29,7 +30,6 @@ import org.embulk.spi.Schema;
29
30
  import org.embulk.spi.SchemaConfig;
30
31
  import org.embulk.spi.Column;
31
32
  import org.embulk.spi.ColumnVisitor;
32
- import org.embulk.filter.column.ColumnConfig; // note: different with spi.ColumnConfig
33
33
 
34
34
  import org.joda.time.DateTimeZone;
35
35
  import org.embulk.spi.time.Timestamp;
@@ -37,6 +37,12 @@ import org.embulk.spi.time.TimestampParser;
37
37
  import org.embulk.spi.time.TimestampParseException;
38
38
  import com.google.common.base.Throwables;
39
39
 
40
+ import org.embulk.config.Config;
41
+ import org.embulk.config.ConfigDefault;
42
+ import com.google.common.base.Optional;
43
+ import org.jruby.embed.ScriptingContainer;
44
+ import org.embulk.spi.SchemaConfigException;
45
+
40
46
  public class ColumnFilterPlugin implements FilterPlugin
41
47
  {
42
48
  private static final Logger logger = Exec.getLogger(ColumnFilterPlugin.class);
@@ -45,10 +51,42 @@ public class ColumnFilterPlugin implements FilterPlugin
45
51
  {
46
52
  }
47
53
 
54
+ // NOTE: This is not spi.ColumnConfig
55
+ private interface ColumnConfig extends Task
56
+ {
57
+ @Config("name")
58
+ public String getName();
59
+
60
+ @Config("type")
61
+ @ConfigDefault("null")
62
+ public Optional<Type> getType(); // required only for addColumns
63
+
64
+ @Config("default")
65
+ @ConfigDefault("null")
66
+ public Optional<Object> getDefault();
67
+
68
+ @Config("format")
69
+ @ConfigDefault("\"%Y-%m-%d %H:%M:%S.%N %z\"")
70
+ public Optional<String> getFormat();
71
+
72
+ @Config("timezone")
73
+ @ConfigDefault("\"UTC\"")
74
+ public Optional<String> getTimezone();
75
+ }
76
+
48
77
  public interface PluginTask extends Task, TimestampParser.Task
49
78
  {
50
79
  @Config("columns")
80
+ @ConfigDefault("[]")
51
81
  public List<ColumnConfig> getColumns();
82
+
83
+ @Config("add_columns")
84
+ @ConfigDefault("[]")
85
+ public List<ColumnConfig> getAddColumns();
86
+
87
+ @Config("drop_columns")
88
+ @ConfigDefault("[]")
89
+ public List<ColumnConfig> getDropColumns();
52
90
  }
53
91
 
54
92
  @Override
@@ -57,25 +95,137 @@ public class ColumnFilterPlugin implements FilterPlugin
57
95
  {
58
96
  PluginTask task = config.loadConfig(PluginTask.class);
59
97
 
60
- // Automatically get column type from inputSchema
61
- List<ColumnConfig> columnConfigs = task.getColumns();
98
+ List<ColumnConfig> columns = task.getColumns();
99
+ List<ColumnConfig> addColumns = task.getAddColumns();
100
+ List<ColumnConfig> dropColumns = task.getDropColumns();
101
+
102
+ if (columns.size() == 0 && addColumns.size() == 0 && dropColumns.size() == 0) {
103
+ throw new ConfigException("One of \"columns\", \"add_columns\", \"drop_columns\" must be specified.");
104
+ }
105
+
106
+ if (columns.size() > 0 && dropColumns.size() > 0) {
107
+ throw new ConfigException("Either of \"columns\", \"drop_columns\" can be specified.");
108
+ }
109
+
110
+ // Automatically get column type from inputSchema for columns and dropColumns
62
111
  ImmutableList.Builder<Column> builder = ImmutableList.builder();
63
112
  int i = 0;
64
- for (ColumnConfig columnConfig : columnConfigs) {
65
- String columnName = columnConfig.getName();
113
+ if (dropColumns.size() > 0) {
66
114
  for (Column inputColumn: inputSchema.getColumns()) {
67
- if (inputColumn.getName().equals(columnName)) {
68
- Column outputColumn = new Column(i++, columnName, inputColumn.getType());
115
+ String name = inputColumn.getName();
116
+ boolean matched = false;
117
+ for (ColumnConfig dropColumn : dropColumns) {
118
+ if (dropColumn.getName().equals(name)) {
119
+ matched = true;
120
+ break;
121
+ }
122
+ }
123
+ if (! matched) {
124
+ Column outputColumn = new Column(i++, name, inputColumn.getType());
125
+ builder.add(outputColumn);
126
+ }
127
+ }
128
+ } else if (columns.size() > 0) {
129
+ for (ColumnConfig column : columns) {
130
+ String name = column.getName();
131
+ Optional<Type> type = column.getType();
132
+ Optional<Object> defaultValue = column.getDefault();
133
+
134
+ Column inputColumn = getColumn(name, inputSchema);
135
+ if (inputColumn != null) { // filter column
136
+ Column outputColumn = new Column(i++, name, inputColumn.getType());
137
+ builder.add(outputColumn);
138
+ }
139
+ else if (type.isPresent() && defaultValue.isPresent()) { // add column
140
+ Column outputColumn = new Column(i++, name, type.get());
141
+ builder.add(outputColumn);
142
+ }
143
+ else {
144
+ throw new SchemaConfigException(String.format("columns: Column '%s' is not found in inputSchema. Column '%s' does not have \"type\" and \"default\"", name, name));
145
+ }
146
+ }
147
+ } else {
148
+ for (Column inputColumn: inputSchema.getColumns()) {
149
+ Column outputColumn = new Column(i++, inputColumn.getName(), inputColumn.getType());
150
+ builder.add(outputColumn);
151
+ }
152
+ }
153
+
154
+ // Add columns to last. If you want to add to head or middle, you can use `columns` option
155
+ if (addColumns.size() > 0) {
156
+ for (ColumnConfig column : addColumns) {
157
+ String name = column.getName();
158
+ Optional<Type> type = column.getType();
159
+ Optional<Object> defaultValue = column.getDefault();
160
+
161
+ if (type.isPresent() && defaultValue.isPresent()) { // add column
162
+ Column outputColumn = new Column(i++, name, type.get());
69
163
  builder.add(outputColumn);
70
- break;
164
+ }
165
+ else {
166
+ throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"type\" and \"default\"", name));
71
167
  }
72
168
  }
73
169
  }
170
+
74
171
  Schema outputSchema = new Schema(builder.build());
75
172
 
76
173
  control.run(task.dump(), outputSchema);
77
174
  }
78
175
 
176
+ private Column getColumn(String name, Schema schema) {
177
+ // hash should be faster, though
178
+ for (Column column: schema.getColumns()) {
179
+ if (column.getName().equals(name)) {
180
+ return column;
181
+ }
182
+ }
183
+ return null;
184
+ }
185
+
186
+ private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, ScriptingContainer jruby) {
187
+ for (ColumnConfig columnConfig : columnConfigs) {
188
+ if (columnConfig.getName().equals(name)) {
189
+ if (type instanceof BooleanType) {
190
+ if (columnConfig.getDefault().isPresent()) {
191
+ return (Boolean)columnConfig.getDefault().get();
192
+ }
193
+ }
194
+ else if (type instanceof LongType) {
195
+ if (columnConfig.getDefault().isPresent()) {
196
+ return new Long(columnConfig.getDefault().get().toString());
197
+ }
198
+ }
199
+ else if (type instanceof DoubleType) {
200
+ if (columnConfig.getDefault().isPresent()) {
201
+ return new Double(columnConfig.getDefault().get().toString());
202
+ }
203
+ }
204
+ else if (type instanceof StringType) {
205
+ if (columnConfig.getDefault().isPresent()) {
206
+ return (String)columnConfig.getDefault().get();
207
+ }
208
+ }
209
+ else if (type instanceof TimestampType) {
210
+ if (columnConfig.getDefault().isPresent()) {
211
+ String time = (String)columnConfig.getDefault().get();
212
+ String format = (String)columnConfig.getFormat().get();
213
+ DateTimeZone timezone = DateTimeZone.forID((String)columnConfig.getTimezone().get());
214
+ TimestampParser parser = new TimestampParser(jruby, format, timezone);
215
+ try {
216
+ Timestamp default_value = parser.parse(time);
217
+ return default_value;
218
+ } catch(TimestampParseException ex) {
219
+ throw Throwables.propagate(ex);
220
+ }
221
+ }
222
+ }
223
+ return null;
224
+ }
225
+ }
226
+ return null;
227
+ }
228
+
79
229
  @Override
80
230
  public PageOutput open(final TaskSource taskSource, final Schema inputSchema,
81
231
  final Schema outputSchema, final PageOutput output)
@@ -85,67 +235,29 @@ public class ColumnFilterPlugin implements FilterPlugin
85
235
  // Map outputColumn => inputColumn
86
236
  final HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
87
237
  for (Column outputColumn: outputSchema.getColumns()) {
88
- for (Column inputColumn: inputSchema.getColumns()) {
89
- if (inputColumn.getName().equals(outputColumn.getName())) {
90
- outputInputColumnMap.put(outputColumn, inputColumn);
91
- break;
92
- }
93
- }
238
+ Column inputColumn = getColumn(outputColumn.getName(), inputSchema);
239
+ outputInputColumnMap.put(outputColumn, inputColumn); // NOTE: inputColumn would be null
94
240
  }
95
241
 
96
242
  // Map outputColumn => default value if present
97
243
  final HashMap<Column, Object> outputDefaultMap = new HashMap<Column, Object>();
98
244
  for (Column outputColumn: outputSchema.getColumns()) {
99
- Type columnType = outputColumn.getType();
245
+ String name = outputColumn.getName();
246
+ Type type = outputColumn.getType();
100
247
 
101
- for (ColumnConfig columnConfig : task.getColumns()) {
102
- if (columnConfig.getName().equals(outputColumn.getName())) {
103
-
104
- if (columnType instanceof BooleanType) {
105
- if (columnConfig.getDefault().isPresent()) {
106
- Boolean default_value = (Boolean)columnConfig.getDefault().get();
107
- outputDefaultMap.put(outputColumn, default_value);
108
- }
109
- }
110
- else if (columnType instanceof LongType) {
111
- if (columnConfig.getDefault().isPresent()) {
112
- Long default_value = new Long(columnConfig.getDefault().get().toString());
113
- outputDefaultMap.put(outputColumn, default_value);
114
- }
115
- }
116
- else if (columnType instanceof DoubleType) {
117
- if (columnConfig.getDefault().isPresent()) {
118
- Double default_value = new Double(columnConfig.getDefault().get().toString());
119
- outputDefaultMap.put(outputColumn, default_value);
120
- }
121
- }
122
- else if (columnType instanceof StringType) {
123
- if (columnConfig.getDefault().isPresent()) {
124
- String default_value = (String)columnConfig.getDefault().get();
125
- outputDefaultMap.put(outputColumn, default_value);
126
- }
127
- }
128
- else if (columnType instanceof TimestampType) {
129
- if (columnConfig.getDefault().isPresent()) {
130
- String time = (String)columnConfig.getDefault().get();
131
- String format = (String)columnConfig.getFormat().get();
132
- DateTimeZone timezone = DateTimeZone.forID((String)columnConfig.getTimezone().get());
133
- TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
134
- try {
135
- Timestamp default_value = parser.parse(time);
136
- outputDefaultMap.put(outputColumn, default_value);
137
- } catch(TimestampParseException ex) {
138
- throw Throwables.propagate(ex);
139
- }
140
- }
141
- }
142
- }
248
+ Object default_value = getDefault(name, type, task.getColumns(), task.getJRuby());
249
+ if (default_value == null) {
250
+ default_value = getDefault(name, type, task.getAddColumns(), task.getJRuby());
251
+ }
252
+ if (default_value != null) {
253
+ outputDefaultMap.put(outputColumn, default_value);
143
254
  }
144
255
  }
145
256
 
146
257
  return new PageOutput() {
147
258
  private PageReader pageReader = new PageReader(inputSchema);
148
259
  private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
260
+ private ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
149
261
 
150
262
  @Override
151
263
  public void finish() {
@@ -161,7 +273,6 @@ public class ColumnFilterPlugin implements FilterPlugin
161
273
  public void add(Page page) {
162
274
  pageReader.setPage(page);
163
275
 
164
- ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
165
276
  while (pageReader.nextRecord()) {
166
277
  outputSchema.visitColumns(visitor);
167
278
  pageBuilder.addRecord();
@@ -178,7 +289,7 @@ public class ColumnFilterPlugin implements FilterPlugin
178
289
  @Override
179
290
  public void booleanColumn(Column outputColumn) {
180
291
  Column inputColumn = outputInputColumnMap.get(outputColumn);
181
- if (pageReader.isNull(inputColumn)) {
292
+ if (inputColumn == null || pageReader.isNull(inputColumn)) {
182
293
  Boolean default_value = (Boolean)outputDefaultMap.get(outputColumn);
183
294
  if (default_value != null) {
184
295
  pageBuilder.setBoolean(outputColumn, default_value.booleanValue());
@@ -193,7 +304,7 @@ public class ColumnFilterPlugin implements FilterPlugin
193
304
  @Override
194
305
  public void longColumn(Column outputColumn) {
195
306
  Column inputColumn = outputInputColumnMap.get(outputColumn);
196
- if (pageReader.isNull(inputColumn)) {
307
+ if (inputColumn == null || pageReader.isNull(inputColumn)) {
197
308
  Long default_value = (Long)outputDefaultMap.get(outputColumn);
198
309
  if (default_value != null) {
199
310
  pageBuilder.setLong(outputColumn, default_value.longValue());
@@ -208,7 +319,7 @@ public class ColumnFilterPlugin implements FilterPlugin
208
319
  @Override
209
320
  public void doubleColumn(Column outputColumn) {
210
321
  Column inputColumn = outputInputColumnMap.get(outputColumn);
211
- if (pageReader.isNull(inputColumn)) {
322
+ if (inputColumn == null || pageReader.isNull(inputColumn)) {
212
323
  Double default_value = (Double)outputDefaultMap.get(outputColumn);
213
324
  if (default_value != null) {
214
325
  pageBuilder.setDouble(outputColumn, default_value.doubleValue());
@@ -223,7 +334,7 @@ public class ColumnFilterPlugin implements FilterPlugin
223
334
  @Override
224
335
  public void stringColumn(Column outputColumn) {
225
336
  Column inputColumn = outputInputColumnMap.get(outputColumn);
226
- if (pageReader.isNull(inputColumn)) {
337
+ if (inputColumn == null || pageReader.isNull(inputColumn)) {
227
338
  String default_value = (String)outputDefaultMap.get(outputColumn);
228
339
  if (default_value != null) {
229
340
  pageBuilder.setString(outputColumn, default_value);
@@ -238,7 +349,7 @@ public class ColumnFilterPlugin implements FilterPlugin
238
349
  @Override
239
350
  public void timestampColumn(Column outputColumn) {
240
351
  Column inputColumn = outputInputColumnMap.get(outputColumn);
241
- if (pageReader.isNull(inputColumn)) {
352
+ if (inputColumn == null || pageReader.isNull(inputColumn)) {
242
353
  Timestamp default_value = (Timestamp)outputDefaultMap.get(outputColumn);
243
354
  if (default_value != null) {
244
355
  pageBuilder.setTimestamp(outputColumn, default_value);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-24 00:00:00.000000000 Z
11
+ date: 2015-10-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -51,17 +51,17 @@ files:
51
51
  - LICENSE.txt
52
52
  - README.md
53
53
  - build.gradle
54
- - example.csv
55
- - example.yml
54
+ - example/add_columns.yml
55
+ - example/columns.yml
56
+ - example/drop_columns.yml
56
57
  - gradle/wrapper/gradle-wrapper.jar
57
58
  - gradle/wrapper/gradle-wrapper.properties
58
59
  - gradlew
59
60
  - gradlew.bat
60
61
  - lib/embulk/filter/column.rb
61
62
  - src/main/java/org/embulk/filter/ColumnFilterPlugin.java
62
- - src/main/java/org/embulk/filter/column/ColumnConfig.java
63
63
  - src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
64
- - classpath/embulk-filter-column-0.1.6.jar
64
+ - classpath/embulk-filter-column-0.2.0.jar
65
65
  homepage: https://github.com/sonots/embulk-filter-column
66
66
  licenses:
67
67
  - MIT
@@ -1,24 +0,0 @@
1
- package org.embulk.filter.column;
2
-
3
- import org.embulk.config.Config;
4
- import org.embulk.config.ConfigDefault;
5
- import org.embulk.config.Task;
6
- import com.google.common.base.Optional;
7
-
8
- public interface ColumnConfig extends Task
9
- {
10
- @Config("name")
11
- public String getName();
12
-
13
- @Config("default")
14
- @ConfigDefault("null")
15
- public Optional<Object> getDefault();
16
-
17
- @Config("format")
18
- @ConfigDefault("\"%Y-%m-%d %H:%M:%S.%N %z\"")
19
- public Optional<String> getFormat();
20
-
21
- @Config("timezone")
22
- @ConfigDefault("\"UTC\"")
23
- public Optional<String> getTimezone();
24
- }