embulk-filter-column 0.4.0 → 0.5.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0.pre1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-01 00:00:00.000000000 Z
11
+ date: 2016-05-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -56,14 +56,22 @@ files:
56
56
  - example/columns.yml
57
57
  - example/drop_columns.yml
58
58
  - example/example.csv
59
+ - example/example.yml
60
+ - example/json_add_columns.yml
61
+ - example/json_columns.yml
62
+ - example/json_drop_columns.yml
59
63
  - gradle/wrapper/gradle-wrapper.jar
60
64
  - gradle/wrapper/gradle-wrapper.properties
61
65
  - gradlew
62
66
  - gradlew.bat
63
67
  - lib/embulk/filter/column.rb
64
- - src/main/java/org/embulk/filter/ColumnFilterPlugin.java
68
+ - settings.gradle
69
+ - src/main/java/org/embulk/filter/column/ColumnFilterPlugin.java
70
+ - src/main/java/org/embulk/filter/column/ColumnVisitorImpl.java
71
+ - src/main/java/org/embulk/filter/column/JsonColumn.java
72
+ - src/main/java/org/embulk/filter/column/JsonVisitor.java
65
73
  - src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
66
- - classpath/embulk-filter-column-0.4.0.jar
74
+ - classpath/embulk-filter-column-0.5.0.pre1.jar
67
75
  homepage: https://github.com/sonots/embulk-filter-column
68
76
  licenses:
69
77
  - MIT
@@ -79,9 +87,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
79
87
  version: '0'
80
88
  required_rubygems_version: !ruby/object:Gem::Requirement
81
89
  requirements:
82
- - - '>='
90
+ - - '>'
83
91
  - !ruby/object:Gem::Version
84
- version: '0'
92
+ version: 1.3.1
85
93
  requirements: []
86
94
  rubyforge_project:
87
95
  rubygems_version: 2.1.9
@@ -1,462 +0,0 @@
1
- package org.embulk.filter;
2
-
3
- import com.google.common.base.Optional;
4
- import com.google.common.base.Throwables;
5
- import com.google.common.collect.ImmutableList;
6
-
7
- import org.embulk.config.Config;
8
- import org.embulk.config.ConfigDefault;
9
- import org.embulk.config.ConfigException;
10
- import org.embulk.config.ConfigSource;
11
- import org.embulk.config.Task;
12
- import org.embulk.config.TaskSource;
13
-
14
- import org.embulk.spi.Column;
15
- import org.embulk.spi.ColumnVisitor;
16
- import org.embulk.spi.Exec;
17
- import org.embulk.spi.FilterPlugin;
18
- import org.embulk.spi.Page;
19
- import org.embulk.spi.PageBuilder;
20
- import org.embulk.spi.PageOutput;
21
- import org.embulk.spi.PageReader;
22
- import org.embulk.spi.Schema;
23
- import org.embulk.spi.SchemaConfigException;
24
- import org.embulk.spi.json.JsonParser;
25
- import org.embulk.spi.time.Timestamp;
26
- import org.embulk.spi.time.TimestampParseException;
27
- import org.embulk.spi.time.TimestampParser;
28
- import org.embulk.spi.type.BooleanType;
29
- import org.embulk.spi.type.DoubleType;
30
- import org.embulk.spi.type.LongType;
31
- import org.embulk.spi.type.StringType;
32
- import org.embulk.spi.type.JsonType;
33
- import org.embulk.spi.type.TimestampType;
34
- import org.embulk.spi.type.Type;
35
-
36
- import org.joda.time.DateTimeZone;
37
- import org.msgpack.value.Value;
38
- import org.slf4j.Logger;
39
-
40
- import java.util.HashMap;
41
- import java.util.List;
42
-
43
- public class ColumnFilterPlugin implements FilterPlugin
44
- {
45
- private static final Logger logger = Exec.getLogger(ColumnFilterPlugin.class);
46
-
47
- public ColumnFilterPlugin()
48
- {
49
- }
50
-
51
- // NOTE: This is not spi.ColumnConfig
52
- private interface ColumnConfig extends Task
53
- {
54
- @Config("name")
55
- public String getName();
56
-
57
- @Config("type")
58
- @ConfigDefault("null")
59
- public Optional<Type> getType(); // required only for addColumns
60
-
61
- @Config("default")
62
- @ConfigDefault("null")
63
- public Optional<Object> getDefault();
64
-
65
- @Config("format")
66
- @ConfigDefault("null")
67
- public Optional<String> getFormat();
68
-
69
- @Config("timezone")
70
- @ConfigDefault("null")
71
- public Optional<DateTimeZone> getTimeZone();
72
-
73
- @Config("src")
74
- @ConfigDefault("null")
75
- public Optional<String> getSrc();
76
- }
77
-
78
- public interface PluginTask extends Task, TimestampParser.Task
79
- {
80
- @Config("columns")
81
- @ConfigDefault("[]")
82
- public List<ColumnConfig> getColumns();
83
-
84
- @Config("add_columns")
85
- @ConfigDefault("[]")
86
- public List<ColumnConfig> getAddColumns();
87
-
88
- @Config("drop_columns")
89
- @ConfigDefault("[]")
90
- public List<ColumnConfig> getDropColumns();
91
-
92
- // See TimestampParser for default_timestamp_format, and default_timezone
93
- }
94
-
95
- @Override
96
- public void transaction(final ConfigSource config, final Schema inputSchema,
97
- final FilterPlugin.Control control)
98
- {
99
- PluginTask task = config.loadConfig(PluginTask.class);
100
-
101
- List<ColumnConfig> columns = task.getColumns();
102
- List<ColumnConfig> addColumns = task.getAddColumns();
103
- List<ColumnConfig> dropColumns = task.getDropColumns();
104
-
105
- if (columns.size() == 0 && addColumns.size() == 0 && dropColumns.size() == 0) {
106
- throw new ConfigException("One of \"columns\", \"add_columns\", \"drop_columns\" must be specified.");
107
- }
108
-
109
- if (columns.size() > 0 && dropColumns.size() > 0) {
110
- throw new ConfigException("Either of \"columns\", \"drop_columns\" can be specified.");
111
- }
112
-
113
- // Automatically get column type from inputSchema for columns and dropColumns
114
- ImmutableList.Builder<Column> builder = ImmutableList.builder();
115
- int i = 0;
116
- if (dropColumns.size() > 0) {
117
- for (Column inputColumn : inputSchema.getColumns()) {
118
- String name = inputColumn.getName();
119
- boolean matched = false;
120
- for (ColumnConfig dropColumn : dropColumns) {
121
- if (dropColumn.getName().equals(name)) {
122
- matched = true;
123
- break;
124
- }
125
- }
126
- if (! matched) {
127
- Column outputColumn = new Column(i++, name, inputColumn.getType());
128
- builder.add(outputColumn);
129
- }
130
- }
131
- }
132
- else if (columns.size() > 0) {
133
- for (ColumnConfig column : columns) {
134
- String name = column.getName();
135
- Optional<Type> type = column.getType();
136
- Optional<Object> defaultValue = column.getDefault();
137
- Optional<String> src = column.getSrc();
138
-
139
- String srcName = src.isPresent() ? src.get() : name;
140
- Column inputColumn = getColumn(srcName, inputSchema);
141
- if (inputColumn != null) { // filter or copy column
142
- Column outputColumn = new Column(i++, name, inputColumn.getType());
143
- builder.add(outputColumn);
144
- }
145
- else if (type.isPresent() && defaultValue.isPresent()) { // add column
146
- Column outputColumn = new Column(i++, name, type.get());
147
- builder.add(outputColumn);
148
- }
149
- else {
150
- throw new SchemaConfigException(String.format("columns: Column src '%s' is not found in inputSchema. Column '%s' does not have \"type\" and \"default\"", srcName, name));
151
- }
152
- }
153
- }
154
- else {
155
- for (Column inputColumn : inputSchema.getColumns()) {
156
- Column outputColumn = new Column(i++, inputColumn.getName(), inputColumn.getType());
157
- builder.add(outputColumn);
158
- }
159
- }
160
-
161
- // Add columns to last. If you want to add to head or middle, you can use `columns` option
162
- if (addColumns.size() > 0) {
163
- for (ColumnConfig column : addColumns) {
164
- String name = column.getName();
165
- Optional<Type> type = column.getType();
166
- Optional<Object> defaultValue = column.getDefault();
167
- Optional<String> src = column.getSrc();
168
-
169
- String srcName = null;
170
- Column inputColumn = null;
171
- if (src.isPresent()) {
172
- srcName = src.get();
173
- inputColumn = getColumn(srcName, inputSchema);
174
- }
175
- if (inputColumn != null) { // copy column
176
- Column outputColumn = new Column(i++, name, inputColumn.getType());
177
- builder.add(outputColumn);
178
- }
179
- else if (type.isPresent() && defaultValue.isPresent()) { // add column
180
- Column outputColumn = new Column(i++, name, type.get());
181
- builder.add(outputColumn);
182
- }
183
- else {
184
- throw new SchemaConfigException(String.format("add_columns: Column src '%s' is not found in inputSchema, Column '%s' does not have \"type\" and \"default\"", srcName, name));
185
- }
186
- }
187
- }
188
-
189
- Schema outputSchema = new Schema(builder.build());
190
-
191
- control.run(task.dump(), outputSchema);
192
- }
193
-
194
- private Column getColumn(String name, Schema schema)
195
- {
196
- // hash should be faster, though
197
- for (Column column : schema.getColumns()) {
198
- if (column.getName().equals(name)) {
199
- return column;
200
- }
201
- }
202
- return null;
203
- }
204
-
205
- private String getSrc(String name, List<ColumnConfig> columnConfigs)
206
- {
207
- for (ColumnConfig columnConfig : columnConfigs) {
208
- if (columnConfig.getName().equals(name) &&
209
- columnConfig.getSrc().isPresent()) {
210
- return (String) columnConfig.getSrc().get();
211
- }
212
- }
213
- return null;
214
- }
215
-
216
- private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, PluginTask task)
217
- {
218
- for (ColumnConfig columnConfig : columnConfigs) {
219
- if (columnConfig.getName().equals(name)) {
220
- if (type instanceof BooleanType) {
221
- if (columnConfig.getDefault().isPresent()) {
222
- return (Boolean) columnConfig.getDefault().get();
223
- }
224
- }
225
- else if (type instanceof LongType) {
226
- if (columnConfig.getDefault().isPresent()) {
227
- return new Long(columnConfig.getDefault().get().toString());
228
- }
229
- }
230
- else if (type instanceof DoubleType) {
231
- if (columnConfig.getDefault().isPresent()) {
232
- return new Double(columnConfig.getDefault().get().toString());
233
- }
234
- }
235
- else if (type instanceof StringType) {
236
- if (columnConfig.getDefault().isPresent()) {
237
- return (String) columnConfig.getDefault().get();
238
- }
239
- }
240
- else if (type instanceof JsonType) {
241
- if (columnConfig.getDefault().isPresent()) {
242
- JsonParser parser = new JsonParser();
243
- return parser.parse((String) columnConfig.getDefault().get());
244
- }
245
- }
246
- else if (type instanceof TimestampType) {
247
- if (columnConfig.getDefault().isPresent()) {
248
- String time = (String) columnConfig.getDefault().get();
249
- String format = null;
250
- if (columnConfig.getFormat().isPresent()) {
251
- format = columnConfig.getFormat().get();
252
- }
253
- else {
254
- format = task.getDefaultTimestampFormat();
255
- }
256
- DateTimeZone timezone = null;
257
- if (columnConfig.getTimeZone().isPresent()) {
258
- timezone = columnConfig.getTimeZone().get();
259
- }
260
- else {
261
- timezone = task.getDefaultTimeZone();
262
- }
263
- TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
264
- try {
265
- Timestamp defaultValue = parser.parse(time);
266
- return defaultValue;
267
- }
268
- catch (TimestampParseException ex) {
269
- throw Throwables.propagate(ex);
270
- }
271
- }
272
- }
273
- return null;
274
- }
275
- }
276
- return null;
277
- }
278
-
279
- @Override
280
- public PageOutput open(final TaskSource taskSource, final Schema inputSchema,
281
- final Schema outputSchema, final PageOutput output)
282
- {
283
- PluginTask task = taskSource.loadTask(PluginTask.class);
284
-
285
- // Map outputColumn => inputColumn
286
- final HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
287
- for (Column outputColumn : outputSchema.getColumns()) {
288
- String name = outputColumn.getName();
289
- String srcName = getSrc(name, task.getColumns());
290
- if (srcName == null) {
291
- srcName = getSrc(name, task.getAddColumns());
292
- }
293
- if (srcName == null) {
294
- srcName = name;
295
- }
296
- Column inputColumn = getColumn(srcName, inputSchema);
297
- outputInputColumnMap.put(outputColumn, inputColumn); // NOTE: inputColumn would be null
298
- }
299
-
300
- // Map outputColumn => default value if present
301
- final HashMap<Column, Object> outputDefaultMap = new HashMap<Column, Object>();
302
- for (Column outputColumn : outputSchema.getColumns()) {
303
- String name = outputColumn.getName();
304
- Type type = outputColumn.getType();
305
-
306
- Object defaultValue = getDefault(name, type, task.getColumns(), task);
307
- if (defaultValue == null) {
308
- defaultValue = getDefault(name, type, task.getAddColumns(), task);
309
- }
310
- if (defaultValue != null) {
311
- outputDefaultMap.put(outputColumn, defaultValue);
312
- }
313
- }
314
-
315
- return new PageOutput() {
316
- private PageReader pageReader = new PageReader(inputSchema);
317
- private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
318
- private ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
319
-
320
- @Override
321
- public void finish()
322
- {
323
- pageBuilder.finish();
324
- }
325
-
326
- @Override
327
- public void close()
328
- {
329
- pageBuilder.close();
330
- }
331
-
332
- @Override
333
- public void add(Page page)
334
- {
335
- pageReader.setPage(page);
336
-
337
- while (pageReader.nextRecord()) {
338
- outputSchema.visitColumns(visitor);
339
- pageBuilder.addRecord();
340
- }
341
- }
342
-
343
- class ColumnVisitorImpl implements ColumnVisitor
344
- {
345
- private final PageBuilder pageBuilder;
346
-
347
- ColumnVisitorImpl(PageBuilder pageBuilder)
348
- {
349
- this.pageBuilder = pageBuilder;
350
- }
351
-
352
- @Override
353
- public void booleanColumn(Column outputColumn)
354
- {
355
- Column inputColumn = outputInputColumnMap.get(outputColumn);
356
- if (inputColumn == null || pageReader.isNull(inputColumn)) {
357
- Boolean defaultValue = (Boolean) outputDefaultMap.get(outputColumn);
358
- if (defaultValue != null) {
359
- pageBuilder.setBoolean(outputColumn, defaultValue.booleanValue());
360
- }
361
- else {
362
- pageBuilder.setNull(outputColumn);
363
- }
364
- }
365
- else {
366
- pageBuilder.setBoolean(outputColumn, pageReader.getBoolean(inputColumn));
367
- }
368
- }
369
-
370
- @Override
371
- public void longColumn(Column outputColumn)
372
- {
373
- Column inputColumn = outputInputColumnMap.get(outputColumn);
374
- if (inputColumn == null || pageReader.isNull(inputColumn)) {
375
- Long defaultValue = (Long) outputDefaultMap.get(outputColumn);
376
- if (defaultValue != null) {
377
- pageBuilder.setLong(outputColumn, defaultValue.longValue());
378
- }
379
- else {
380
- pageBuilder.setNull(outputColumn);
381
- }
382
- }
383
- else {
384
- pageBuilder.setLong(outputColumn, pageReader.getLong(inputColumn));
385
- }
386
- }
387
-
388
- @Override
389
- public void doubleColumn(Column outputColumn)
390
- {
391
- Column inputColumn = outputInputColumnMap.get(outputColumn);
392
- if (inputColumn == null || pageReader.isNull(inputColumn)) {
393
- Double defaultValue = (Double) outputDefaultMap.get(outputColumn);
394
- if (defaultValue != null) {
395
- pageBuilder.setDouble(outputColumn, defaultValue.doubleValue());
396
- }
397
- else {
398
- pageBuilder.setNull(outputColumn);
399
- }
400
- }
401
- else {
402
- pageBuilder.setDouble(outputColumn, pageReader.getDouble(inputColumn));
403
- }
404
- }
405
-
406
- @Override
407
- public void stringColumn(Column outputColumn)
408
- {
409
- Column inputColumn = outputInputColumnMap.get(outputColumn);
410
- if (inputColumn == null || pageReader.isNull(inputColumn)) {
411
- String defaultValue = (String) outputDefaultMap.get(outputColumn);
412
- if (defaultValue != null) {
413
- pageBuilder.setString(outputColumn, defaultValue);
414
- }
415
- else {
416
- pageBuilder.setNull(outputColumn);
417
- }
418
- }
419
- else {
420
- pageBuilder.setString(outputColumn, pageReader.getString(inputColumn));
421
- }
422
- }
423
-
424
- @Override
425
- public void jsonColumn(Column outputColumn)
426
- {
427
- Column inputColumn = outputInputColumnMap.get(outputColumn);
428
- if (inputColumn == null || pageReader.isNull(inputColumn)) {
429
- Value defaultValue = (Value) outputDefaultMap.get(outputColumn);
430
- if (defaultValue != null) {
431
- pageBuilder.setJson(outputColumn, defaultValue);
432
- }
433
- else {
434
- pageBuilder.setNull(outputColumn);
435
- }
436
- }
437
- else {
438
- pageBuilder.setJson(outputColumn, pageReader.getJson(inputColumn));
439
- }
440
- }
441
-
442
- @Override
443
- public void timestampColumn(Column outputColumn)
444
- {
445
- Column inputColumn = outputInputColumnMap.get(outputColumn);
446
- if (inputColumn == null || pageReader.isNull(inputColumn)) {
447
- Timestamp defaultValue = (Timestamp) outputDefaultMap.get(outputColumn);
448
- if (defaultValue != null) {
449
- pageBuilder.setTimestamp(outputColumn, defaultValue);
450
- }
451
- else {
452
- pageBuilder.setNull(outputColumn);
453
- }
454
- }
455
- else {
456
- pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
457
- }
458
- }
459
- }
460
- };
461
- }
462
- }