embulk-filter-column 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/CHANGELOG.md +7 -0
- data/README.md +85 -8
- data/build.gradle +3 -3
- data/example/add_columns.yml +22 -0
- data/{example.yml → example/columns.yml} +2 -2
- data/example/drop_columns.yml +22 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/filter/ColumnFilterPlugin.java +174 -63
- metadata +6 -6
- data/src/main/java/org/embulk/filter/column/ColumnConfig.java +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ad887d441f728829ef11a8f5e6d6e76c5abb7ebe
|
4
|
+
data.tar.gz: 091933497cb175efdeeb0ecef88c2654dc9dd3cd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a0ca14d8629cff148e580cf4f62f312fa22dbb0c6caf7fc35dea67cb4b8f699ac21f987dee17b0a497cc7310c0076613cbe1c46bc3dd9e5912c928db56101ca
|
7
|
+
data.tar.gz: 6c12d777e927becf2fd95fee004b9c021dd2b239db818e6bc539424db2386f62fd91aac4d4dc6ae8759ec6da88b96e1e95781c203ed62dbd63dacbf9a9dbad51
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -6,26 +6,103 @@ A filter plugin for Embulk to filter out columns
|
|
6
6
|
|
7
7
|
## Configuration
|
8
8
|
|
9
|
-
- **columns**: columns (array of hash
|
10
|
-
- **name**: name of column
|
11
|
-
- **
|
9
|
+
- **columns**: columns to retain (array of hash)
|
10
|
+
- **name**: name of column (required)
|
11
|
+
- **type**: type of column (required to add)
|
12
|
+
- **default**: default value used if input is null (required to add)
|
13
|
+
- **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `%Y-%m-%d %H:%M:%S.%N %z`, required to add)
|
14
|
+
- **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `UTC`, required to add)
|
15
|
+
- **add_columns**: columns to add (array of hash)
|
16
|
+
- **name**: name of column (required)
|
17
|
+
- **type**: type of column (required)
|
18
|
+
- **default**: value of column (required)
|
12
19
|
- **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
|
13
20
|
- **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `UTC`)
|
21
|
+
- **drop_columns**: columns to drop (array of hash)
|
22
|
+
- **name**: name of column (required)
|
14
23
|
|
15
24
|
NOTE: column type is automatically retrieved from input data (inputSchema)
|
16
25
|
|
17
|
-
## Example
|
26
|
+
## Example (columns)
|
27
|
+
|
28
|
+
Say input.csv is as follows:
|
29
|
+
|
30
|
+
```
|
31
|
+
time,id,key,score
|
32
|
+
2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370
|
33
|
+
2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962
|
34
|
+
2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323
|
35
|
+
```
|
18
36
|
|
19
37
|
```yaml
|
20
38
|
filters:
|
21
39
|
- type: column
|
22
40
|
columns:
|
23
|
-
- {
|
24
|
-
- {
|
25
|
-
- {
|
41
|
+
- {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
|
42
|
+
- {name: id}
|
43
|
+
- {name: key, default: "foo"}
|
44
|
+
```
|
45
|
+
|
46
|
+
reduces columns to only `time`, `id`, and `key` columns as:
|
47
|
+
|
48
|
+
```
|
49
|
+
2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY
|
50
|
+
2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ
|
51
|
+
2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE
|
52
|
+
```
|
53
|
+
|
54
|
+
## Example (add_columns)
|
55
|
+
|
56
|
+
Say input.csv is as follows:
|
57
|
+
|
58
|
+
```
|
59
|
+
time,id,key,score
|
60
|
+
2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370
|
61
|
+
2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962
|
62
|
+
2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323
|
63
|
+
```
|
64
|
+
|
65
|
+
```yaml
|
66
|
+
filters:
|
67
|
+
- type: column
|
68
|
+
add_columns:
|
69
|
+
- {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d"}
|
26
70
|
```
|
27
71
|
|
28
|
-
|
72
|
+
add `d` column as:
|
73
|
+
|
74
|
+
```
|
75
|
+
2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370,2015-07-13
|
76
|
+
2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962,2015-07-13
|
77
|
+
2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323,2015-07-13
|
78
|
+
```
|
79
|
+
|
80
|
+
## Example (drop_columns)
|
81
|
+
|
82
|
+
Say input.csv is as follows:
|
83
|
+
|
84
|
+
```
|
85
|
+
time,id,key,score
|
86
|
+
2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370
|
87
|
+
2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962
|
88
|
+
2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323
|
89
|
+
```
|
90
|
+
|
91
|
+
```yaml
|
92
|
+
filters:
|
93
|
+
- type: column
|
94
|
+
drop_columns:
|
95
|
+
- {name: time}
|
96
|
+
- {name: id}
|
97
|
+
```
|
98
|
+
|
99
|
+
drops `time` and `id` columns as:
|
100
|
+
|
101
|
+
```
|
102
|
+
Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370
|
103
|
+
VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962
|
104
|
+
C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323
|
105
|
+
```
|
29
106
|
|
30
107
|
## ToDo
|
31
108
|
|
data/build.gradle
CHANGED
@@ -12,13 +12,13 @@ configurations {
|
|
12
12
|
provided
|
13
13
|
}
|
14
14
|
|
15
|
-
version = "0.
|
15
|
+
version = "0.2.0"
|
16
16
|
sourceCompatibility = 1.7
|
17
17
|
targetCompatibility = 1.7
|
18
18
|
|
19
19
|
dependencies {
|
20
|
-
compile "org.embulk:embulk-core:0.
|
21
|
-
provided "org.embulk:embulk-core:0.
|
20
|
+
compile "org.embulk:embulk-core:0.7.0"
|
21
|
+
provided "org.embulk:embulk-core:0.7.0"
|
22
22
|
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
|
23
23
|
testCompile "junit:junit:4.+"
|
24
24
|
}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/example.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
charset: UTF-8
|
7
|
+
newline: CRLF
|
8
|
+
null_string: 'NULL'
|
9
|
+
skip_header_lines: 1
|
10
|
+
comment_line_marker: '#'
|
11
|
+
columns:
|
12
|
+
- {name: time, type: timestamp, format: "%Y-%m-%d"}
|
13
|
+
- {name: id, type: long}
|
14
|
+
- {name: name, type: string}
|
15
|
+
- {name: score, type: double}
|
16
|
+
filters:
|
17
|
+
- type: column
|
18
|
+
add_columns:
|
19
|
+
- {name: foo, type: long, default: 1 }
|
20
|
+
- {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d" }
|
21
|
+
out:
|
22
|
+
type: stdout
|
@@ -7,7 +7,7 @@
|
|
7
7
|
# score: integer
|
8
8
|
in:
|
9
9
|
type: file
|
10
|
-
path_prefix: example.csv
|
10
|
+
path_prefix: example/example.csv
|
11
11
|
parser:
|
12
12
|
type: csv
|
13
13
|
charset: UTF-8
|
@@ -25,7 +25,7 @@ filters:
|
|
25
25
|
columns:
|
26
26
|
- {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
|
27
27
|
- {name: name, default: "foo"}
|
28
|
-
- {name: foo, default: 1}
|
28
|
+
- {name: foo, default: 1, type: long}
|
29
29
|
- {name: id}
|
30
30
|
out:
|
31
31
|
type: stdout
|
@@ -0,0 +1,22 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/example.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
charset: UTF-8
|
7
|
+
newline: CRLF
|
8
|
+
null_string: 'NULL'
|
9
|
+
skip_header_lines: 1
|
10
|
+
comment_line_marker: '#'
|
11
|
+
columns:
|
12
|
+
- {name: time, type: timestamp, format: "%Y-%m-%d"}
|
13
|
+
- {name: id, type: long}
|
14
|
+
- {name: name, type: string}
|
15
|
+
- {name: score, type: double}
|
16
|
+
filters:
|
17
|
+
- type: column
|
18
|
+
drop_columns:
|
19
|
+
- {name: time }
|
20
|
+
- {name: id }
|
21
|
+
out:
|
22
|
+
type: stdout
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Mon Aug 10 13:48:48 UTC 2015
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
|
@@ -9,6 +9,7 @@ import org.embulk.config.Config;
|
|
9
9
|
import org.embulk.config.ConfigDefault;
|
10
10
|
import org.embulk.config.ConfigDiff;
|
11
11
|
import org.embulk.config.ConfigSource;
|
12
|
+
import org.embulk.config.ConfigException;
|
12
13
|
import org.embulk.config.Task;
|
13
14
|
import org.embulk.config.TaskSource;
|
14
15
|
|
@@ -29,7 +30,6 @@ import org.embulk.spi.Schema;
|
|
29
30
|
import org.embulk.spi.SchemaConfig;
|
30
31
|
import org.embulk.spi.Column;
|
31
32
|
import org.embulk.spi.ColumnVisitor;
|
32
|
-
import org.embulk.filter.column.ColumnConfig; // note: different with spi.ColumnConfig
|
33
33
|
|
34
34
|
import org.joda.time.DateTimeZone;
|
35
35
|
import org.embulk.spi.time.Timestamp;
|
@@ -37,6 +37,12 @@ import org.embulk.spi.time.TimestampParser;
|
|
37
37
|
import org.embulk.spi.time.TimestampParseException;
|
38
38
|
import com.google.common.base.Throwables;
|
39
39
|
|
40
|
+
import org.embulk.config.Config;
|
41
|
+
import org.embulk.config.ConfigDefault;
|
42
|
+
import com.google.common.base.Optional;
|
43
|
+
import org.jruby.embed.ScriptingContainer;
|
44
|
+
import org.embulk.spi.SchemaConfigException;
|
45
|
+
|
40
46
|
public class ColumnFilterPlugin implements FilterPlugin
|
41
47
|
{
|
42
48
|
private static final Logger logger = Exec.getLogger(ColumnFilterPlugin.class);
|
@@ -45,10 +51,42 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
45
51
|
{
|
46
52
|
}
|
47
53
|
|
54
|
+
// NOTE: This is not spi.ColumnConfig
|
55
|
+
private interface ColumnConfig extends Task
|
56
|
+
{
|
57
|
+
@Config("name")
|
58
|
+
public String getName();
|
59
|
+
|
60
|
+
@Config("type")
|
61
|
+
@ConfigDefault("null")
|
62
|
+
public Optional<Type> getType(); // required only for addColumns
|
63
|
+
|
64
|
+
@Config("default")
|
65
|
+
@ConfigDefault("null")
|
66
|
+
public Optional<Object> getDefault();
|
67
|
+
|
68
|
+
@Config("format")
|
69
|
+
@ConfigDefault("\"%Y-%m-%d %H:%M:%S.%N %z\"")
|
70
|
+
public Optional<String> getFormat();
|
71
|
+
|
72
|
+
@Config("timezone")
|
73
|
+
@ConfigDefault("\"UTC\"")
|
74
|
+
public Optional<String> getTimezone();
|
75
|
+
}
|
76
|
+
|
48
77
|
public interface PluginTask extends Task, TimestampParser.Task
|
49
78
|
{
|
50
79
|
@Config("columns")
|
80
|
+
@ConfigDefault("[]")
|
51
81
|
public List<ColumnConfig> getColumns();
|
82
|
+
|
83
|
+
@Config("add_columns")
|
84
|
+
@ConfigDefault("[]")
|
85
|
+
public List<ColumnConfig> getAddColumns();
|
86
|
+
|
87
|
+
@Config("drop_columns")
|
88
|
+
@ConfigDefault("[]")
|
89
|
+
public List<ColumnConfig> getDropColumns();
|
52
90
|
}
|
53
91
|
|
54
92
|
@Override
|
@@ -57,25 +95,137 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
57
95
|
{
|
58
96
|
PluginTask task = config.loadConfig(PluginTask.class);
|
59
97
|
|
60
|
-
|
61
|
-
List<ColumnConfig>
|
98
|
+
List<ColumnConfig> columns = task.getColumns();
|
99
|
+
List<ColumnConfig> addColumns = task.getAddColumns();
|
100
|
+
List<ColumnConfig> dropColumns = task.getDropColumns();
|
101
|
+
|
102
|
+
if (columns.size() == 0 && addColumns.size() == 0 && dropColumns.size() == 0) {
|
103
|
+
throw new ConfigException("One of \"columns\", \"add_columns\", \"drop_columns\" must be specified.");
|
104
|
+
}
|
105
|
+
|
106
|
+
if (columns.size() > 0 && dropColumns.size() > 0) {
|
107
|
+
throw new ConfigException("Either of \"columns\", \"drop_columns\" can be specified.");
|
108
|
+
}
|
109
|
+
|
110
|
+
// Automatically get column type from inputSchema for columns and dropColumns
|
62
111
|
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
63
112
|
int i = 0;
|
64
|
-
|
65
|
-
String columnName = columnConfig.getName();
|
113
|
+
if (dropColumns.size() > 0) {
|
66
114
|
for (Column inputColumn: inputSchema.getColumns()) {
|
67
|
-
|
68
|
-
|
115
|
+
String name = inputColumn.getName();
|
116
|
+
boolean matched = false;
|
117
|
+
for (ColumnConfig dropColumn : dropColumns) {
|
118
|
+
if (dropColumn.getName().equals(name)) {
|
119
|
+
matched = true;
|
120
|
+
break;
|
121
|
+
}
|
122
|
+
}
|
123
|
+
if (! matched) {
|
124
|
+
Column outputColumn = new Column(i++, name, inputColumn.getType());
|
125
|
+
builder.add(outputColumn);
|
126
|
+
}
|
127
|
+
}
|
128
|
+
} else if (columns.size() > 0) {
|
129
|
+
for (ColumnConfig column : columns) {
|
130
|
+
String name = column.getName();
|
131
|
+
Optional<Type> type = column.getType();
|
132
|
+
Optional<Object> defaultValue = column.getDefault();
|
133
|
+
|
134
|
+
Column inputColumn = getColumn(name, inputSchema);
|
135
|
+
if (inputColumn != null) { // filter column
|
136
|
+
Column outputColumn = new Column(i++, name, inputColumn.getType());
|
137
|
+
builder.add(outputColumn);
|
138
|
+
}
|
139
|
+
else if (type.isPresent() && defaultValue.isPresent()) { // add column
|
140
|
+
Column outputColumn = new Column(i++, name, type.get());
|
141
|
+
builder.add(outputColumn);
|
142
|
+
}
|
143
|
+
else {
|
144
|
+
throw new SchemaConfigException(String.format("columns: Column '%s' is not found in inputSchema. Column '%s' does not have \"type\" and \"default\"", name, name));
|
145
|
+
}
|
146
|
+
}
|
147
|
+
} else {
|
148
|
+
for (Column inputColumn: inputSchema.getColumns()) {
|
149
|
+
Column outputColumn = new Column(i++, inputColumn.getName(), inputColumn.getType());
|
150
|
+
builder.add(outputColumn);
|
151
|
+
}
|
152
|
+
}
|
153
|
+
|
154
|
+
// Add columns to last. If you want to add to head or middle, you can use `columns` option
|
155
|
+
if (addColumns.size() > 0) {
|
156
|
+
for (ColumnConfig column : addColumns) {
|
157
|
+
String name = column.getName();
|
158
|
+
Optional<Type> type = column.getType();
|
159
|
+
Optional<Object> defaultValue = column.getDefault();
|
160
|
+
|
161
|
+
if (type.isPresent() && defaultValue.isPresent()) { // add column
|
162
|
+
Column outputColumn = new Column(i++, name, type.get());
|
69
163
|
builder.add(outputColumn);
|
70
|
-
|
164
|
+
}
|
165
|
+
else {
|
166
|
+
throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"type\" and \"default\"", name));
|
71
167
|
}
|
72
168
|
}
|
73
169
|
}
|
170
|
+
|
74
171
|
Schema outputSchema = new Schema(builder.build());
|
75
172
|
|
76
173
|
control.run(task.dump(), outputSchema);
|
77
174
|
}
|
78
175
|
|
176
|
+
private Column getColumn(String name, Schema schema) {
|
177
|
+
// hash should be faster, though
|
178
|
+
for (Column column: schema.getColumns()) {
|
179
|
+
if (column.getName().equals(name)) {
|
180
|
+
return column;
|
181
|
+
}
|
182
|
+
}
|
183
|
+
return null;
|
184
|
+
}
|
185
|
+
|
186
|
+
private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, ScriptingContainer jruby) {
|
187
|
+
for (ColumnConfig columnConfig : columnConfigs) {
|
188
|
+
if (columnConfig.getName().equals(name)) {
|
189
|
+
if (type instanceof BooleanType) {
|
190
|
+
if (columnConfig.getDefault().isPresent()) {
|
191
|
+
return (Boolean)columnConfig.getDefault().get();
|
192
|
+
}
|
193
|
+
}
|
194
|
+
else if (type instanceof LongType) {
|
195
|
+
if (columnConfig.getDefault().isPresent()) {
|
196
|
+
return new Long(columnConfig.getDefault().get().toString());
|
197
|
+
}
|
198
|
+
}
|
199
|
+
else if (type instanceof DoubleType) {
|
200
|
+
if (columnConfig.getDefault().isPresent()) {
|
201
|
+
return new Double(columnConfig.getDefault().get().toString());
|
202
|
+
}
|
203
|
+
}
|
204
|
+
else if (type instanceof StringType) {
|
205
|
+
if (columnConfig.getDefault().isPresent()) {
|
206
|
+
return (String)columnConfig.getDefault().get();
|
207
|
+
}
|
208
|
+
}
|
209
|
+
else if (type instanceof TimestampType) {
|
210
|
+
if (columnConfig.getDefault().isPresent()) {
|
211
|
+
String time = (String)columnConfig.getDefault().get();
|
212
|
+
String format = (String)columnConfig.getFormat().get();
|
213
|
+
DateTimeZone timezone = DateTimeZone.forID((String)columnConfig.getTimezone().get());
|
214
|
+
TimestampParser parser = new TimestampParser(jruby, format, timezone);
|
215
|
+
try {
|
216
|
+
Timestamp default_value = parser.parse(time);
|
217
|
+
return default_value;
|
218
|
+
} catch(TimestampParseException ex) {
|
219
|
+
throw Throwables.propagate(ex);
|
220
|
+
}
|
221
|
+
}
|
222
|
+
}
|
223
|
+
return null;
|
224
|
+
}
|
225
|
+
}
|
226
|
+
return null;
|
227
|
+
}
|
228
|
+
|
79
229
|
@Override
|
80
230
|
public PageOutput open(final TaskSource taskSource, final Schema inputSchema,
|
81
231
|
final Schema outputSchema, final PageOutput output)
|
@@ -85,67 +235,29 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
85
235
|
// Map outputColumn => inputColumn
|
86
236
|
final HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
|
87
237
|
for (Column outputColumn: outputSchema.getColumns()) {
|
88
|
-
|
89
|
-
|
90
|
-
outputInputColumnMap.put(outputColumn, inputColumn);
|
91
|
-
break;
|
92
|
-
}
|
93
|
-
}
|
238
|
+
Column inputColumn = getColumn(outputColumn.getName(), inputSchema);
|
239
|
+
outputInputColumnMap.put(outputColumn, inputColumn); // NOTE: inputColumn would be null
|
94
240
|
}
|
95
241
|
|
96
242
|
// Map outputColumn => default value if present
|
97
243
|
final HashMap<Column, Object> outputDefaultMap = new HashMap<Column, Object>();
|
98
244
|
for (Column outputColumn: outputSchema.getColumns()) {
|
99
|
-
|
245
|
+
String name = outputColumn.getName();
|
246
|
+
Type type = outputColumn.getType();
|
100
247
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
outputDefaultMap.put(outputColumn, default_value);
|
108
|
-
}
|
109
|
-
}
|
110
|
-
else if (columnType instanceof LongType) {
|
111
|
-
if (columnConfig.getDefault().isPresent()) {
|
112
|
-
Long default_value = new Long(columnConfig.getDefault().get().toString());
|
113
|
-
outputDefaultMap.put(outputColumn, default_value);
|
114
|
-
}
|
115
|
-
}
|
116
|
-
else if (columnType instanceof DoubleType) {
|
117
|
-
if (columnConfig.getDefault().isPresent()) {
|
118
|
-
Double default_value = new Double(columnConfig.getDefault().get().toString());
|
119
|
-
outputDefaultMap.put(outputColumn, default_value);
|
120
|
-
}
|
121
|
-
}
|
122
|
-
else if (columnType instanceof StringType) {
|
123
|
-
if (columnConfig.getDefault().isPresent()) {
|
124
|
-
String default_value = (String)columnConfig.getDefault().get();
|
125
|
-
outputDefaultMap.put(outputColumn, default_value);
|
126
|
-
}
|
127
|
-
}
|
128
|
-
else if (columnType instanceof TimestampType) {
|
129
|
-
if (columnConfig.getDefault().isPresent()) {
|
130
|
-
String time = (String)columnConfig.getDefault().get();
|
131
|
-
String format = (String)columnConfig.getFormat().get();
|
132
|
-
DateTimeZone timezone = DateTimeZone.forID((String)columnConfig.getTimezone().get());
|
133
|
-
TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
|
134
|
-
try {
|
135
|
-
Timestamp default_value = parser.parse(time);
|
136
|
-
outputDefaultMap.put(outputColumn, default_value);
|
137
|
-
} catch(TimestampParseException ex) {
|
138
|
-
throw Throwables.propagate(ex);
|
139
|
-
}
|
140
|
-
}
|
141
|
-
}
|
142
|
-
}
|
248
|
+
Object default_value = getDefault(name, type, task.getColumns(), task.getJRuby());
|
249
|
+
if (default_value == null) {
|
250
|
+
default_value = getDefault(name, type, task.getAddColumns(), task.getJRuby());
|
251
|
+
}
|
252
|
+
if (default_value != null) {
|
253
|
+
outputDefaultMap.put(outputColumn, default_value);
|
143
254
|
}
|
144
255
|
}
|
145
256
|
|
146
257
|
return new PageOutput() {
|
147
258
|
private PageReader pageReader = new PageReader(inputSchema);
|
148
259
|
private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
260
|
+
private ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
|
149
261
|
|
150
262
|
@Override
|
151
263
|
public void finish() {
|
@@ -161,7 +273,6 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
161
273
|
public void add(Page page) {
|
162
274
|
pageReader.setPage(page);
|
163
275
|
|
164
|
-
ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
|
165
276
|
while (pageReader.nextRecord()) {
|
166
277
|
outputSchema.visitColumns(visitor);
|
167
278
|
pageBuilder.addRecord();
|
@@ -178,7 +289,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
178
289
|
@Override
|
179
290
|
public void booleanColumn(Column outputColumn) {
|
180
291
|
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
181
|
-
if (pageReader.isNull(inputColumn)) {
|
292
|
+
if (inputColumn == null || pageReader.isNull(inputColumn)) {
|
182
293
|
Boolean default_value = (Boolean)outputDefaultMap.get(outputColumn);
|
183
294
|
if (default_value != null) {
|
184
295
|
pageBuilder.setBoolean(outputColumn, default_value.booleanValue());
|
@@ -193,7 +304,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
193
304
|
@Override
|
194
305
|
public void longColumn(Column outputColumn) {
|
195
306
|
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
196
|
-
if (pageReader.isNull(inputColumn)) {
|
307
|
+
if (inputColumn == null || pageReader.isNull(inputColumn)) {
|
197
308
|
Long default_value = (Long)outputDefaultMap.get(outputColumn);
|
198
309
|
if (default_value != null) {
|
199
310
|
pageBuilder.setLong(outputColumn, default_value.longValue());
|
@@ -208,7 +319,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
208
319
|
@Override
|
209
320
|
public void doubleColumn(Column outputColumn) {
|
210
321
|
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
211
|
-
if (pageReader.isNull(inputColumn)) {
|
322
|
+
if (inputColumn == null || pageReader.isNull(inputColumn)) {
|
212
323
|
Double default_value = (Double)outputDefaultMap.get(outputColumn);
|
213
324
|
if (default_value != null) {
|
214
325
|
pageBuilder.setDouble(outputColumn, default_value.doubleValue());
|
@@ -223,7 +334,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
223
334
|
@Override
|
224
335
|
public void stringColumn(Column outputColumn) {
|
225
336
|
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
226
|
-
if (pageReader.isNull(inputColumn)) {
|
337
|
+
if (inputColumn == null || pageReader.isNull(inputColumn)) {
|
227
338
|
String default_value = (String)outputDefaultMap.get(outputColumn);
|
228
339
|
if (default_value != null) {
|
229
340
|
pageBuilder.setString(outputColumn, default_value);
|
@@ -238,7 +349,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
238
349
|
@Override
|
239
350
|
public void timestampColumn(Column outputColumn) {
|
240
351
|
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
241
|
-
if (pageReader.isNull(inputColumn)) {
|
352
|
+
if (inputColumn == null || pageReader.isNull(inputColumn)) {
|
242
353
|
Timestamp default_value = (Timestamp)outputDefaultMap.get(outputColumn);
|
243
354
|
if (default_value != null) {
|
244
355
|
pageBuilder.setTimestamp(outputColumn, default_value);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -51,17 +51,17 @@ files:
|
|
51
51
|
- LICENSE.txt
|
52
52
|
- README.md
|
53
53
|
- build.gradle
|
54
|
-
- example.
|
55
|
-
- example.yml
|
54
|
+
- example/add_columns.yml
|
55
|
+
- example/columns.yml
|
56
|
+
- example/drop_columns.yml
|
56
57
|
- gradle/wrapper/gradle-wrapper.jar
|
57
58
|
- gradle/wrapper/gradle-wrapper.properties
|
58
59
|
- gradlew
|
59
60
|
- gradlew.bat
|
60
61
|
- lib/embulk/filter/column.rb
|
61
62
|
- src/main/java/org/embulk/filter/ColumnFilterPlugin.java
|
62
|
-
- src/main/java/org/embulk/filter/column/ColumnConfig.java
|
63
63
|
- src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
|
64
|
-
- classpath/embulk-filter-column-0.
|
64
|
+
- classpath/embulk-filter-column-0.2.0.jar
|
65
65
|
homepage: https://github.com/sonots/embulk-filter-column
|
66
66
|
licenses:
|
67
67
|
- MIT
|
@@ -1,24 +0,0 @@
|
|
1
|
-
package org.embulk.filter.column;
|
2
|
-
|
3
|
-
import org.embulk.config.Config;
|
4
|
-
import org.embulk.config.ConfigDefault;
|
5
|
-
import org.embulk.config.Task;
|
6
|
-
import com.google.common.base.Optional;
|
7
|
-
|
8
|
-
public interface ColumnConfig extends Task
|
9
|
-
{
|
10
|
-
@Config("name")
|
11
|
-
public String getName();
|
12
|
-
|
13
|
-
@Config("default")
|
14
|
-
@ConfigDefault("null")
|
15
|
-
public Optional<Object> getDefault();
|
16
|
-
|
17
|
-
@Config("format")
|
18
|
-
@ConfigDefault("\"%Y-%m-%d %H:%M:%S.%N %z\"")
|
19
|
-
public Optional<String> getFormat();
|
20
|
-
|
21
|
-
@Config("timezone")
|
22
|
-
@ConfigDefault("\"UTC\"")
|
23
|
-
public Optional<String> getTimezone();
|
24
|
-
}
|