embulk-filter-column 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +17 -14
- data/build.gradle +1 -1
- data/example/add_columns.yml +4 -3
- data/example/columns.yml +1 -0
- data/src/main/java/org/embulk/filter/ColumnFilterPlugin.java +41 -6
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c666e71247649e05789afed9c460ac1699a459cb
|
4
|
+
data.tar.gz: ab41a4b1b49b5b46f1c50630f460cdf6be4fb1cc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d56b7c8a9cb017c337b2b84649db0184629a176b259d5fec0f0db72ba7eaa5d24284a6146e30c4612a849b137e673c040b1e42769c63da4a951aed09252b749
|
7
|
+
data.tar.gz: 6a4e0c0db4a8462c9ff9612afa2fd51e6a2eab528b2318401b1d4ff15cc459c38bb94dbca501fbbe82002dca9feb74ea1b6bbd71259021a45fb7654f2b707aae
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -8,14 +8,16 @@ A filter plugin for Embulk to filter out columns
|
|
8
8
|
|
9
9
|
- **columns**: columns to retain (array of hash)
|
10
10
|
- **name**: name of column (required)
|
11
|
-
- **
|
12
|
-
- **default**: default value used if input is null (
|
11
|
+
- **src**: name of the source column to copy from (optional, defaults to the value of `name`)
|
12
|
+
- **default**: default value used if input is null (optional)
|
13
|
+
- **type**: type of the default value (required for `default`)
|
13
14
|
- **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `default_timestamp_format`)
|
14
15
|
- **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `default_timezone`)
|
15
16
|
- **add_columns**: columns to add (array of hash)
|
16
17
|
- **name**: name of column (required)
|
17
|
-
- **
|
18
|
-
- **default**: value of column (required)
|
18
|
+
- **src**: name of the source column to copy from (either `src` or `default` is required)
|
19
|
+
- **default**: value of column (either `src` or `default` is required)
|
20
|
+
- **type**: type of the default value (required for `default`)
|
19
21
|
- **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `default_timestamp_format`)
|
20
22
|
- **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `default_timezone`)
|
21
23
|
- **drop_columns**: columns to drop (array of hash)
|
@@ -38,9 +40,9 @@ time,id,key,score
|
|
38
40
|
filters:
|
39
41
|
- type: column
|
40
42
|
columns:
|
41
|
-
- {
|
42
|
-
- {
|
43
|
-
- {
|
43
|
+
- {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
|
44
|
+
- {name: id}
|
45
|
+
- {name: key, default: "foo"}
|
44
46
|
```
|
45
47
|
|
46
48
|
reduces columns to only `time`, `id`, and `key` columns as:
|
@@ -68,15 +70,16 @@ time,id,key,score
|
|
68
70
|
filters:
|
69
71
|
- type: column
|
70
72
|
add_columns:
|
71
|
-
- {
|
73
|
+
- {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d"}
|
74
|
+
- {name: copy_id, src: id}
|
72
75
|
```
|
73
76
|
|
74
|
-
add `d` column as:
|
77
|
+
adds the `d` column and the `copy_id` column, which is a copy of the `id` column, as:
|
75
78
|
|
76
79
|
```
|
77
|
-
2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370,2015-07-13
|
78
|
-
2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962,2015-07-13
|
79
|
-
2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323,2015-07,13
|
80
|
+
2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370,2015-07-13,0
|
81
|
+
2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962,2015-07-13,1
|
82
|
+
2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323,2015-07-13,2
|
80
83
|
```
|
81
84
|
|
82
85
|
## Example (drop_columns)
|
@@ -94,8 +97,8 @@ time,id,key,score
|
|
94
97
|
filters:
|
95
98
|
- type: column
|
96
99
|
drop_columns:
|
97
|
-
- {
|
98
|
-
- {
|
100
|
+
- {name: time}
|
101
|
+
- {name: id}
|
99
102
|
```
|
100
103
|
|
101
104
|
drop `time` and `id` columns as:
|
data/build.gradle
CHANGED
data/example/add_columns.yml
CHANGED
@@ -18,8 +18,9 @@ filters:
|
|
18
18
|
default_timezone: "Asia/Tokyo"
|
19
19
|
default_timestamp_format: "%Y-%m-%d"
|
20
20
|
add_columns:
|
21
|
-
- {name: foo, type: long, default: 1
|
22
|
-
- {name: d, type: timestamp, default: "2015-07-13"
|
23
|
-
- {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"
|
21
|
+
- {name: foo, type: long, default: 1}
|
22
|
+
- {name: d, type: timestamp, default: "2015-07-13"}
|
23
|
+
- {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"}
|
24
|
+
- {name: copy_score, src: score}
|
24
25
|
out:
|
25
26
|
type: stdout
|
data/example/columns.yml
CHANGED
@@ -71,6 +71,10 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
71
71
|
@Config("timezone")
|
72
72
|
@ConfigDefault("null")
|
73
73
|
public Optional<DateTimeZone> getTimeZone();
|
74
|
+
|
75
|
+
@Config("src")
|
76
|
+
@ConfigDefault("null")
|
77
|
+
public Optional<String> getSrc();
|
74
78
|
}
|
75
79
|
|
76
80
|
public interface PluginTask extends Task, TimestampParser.Task
|
@@ -131,9 +135,11 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
131
135
|
String name = column.getName();
|
132
136
|
Optional<Type> type = column.getType();
|
133
137
|
Optional<Object> defaultValue = column.getDefault();
|
138
|
+
Optional<String> src = column.getSrc();
|
134
139
|
|
135
|
-
|
136
|
-
|
140
|
+
String srcName = src.isPresent() ? src.get() : name;
|
141
|
+
Column inputColumn = getColumn(srcName, inputSchema);
|
142
|
+
if (inputColumn != null) { // filter or copy column
|
137
143
|
Column outputColumn = new Column(i++, name, inputColumn.getType());
|
138
144
|
builder.add(outputColumn);
|
139
145
|
}
|
@@ -142,7 +148,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
142
148
|
builder.add(outputColumn);
|
143
149
|
}
|
144
150
|
else {
|
145
|
-
throw new SchemaConfigException(String.format("columns: Column '%s' is not found in inputSchema. Column '%s' does not have \"type\" and \"default\"",
|
151
|
+
throw new SchemaConfigException(String.format("columns: Column src '%s' is not found in inputSchema. Column '%s' does not have \"type\" and \"default\"", srcName, name));
|
146
152
|
}
|
147
153
|
}
|
148
154
|
} else {
|
@@ -158,13 +164,24 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
158
164
|
String name = column.getName();
|
159
165
|
Optional<Type> type = column.getType();
|
160
166
|
Optional<Object> defaultValue = column.getDefault();
|
167
|
+
Optional<String> src = column.getSrc();
|
161
168
|
|
162
|
-
|
169
|
+
String srcName = null;
|
170
|
+
Column inputColumn = null;
|
171
|
+
if (src.isPresent()) {
|
172
|
+
srcName = src.get();
|
173
|
+
inputColumn = getColumn(srcName, inputSchema);
|
174
|
+
}
|
175
|
+
if (inputColumn != null) { // copy column
|
176
|
+
Column outputColumn = new Column(i++, name, inputColumn.getType());
|
177
|
+
builder.add(outputColumn);
|
178
|
+
}
|
179
|
+
else if (type.isPresent() && defaultValue.isPresent()) { // add column
|
163
180
|
Column outputColumn = new Column(i++, name, type.get());
|
164
181
|
builder.add(outputColumn);
|
165
182
|
}
|
166
183
|
else {
|
167
|
-
throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"type\" and \"default\"", name));
|
184
|
+
throw new SchemaConfigException(String.format("add_columns: Column src '%s' is not found in inputSchema, Column '%s' does not have \"type\" and \"default\"", srcName, name));
|
168
185
|
}
|
169
186
|
}
|
170
187
|
}
|
@@ -184,6 +201,16 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
184
201
|
return null;
|
185
202
|
}
|
186
203
|
|
204
|
+
private String getSrc(String name, List<ColumnConfig> columnConfigs) {
|
205
|
+
for (ColumnConfig columnConfig : columnConfigs) {
|
206
|
+
if (columnConfig.getName().equals(name) &&
|
207
|
+
columnConfig.getSrc().isPresent()) {
|
208
|
+
return (String)columnConfig.getSrc().get();
|
209
|
+
}
|
210
|
+
}
|
211
|
+
return null;
|
212
|
+
}
|
213
|
+
|
187
214
|
private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, PluginTask task) {
|
188
215
|
for (ColumnConfig columnConfig : columnConfigs) {
|
189
216
|
if (columnConfig.getName().equals(name)) {
|
@@ -246,7 +273,15 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
246
273
|
// Map outputColumn => inputColumn
|
247
274
|
final HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
|
248
275
|
for (Column outputColumn: outputSchema.getColumns()) {
|
249
|
-
|
276
|
+
String name = outputColumn.getName();
|
277
|
+
String srcName = getSrc(name, task.getColumns());
|
278
|
+
if (srcName == null) {
|
279
|
+
srcName = getSrc(name, task.getAddColumns());
|
280
|
+
}
|
281
|
+
if (srcName == null) {
|
282
|
+
srcName = name;
|
283
|
+
}
|
284
|
+
Column inputColumn = getColumn(srcName, inputSchema);
|
250
285
|
outputInputColumnMap.put(outputColumn, inputColumn); // NOTE: inputColumn would be null
|
251
286
|
}
|
252
287
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -61,7 +61,7 @@ files:
|
|
61
61
|
- lib/embulk/filter/column.rb
|
62
62
|
- src/main/java/org/embulk/filter/ColumnFilterPlugin.java
|
63
63
|
- src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
|
64
|
-
- classpath/embulk-filter-column-0.3.
|
64
|
+
- classpath/embulk-filter-column-0.3.1.jar
|
65
65
|
homepage: https://github.com/sonots/embulk-filter-column
|
66
66
|
licenses:
|
67
67
|
- MIT
|