embulk-filter-column 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +17 -14
- data/build.gradle +1 -1
- data/example/add_columns.yml +4 -3
- data/example/columns.yml +1 -0
- data/src/main/java/org/embulk/filter/ColumnFilterPlugin.java +41 -6
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c666e71247649e05789afed9c460ac1699a459cb
|
4
|
+
data.tar.gz: ab41a4b1b49b5b46f1c50630f460cdf6be4fb1cc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d56b7c8a9cb017c337b2b84649db0184629a176b259d5fec0f0db72ba7eaa5d24284a6146e30c4612a849b137e673c040b1e42769c63da4a951aed09252b749
|
7
|
+
data.tar.gz: 6a4e0c0db4a8462c9ff9612afa2fd51e6a2eab528b2318401b1d4ff15cc459c38bb94dbca501fbbe82002dca9feb74ea1b6bbd71259021a45fb7654f2b707aae
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -8,14 +8,16 @@ A filter plugin for Embulk to filter out columns
|
|
8
8
|
|
9
9
|
- **columns**: columns to retain (array of hash)
|
10
10
|
- **name**: name of column (required)
|
11
|
-
- **
|
12
|
-
- **default**: default value used if input is null (
|
11
|
+
- **src**: name of the source column to copy (optional, defaults to the value of `name`)
|
12
|
+
- **default**: default value used if input is null (optional)
|
13
|
+
- **type**: type of the default value (required for `default`)
|
13
14
|
- **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `default_timestamp_format`)
|
14
15
|
- **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `default_timezone`)
|
15
16
|
- **add_columns**: columns to add (array of hash)
|
16
17
|
- **name**: name of column (required)
|
17
|
-
- **
|
18
|
-
- **default**: value of column (required)
|
18
|
+
- **src**: name of the source column to copy (either `src` or `default` is required)
|
19
|
+
- **default**: value of column (either `src` or `default` is required)
|
20
|
+
- **type**: type of the default value (required for `default`)
|
19
21
|
- **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `default_timestamp_format`)
|
20
22
|
- **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `default_timezone`)
|
21
23
|
- **drop_columns**: columns to drop (array of hash)
|
@@ -38,9 +40,9 @@ time,id,key,score
|
|
38
40
|
filters:
|
39
41
|
- type: column
|
40
42
|
columns:
|
41
|
-
- {
|
42
|
-
- {
|
43
|
-
- {
|
43
|
+
- {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
|
44
|
+
- {name: id}
|
45
|
+
- {name: key, default: "foo"}
|
44
46
|
```
|
45
47
|
|
46
48
|
reduces columns to only `time`, `id`, and `key` columns as:
|
@@ -68,15 +70,16 @@ time,id,key,score
|
|
68
70
|
filters:
|
69
71
|
- type: column
|
70
72
|
add_columns:
|
71
|
-
- {
|
73
|
+
- {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d"}
|
74
|
+
- {name: copy_id, src: id}
|
72
75
|
```
|
73
76
|
|
74
|
-
add `d` column as:
|
77
|
+
adds a `d` column and a `copy_id` column, which is a copy of the `id` column, as:
|
75
78
|
|
76
79
|
```
|
77
|
-
2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370,2015-07-13
|
78
|
-
2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962,2015-07-13
|
79
|
-
2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323,2015-07,13
|
80
|
+
2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370,2015-07-13,0
|
81
|
+
2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962,2015-07-13,1
|
82
|
+
2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323,2015-07-13,2
|
80
83
|
```
|
81
84
|
|
82
85
|
## Example (drop_columns)
|
@@ -94,8 +97,8 @@ time,id,key,score
|
|
94
97
|
filters:
|
95
98
|
- type: column
|
96
99
|
drop_columns:
|
97
|
-
- {
|
98
|
-
- {
|
100
|
+
- {name: time}
|
101
|
+
- {name: id}
|
99
102
|
```
|
100
103
|
|
101
104
|
drop `time` and `id` columns as:
|
data/build.gradle
CHANGED
data/example/add_columns.yml
CHANGED
@@ -18,8 +18,9 @@ filters:
|
|
18
18
|
default_timezone: "Asia/Tokyo"
|
19
19
|
default_timestamp_format: "%Y-%m-%d"
|
20
20
|
add_columns:
|
21
|
-
- {name: foo, type: long, default: 1
|
22
|
-
- {name: d, type: timestamp, default: "2015-07-13"
|
23
|
-
- {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"
|
21
|
+
- {name: foo, type: long, default: 1}
|
22
|
+
- {name: d, type: timestamp, default: "2015-07-13"}
|
23
|
+
- {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"}
|
24
|
+
- {name: copy_score, src: score}
|
24
25
|
out:
|
25
26
|
type: stdout
|
data/example/columns.yml
CHANGED
@@ -71,6 +71,10 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
71
71
|
@Config("timezone")
|
72
72
|
@ConfigDefault("null")
|
73
73
|
public Optional<DateTimeZone> getTimeZone();
|
74
|
+
|
75
|
+
@Config("src")
|
76
|
+
@ConfigDefault("null")
|
77
|
+
public Optional<String> getSrc();
|
74
78
|
}
|
75
79
|
|
76
80
|
public interface PluginTask extends Task, TimestampParser.Task
|
@@ -131,9 +135,11 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
131
135
|
String name = column.getName();
|
132
136
|
Optional<Type> type = column.getType();
|
133
137
|
Optional<Object> defaultValue = column.getDefault();
|
138
|
+
Optional<String> src = column.getSrc();
|
134
139
|
|
135
|
-
|
136
|
-
|
140
|
+
String srcName = src.isPresent() ? src.get() : name;
|
141
|
+
Column inputColumn = getColumn(srcName, inputSchema);
|
142
|
+
if (inputColumn != null) { // filter or copy column
|
137
143
|
Column outputColumn = new Column(i++, name, inputColumn.getType());
|
138
144
|
builder.add(outputColumn);
|
139
145
|
}
|
@@ -142,7 +148,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
142
148
|
builder.add(outputColumn);
|
143
149
|
}
|
144
150
|
else {
|
145
|
-
throw new SchemaConfigException(String.format("columns: Column '%s' is not found in inputSchema. Column '%s' does not have \"type\" and \"default\"",
|
151
|
+
throw new SchemaConfigException(String.format("columns: Column src '%s' is not found in inputSchema. Column '%s' does not have \"type\" and \"default\"", srcName, name));
|
146
152
|
}
|
147
153
|
}
|
148
154
|
} else {
|
@@ -158,13 +164,24 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
158
164
|
String name = column.getName();
|
159
165
|
Optional<Type> type = column.getType();
|
160
166
|
Optional<Object> defaultValue = column.getDefault();
|
167
|
+
Optional<String> src = column.getSrc();
|
161
168
|
|
162
|
-
|
169
|
+
String srcName = null;
|
170
|
+
Column inputColumn = null;
|
171
|
+
if (src.isPresent()) {
|
172
|
+
srcName = src.get();
|
173
|
+
inputColumn = getColumn(srcName, inputSchema);
|
174
|
+
}
|
175
|
+
if (inputColumn != null) { // copy column
|
176
|
+
Column outputColumn = new Column(i++, name, inputColumn.getType());
|
177
|
+
builder.add(outputColumn);
|
178
|
+
}
|
179
|
+
else if (type.isPresent() && defaultValue.isPresent()) { // add column
|
163
180
|
Column outputColumn = new Column(i++, name, type.get());
|
164
181
|
builder.add(outputColumn);
|
165
182
|
}
|
166
183
|
else {
|
167
|
-
throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"type\" and \"default\"", name));
|
184
|
+
throw new SchemaConfigException(String.format("add_columns: Column src '%s' is not found in inputSchema, Column '%s' does not have \"type\" and \"default\"", srcName, name));
|
168
185
|
}
|
169
186
|
}
|
170
187
|
}
|
@@ -184,6 +201,16 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
184
201
|
return null;
|
185
202
|
}
|
186
203
|
|
204
|
+
private String getSrc(String name, List<ColumnConfig> columnConfigs) {
|
205
|
+
for (ColumnConfig columnConfig : columnConfigs) {
|
206
|
+
if (columnConfig.getName().equals(name) &&
|
207
|
+
columnConfig.getSrc().isPresent()) {
|
208
|
+
return (String)columnConfig.getSrc().get();
|
209
|
+
}
|
210
|
+
}
|
211
|
+
return null;
|
212
|
+
}
|
213
|
+
|
187
214
|
private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, PluginTask task) {
|
188
215
|
for (ColumnConfig columnConfig : columnConfigs) {
|
189
216
|
if (columnConfig.getName().equals(name)) {
|
@@ -246,7 +273,15 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
246
273
|
// Map outputColumn => inputColumn
|
247
274
|
final HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
|
248
275
|
for (Column outputColumn: outputSchema.getColumns()) {
|
249
|
-
|
276
|
+
String name = outputColumn.getName();
|
277
|
+
String srcName = getSrc(name, task.getColumns());
|
278
|
+
if (srcName == null) {
|
279
|
+
srcName = getSrc(name, task.getAddColumns());
|
280
|
+
}
|
281
|
+
if (srcName == null) {
|
282
|
+
srcName = name;
|
283
|
+
}
|
284
|
+
Column inputColumn = getColumn(srcName, inputSchema);
|
250
285
|
outputInputColumnMap.put(outputColumn, inputColumn); // NOTE: inputColumn would be null
|
251
286
|
}
|
252
287
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -61,7 +61,7 @@ files:
|
|
61
61
|
- lib/embulk/filter/column.rb
|
62
62
|
- src/main/java/org/embulk/filter/ColumnFilterPlugin.java
|
63
63
|
- src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
|
64
|
-
- classpath/embulk-filter-column-0.3.
|
64
|
+
- classpath/embulk-filter-column-0.3.1.jar
|
65
65
|
homepage: https://github.com/sonots/embulk-filter-column
|
66
66
|
licenses:
|
67
67
|
- MIT
|