embulk-filter-column 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9337f13b7cc9bc47527a688296647de4193fd51c
4
- data.tar.gz: b0a272b270b293c95c05dcd800c384aea0ad43df
3
+ metadata.gz: c666e71247649e05789afed9c460ac1699a459cb
4
+ data.tar.gz: ab41a4b1b49b5b46f1c50630f460cdf6be4fb1cc
5
5
  SHA512:
6
- metadata.gz: e61f1a5005e3d066d21664ece6e2c1385fc8f9f955b869f26bc6ac0c0c6beab2de91238fe5eb0448b67e9c97e42bc4cc1fcf4b6a12b3af5d831082e6fc2f7930
7
- data.tar.gz: a0921c3670203b94d148339e59f016dcf5caccd949307369b917743d5a99458c21bee252de16e4506cfec94e0e958e37a1d3229b2548f3b16870c3efab2fcd52
6
+ metadata.gz: 4d56b7c8a9cb017c337b2b84649db0184629a176b259d5fec0f0db72ba7eaa5d24284a6146e30c4612a849b137e673c040b1e42769c63da4a951aed09252b749
7
+ data.tar.gz: 6a4e0c0db4a8462c9ff9612afa2fd51e6a2eab528b2318401b1d4ff15cc459c38bb94dbca501fbbe82002dca9feb74ea1b6bbd71259021a45fb7654f2b707aae
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.3.1 (2015-11-09)
2
+
3
+ Enhancements:
4
+
5
+ * Add copy column feature (thanks to hidepin)
6
+
1
7
  # 0.3.0 (2015-10-27)
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -8,14 +8,16 @@ A filter plugin for Embulk to filter out columns
8
8
 
9
9
  - **columns**: columns to retain (array of hash)
10
10
  - **name**: name of column (required)
11
- - **type**: type of column (required to add)
12
- - **default**: default value used if input is null (required to add)
11
+ - **src**: src column name to be copied (optional, default is `name`)
12
+ - **default**: default value used if input is null (optional)
13
+ - **type**: type of the default value (required for `default`)
13
14
  - **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `default_timestamp_format`)
14
15
  - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `default_timezone`)
15
16
  - **add_columns**: columns to add (array of hash)
16
17
  - **name**: name of column (required)
17
- - **type**: type of column (required)
18
- - **default**: value of column (required)
18
+ - **src**: src column name to be copied (either of `src` or `default` is required)
19
+ - **default**: value of column (either of `src` or `default` is required)
20
+ - **type**: type of the default value (required for `default`)
19
21
  - **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `default_timestamp_format`)
20
22
  - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `default_timezone`)
21
23
  - **drop_columns**: columns to drop (array of hash)
@@ -38,9 +40,9 @@ time,id,key,score
38
40
  filters:
39
41
  - type: column
40
42
  columns:
41
- - {key: time, default: "2015-07-13", format: "%Y-%m-%d"}
42
- - {key: id}
43
- - {key: key, default: "foo"}
43
+ - {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
44
+ - {name: id}
45
+ - {name: key, default: "foo"}
44
46
  ```
45
47
 
46
48
  reduces columns to only `time`, `id`, and `key` columns as:
@@ -68,15 +70,16 @@ time,id,key,score
68
70
  filters:
69
71
  - type: column
70
72
  add_columns:
71
- - {key: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d"}
73
+ - {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d"}
74
+ - {name: copy_id, src: id}
72
75
  ```
73
76
 
74
- add `d` column as:
77
+ add `d` column, and `copy_id` column which is a copy of `id` column as:
75
78
 
76
79
  ```
77
- 2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370,2015-07-13
78
- 2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962,2015-07-13
79
- 2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323,2015-07-13
80
+ 2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370,2015-07-13,0
81
+ 2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962,2015-07-13,1
82
+ 2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323,2015-07-13,2
80
83
  ```
81
84
 
82
85
  ## Example (drop_columns)
@@ -94,8 +97,8 @@ time,id,key,score
94
97
  filters:
95
98
  - type: column
96
99
  drop_columns:
97
- - {key: time}
98
- - {key: id}
100
+ - {name: time}
101
+ - {name: id}
99
102
  ```
100
103
 
101
104
  drop `time` and `id` columns as:
data/build.gradle CHANGED
@@ -12,7 +12,7 @@ configurations {
12
12
  provided
13
13
  }
14
14
 
15
- version = "0.3.0"
15
+ version = "0.3.1"
16
16
  sourceCompatibility = 1.7
17
17
  targetCompatibility = 1.7
18
18
 
@@ -18,8 +18,9 @@ filters:
18
18
  default_timezone: "Asia/Tokyo"
19
19
  default_timestamp_format: "%Y-%m-%d"
20
20
  add_columns:
21
- - {name: foo, type: long, default: 1 }
22
- - {name: d, type: timestamp, default: "2015-07-13" }
23
- - {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S" }
21
+ - {name: foo, type: long, default: 1}
22
+ - {name: d, type: timestamp, default: "2015-07-13"}
23
+ - {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"}
24
+ - {name: copy_score, src: score}
24
25
  out:
25
26
  type: stdout
data/example/columns.yml CHANGED
@@ -27,5 +27,6 @@ filters:
27
27
  - {name: name, default: "foo"}
28
28
  - {name: foo, default: 1, type: long}
29
29
  - {name: id}
30
+ - {name: copy_score, src: score}
30
31
  out:
31
32
  type: stdout
@@ -71,6 +71,10 @@ public class ColumnFilterPlugin implements FilterPlugin
71
71
  @Config("timezone")
72
72
  @ConfigDefault("null")
73
73
  public Optional<DateTimeZone> getTimeZone();
74
+
75
+ @Config("src")
76
+ @ConfigDefault("null")
77
+ public Optional<String> getSrc();
74
78
  }
75
79
 
76
80
  public interface PluginTask extends Task, TimestampParser.Task
@@ -131,9 +135,11 @@ public class ColumnFilterPlugin implements FilterPlugin
131
135
  String name = column.getName();
132
136
  Optional<Type> type = column.getType();
133
137
  Optional<Object> defaultValue = column.getDefault();
138
+ Optional<String> src = column.getSrc();
134
139
 
135
- Column inputColumn = getColumn(name, inputSchema);
136
- if (inputColumn != null) { // filter column
140
+ String srcName = src.isPresent() ? src.get() : name;
141
+ Column inputColumn = getColumn(srcName, inputSchema);
142
+ if (inputColumn != null) { // filter or copy column
137
143
  Column outputColumn = new Column(i++, name, inputColumn.getType());
138
144
  builder.add(outputColumn);
139
145
  }
@@ -142,7 +148,7 @@ public class ColumnFilterPlugin implements FilterPlugin
142
148
  builder.add(outputColumn);
143
149
  }
144
150
  else {
145
- throw new SchemaConfigException(String.format("columns: Column '%s' is not found in inputSchema. Column '%s' does not have \"type\" and \"default\"", name, name));
151
+ throw new SchemaConfigException(String.format("columns: Column src '%s' is not found in inputSchema. Column '%s' does not have \"type\" and \"default\"", srcName, name));
146
152
  }
147
153
  }
148
154
  } else {
@@ -158,13 +164,24 @@ public class ColumnFilterPlugin implements FilterPlugin
158
164
  String name = column.getName();
159
165
  Optional<Type> type = column.getType();
160
166
  Optional<Object> defaultValue = column.getDefault();
167
+ Optional<String> src = column.getSrc();
161
168
 
162
- if (type.isPresent() && defaultValue.isPresent()) { // add column
169
+ String srcName = null;
170
+ Column inputColumn = null;
171
+ if (src.isPresent()) {
172
+ srcName = src.get();
173
+ inputColumn = getColumn(srcName, inputSchema);
174
+ }
175
+ if (inputColumn != null) { // copy column
176
+ Column outputColumn = new Column(i++, name, inputColumn.getType());
177
+ builder.add(outputColumn);
178
+ }
179
+ else if (type.isPresent() && defaultValue.isPresent()) { // add column
163
180
  Column outputColumn = new Column(i++, name, type.get());
164
181
  builder.add(outputColumn);
165
182
  }
166
183
  else {
167
- throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"type\" and \"default\"", name));
184
+ throw new SchemaConfigException(String.format("add_columns: Column src '%s' is not found in inputSchema, Column '%s' does not have \"type\" and \"default\"", srcName, name));
168
185
  }
169
186
  }
170
187
  }
@@ -184,6 +201,16 @@ public class ColumnFilterPlugin implements FilterPlugin
184
201
  return null;
185
202
  }
186
203
 
204
+ private String getSrc(String name, List<ColumnConfig> columnConfigs) {
205
+ for (ColumnConfig columnConfig : columnConfigs) {
206
+ if (columnConfig.getName().equals(name) &&
207
+ columnConfig.getSrc().isPresent()) {
208
+ return (String)columnConfig.getSrc().get();
209
+ }
210
+ }
211
+ return null;
212
+ }
213
+
187
214
  private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, PluginTask task) {
188
215
  for (ColumnConfig columnConfig : columnConfigs) {
189
216
  if (columnConfig.getName().equals(name)) {
@@ -246,7 +273,15 @@ public class ColumnFilterPlugin implements FilterPlugin
246
273
  // Map outputColumn => inputColumn
247
274
  final HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
248
275
  for (Column outputColumn: outputSchema.getColumns()) {
249
- Column inputColumn = getColumn(outputColumn.getName(), inputSchema);
276
+ String name = outputColumn.getName();
277
+ String srcName = getSrc(name, task.getColumns());
278
+ if (srcName == null) {
279
+ srcName = getSrc(name, task.getAddColumns());
280
+ }
281
+ if (srcName == null) {
282
+ srcName = name;
283
+ }
284
+ Column inputColumn = getColumn(srcName, inputSchema);
250
285
  outputInputColumnMap.put(outputColumn, inputColumn); // NOTE: inputColumn would be null
251
286
  }
252
287
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-27 00:00:00.000000000 Z
11
+ date: 2015-11-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -61,7 +61,7 @@ files:
61
61
  - lib/embulk/filter/column.rb
62
62
  - src/main/java/org/embulk/filter/ColumnFilterPlugin.java
63
63
  - src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
64
- - classpath/embulk-filter-column-0.3.0.jar
64
+ - classpath/embulk-filter-column-0.3.1.jar
65
65
  homepage: https://github.com/sonots/embulk-filter-column
66
66
  licenses:
67
67
  - MIT