embulk-filter-column 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9337f13b7cc9bc47527a688296647de4193fd51c
4
- data.tar.gz: b0a272b270b293c95c05dcd800c384aea0ad43df
3
+ metadata.gz: c666e71247649e05789afed9c460ac1699a459cb
4
+ data.tar.gz: ab41a4b1b49b5b46f1c50630f460cdf6be4fb1cc
5
5
  SHA512:
6
- metadata.gz: e61f1a5005e3d066d21664ece6e2c1385fc8f9f955b869f26bc6ac0c0c6beab2de91238fe5eb0448b67e9c97e42bc4cc1fcf4b6a12b3af5d831082e6fc2f7930
7
- data.tar.gz: a0921c3670203b94d148339e59f016dcf5caccd949307369b917743d5a99458c21bee252de16e4506cfec94e0e958e37a1d3229b2548f3b16870c3efab2fcd52
6
+ metadata.gz: 4d56b7c8a9cb017c337b2b84649db0184629a176b259d5fec0f0db72ba7eaa5d24284a6146e30c4612a849b137e673c040b1e42769c63da4a951aed09252b749
7
+ data.tar.gz: 6a4e0c0db4a8462c9ff9612afa2fd51e6a2eab528b2318401b1d4ff15cc459c38bb94dbca501fbbe82002dca9feb74ea1b6bbd71259021a45fb7654f2b707aae
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.3.1 (2015-11-09)
2
+
3
+ Enhancements:
4
+
5
+ * Add copy column feature (thanks to hidepin)
6
+
1
7
  # 0.3.0 (2015-10-27)
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -8,14 +8,16 @@ A filter plugin for Embulk to filter out columns
8
8
 
9
9
  - **columns**: columns to retain (array of hash)
10
10
  - **name**: name of column (required)
11
- - **type**: type of column (required to add)
12
- - **default**: default value used if input is null (required to add)
11
+ - **src**: src column name to be copied (optional, default is `name`)
12
+ - **default**: default value used if input is null (optional)
13
+ - **type**: type of the default value (required for `default`)
13
14
  - **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `default_timestamp_format`)
14
15
  - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `default_timezone`)
15
16
  - **add_columns**: columns to add (array of hash)
16
17
  - **name**: name of column (required)
17
- - **type**: type of column (required)
18
- - **default**: value of column (required)
18
+ - **src**: name of the source column to copy (either `src` or `default` is required)
19
+ - **default**: value of column (either `src` or `default` is required)
20
+ - **type**: type of the default value (required for `default`)
19
21
  - **format**: special option for timestamp column, specify the format of the default timestamp (string, default is `default_timestamp_format`)
20
22
  - **timezone**: special option for timestamp column, specify the timezone of the default timestamp (string, default is `default_timezone`)
21
23
  - **drop_columns**: columns to drop (array of hash)
@@ -38,9 +40,9 @@ time,id,key,score
38
40
  filters:
39
41
  - type: column
40
42
  columns:
41
- - {key: time, default: "2015-07-13", format: "%Y-%m-%d"}
42
- - {key: id}
43
- - {key: key, default: "foo"}
43
+ - {name: time, default: "2015-07-13", format: "%Y-%m-%d"}
44
+ - {name: id}
45
+ - {name: key, default: "foo"}
44
46
  ```
45
47
 
46
48
  reduces columns to only `time`, `id`, and `key` columns as:
@@ -68,15 +70,16 @@ time,id,key,score
68
70
  filters:
69
71
  - type: column
70
72
  add_columns:
71
- - {key: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d"}
73
+ - {name: d, type: timestamp, default: "2015-07-13", format: "%Y-%m-%d"}
74
+ - {name: copy_id, src: id}
72
75
  ```
73
76
 
74
- add `d` column as:
77
+ adds a `d` column and a `copy_id` column, which is a copy of the `id` column, as:
75
78
 
76
79
  ```
77
- 2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370,2015-07-13
78
- 2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962,2015-07-13
79
- 2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323,2015-07,13
80
+ 2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370,2015-07-13,0
81
+ 2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962,2015-07-13,1
82
+ 2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323,2015-07-13,2
80
83
  ```
81
84
 
82
85
  ## Example (drop_columns)
@@ -94,8 +97,8 @@ time,id,key,score
94
97
  filters:
95
98
  - type: column
96
99
  drop_columns:
97
- - {key: time}
98
- - {key: id}
100
+ - {name: time}
101
+ - {name: id}
99
102
  ```
100
103
 
101
104
  drop `time` and `id` columns as:
data/build.gradle CHANGED
@@ -12,7 +12,7 @@ configurations {
12
12
  provided
13
13
  }
14
14
 
15
- version = "0.3.0"
15
+ version = "0.3.1"
16
16
  sourceCompatibility = 1.7
17
17
  targetCompatibility = 1.7
18
18
 
@@ -18,8 +18,9 @@ filters:
18
18
  default_timezone: "Asia/Tokyo"
19
19
  default_timestamp_format: "%Y-%m-%d"
20
20
  add_columns:
21
- - {name: foo, type: long, default: 1 }
22
- - {name: d, type: timestamp, default: "2015-07-13" }
23
- - {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S" }
21
+ - {name: foo, type: long, default: 1}
22
+ - {name: d, type: timestamp, default: "2015-07-13"}
23
+ - {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"}
24
+ - {name: copy_score, src: score}
24
25
  out:
25
26
  type: stdout
data/example/columns.yml CHANGED
@@ -27,5 +27,6 @@ filters:
27
27
  - {name: name, default: "foo"}
28
28
  - {name: foo, default: 1, type: long}
29
29
  - {name: id}
30
+ - {name: copy_score, src: score}
30
31
  out:
31
32
  type: stdout
@@ -71,6 +71,10 @@ public class ColumnFilterPlugin implements FilterPlugin
71
71
  @Config("timezone")
72
72
  @ConfigDefault("null")
73
73
  public Optional<DateTimeZone> getTimeZone();
74
+
75
+ @Config("src")
76
+ @ConfigDefault("null")
77
+ public Optional<String> getSrc();
74
78
  }
75
79
 
76
80
  public interface PluginTask extends Task, TimestampParser.Task
@@ -131,9 +135,11 @@ public class ColumnFilterPlugin implements FilterPlugin
131
135
  String name = column.getName();
132
136
  Optional<Type> type = column.getType();
133
137
  Optional<Object> defaultValue = column.getDefault();
138
+ Optional<String> src = column.getSrc();
134
139
 
135
- Column inputColumn = getColumn(name, inputSchema);
136
- if (inputColumn != null) { // filter column
140
+ String srcName = src.isPresent() ? src.get() : name;
141
+ Column inputColumn = getColumn(srcName, inputSchema);
142
+ if (inputColumn != null) { // filter or copy column
137
143
  Column outputColumn = new Column(i++, name, inputColumn.getType());
138
144
  builder.add(outputColumn);
139
145
  }
@@ -142,7 +148,7 @@ public class ColumnFilterPlugin implements FilterPlugin
142
148
  builder.add(outputColumn);
143
149
  }
144
150
  else {
145
- throw new SchemaConfigException(String.format("columns: Column '%s' is not found in inputSchema. Column '%s' does not have \"type\" and \"default\"", name, name));
151
+ throw new SchemaConfigException(String.format("columns: Column src '%s' is not found in inputSchema. Column '%s' does not have \"type\" and \"default\"", srcName, name));
146
152
  }
147
153
  }
148
154
  } else {
@@ -158,13 +164,24 @@ public class ColumnFilterPlugin implements FilterPlugin
158
164
  String name = column.getName();
159
165
  Optional<Type> type = column.getType();
160
166
  Optional<Object> defaultValue = column.getDefault();
167
+ Optional<String> src = column.getSrc();
161
168
 
162
- if (type.isPresent() && defaultValue.isPresent()) { // add column
169
+ String srcName = null;
170
+ Column inputColumn = null;
171
+ if (src.isPresent()) {
172
+ srcName = src.get();
173
+ inputColumn = getColumn(srcName, inputSchema);
174
+ }
175
+ if (inputColumn != null) { // copy column
176
+ Column outputColumn = new Column(i++, name, inputColumn.getType());
177
+ builder.add(outputColumn);
178
+ }
179
+ else if (type.isPresent() && defaultValue.isPresent()) { // add column
163
180
  Column outputColumn = new Column(i++, name, type.get());
164
181
  builder.add(outputColumn);
165
182
  }
166
183
  else {
167
- throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"type\" and \"default\"", name));
184
+ throw new SchemaConfigException(String.format("add_columns: Column src '%s' is not found in inputSchema, Column '%s' does not have \"type\" and \"default\"", srcName, name));
168
185
  }
169
186
  }
170
187
  }
@@ -184,6 +201,16 @@ public class ColumnFilterPlugin implements FilterPlugin
184
201
  return null;
185
202
  }
186
203
 
204
+ private String getSrc(String name, List<ColumnConfig> columnConfigs) {
205
+ for (ColumnConfig columnConfig : columnConfigs) {
206
+ if (columnConfig.getName().equals(name) &&
207
+ columnConfig.getSrc().isPresent()) {
208
+ return (String)columnConfig.getSrc().get();
209
+ }
210
+ }
211
+ return null;
212
+ }
213
+
187
214
  private Object getDefault(String name, Type type, List<ColumnConfig> columnConfigs, PluginTask task) {
188
215
  for (ColumnConfig columnConfig : columnConfigs) {
189
216
  if (columnConfig.getName().equals(name)) {
@@ -246,7 +273,15 @@ public class ColumnFilterPlugin implements FilterPlugin
246
273
  // Map outputColumn => inputColumn
247
274
  final HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
248
275
  for (Column outputColumn: outputSchema.getColumns()) {
249
- Column inputColumn = getColumn(outputColumn.getName(), inputSchema);
276
+ String name = outputColumn.getName();
277
+ String srcName = getSrc(name, task.getColumns());
278
+ if (srcName == null) {
279
+ srcName = getSrc(name, task.getAddColumns());
280
+ }
281
+ if (srcName == null) {
282
+ srcName = name;
283
+ }
284
+ Column inputColumn = getColumn(srcName, inputSchema);
250
285
  outputInputColumnMap.put(outputColumn, inputColumn); // NOTE: inputColumn would be null
251
286
  }
252
287
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-27 00:00:00.000000000 Z
11
+ date: 2015-11-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -61,7 +61,7 @@ files:
61
61
  - lib/embulk/filter/column.rb
62
62
  - src/main/java/org/embulk/filter/ColumnFilterPlugin.java
63
63
  - src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
64
- - classpath/embulk-filter-column-0.3.0.jar
64
+ - classpath/embulk-filter-column-0.3.1.jar
65
65
  homepage: https://github.com/sonots/embulk-filter-column
66
66
  licenses:
67
67
  - MIT