embulk-filter-column 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 253e3fd65c6c834be802f0b4018864e73acc4776
4
- data.tar.gz: b28d9c6008091ffeb9092933f35dc8c7f95dd257
3
+ metadata.gz: 7e3f1644fcdf2eee67ab2101e46eb5f546b9c373
4
+ data.tar.gz: 825008e7b6b9c4b1c3ce5da34e42fd055b627148
5
5
  SHA512:
6
- metadata.gz: 4577f23c95ae14a8a4bedc419ef71aa60fcd5a9707e4bb55faac7098a3e2491aa0a71cbb0e2be0259291c9bc53599985bda8ac020fe77eb0b30e7ca26cfdef58
7
- data.tar.gz: fbd551878d849ff891ed96dd78a9a28efb58d071ab03f56d1ae7bc8d2359bd53228a7322645348d261bf66ad23ff7ff9809867251be35b1c3d142981bb3ba1eb
6
+ metadata.gz: 4823b5a3e833c0ae0ae6302b7043027ce7e9b8740910c9b1753d312540a01d22e32dfe42b7af65cdb5227173d988255336bcc73d2bc1a4239ca6044d62c164c6
7
+ data.tar.gz: 78fa9b4a98d493a292e65e7e3387db08d997683937dfd594a69d45aecc8304d129ded9e3f3dfa409d6591323452a1dcd00389a30f0419e7794276145bffaa9c0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.1.4
2
+
3
+ Enhancements:
4
+
5
+ * Add default option
6
+
1
7
  # 0.1.3
2
8
 
3
9
  Changes:
data/README.md CHANGED
@@ -4,7 +4,9 @@ A filter plugin for Embulk to filter out columns
4
4
 
5
5
  ## Configuration
6
6
 
7
- - **columns**: column names (array of hash, required)
7
+ - **columns**: columns (array of hash, required)
8
+ - **name**: name of column
9
+ - **default**: default value used if input is null
8
10
 
9
11
  ## Example
10
12
 
@@ -13,7 +15,7 @@ filters:
13
15
  - type: column
14
16
  columns:
15
17
  - {name: id}
16
- - {name: name}
18
+ - {name: name, default: 'foo'}
17
19
  ```
18
20
 
19
21
  reduces columns to only `id` and `name` columns.
data/build.gradle CHANGED
@@ -12,7 +12,7 @@ configurations {
12
12
  provided
13
13
  }
14
14
 
15
- version = "0.1.3"
15
+ version = "0.1.4"
16
16
 
17
17
  dependencies {
18
18
  compile "org.embulk:embulk-core:0.6.12"
data/example.yml CHANGED
@@ -1,15 +1,29 @@
1
+ # in:
2
+ # type: random
3
+ # rows: 100
4
+ # schema:
5
+ # id: primary_key
6
+ # name: string
7
+ # score: integer
1
8
  in:
2
- type: random
3
- rows: 100
4
- schema:
5
- id: primary_key
6
- name: string
7
- score: integer
9
+ type: file
10
+ path_prefix: example.csv
11
+ parser:
12
+ type: csv
13
+ charset: UTF-8
14
+ newline: CRLF
15
+ null_string: 'NULL'
16
+ skip_header_lines: 1
17
+ comment_line_marker: '#'
18
+ columns:
19
+ - {name: id, type: long}
20
+ - {name: name, type: string}
21
+ - {name: score, type: double}
8
22
  filters:
9
23
  - type: column
10
24
  columns:
11
- - {name: name}
12
- - {name: foo}
25
+ - {name: name, default: "foo"}
26
+ - {name: foo, default: 1}
13
27
  - {name: id}
14
28
  out:
15
29
  type: stdout
@@ -21,16 +21,16 @@ import org.embulk.spi.PageOutput;
21
21
  import org.embulk.spi.PageReader;
22
22
  import org.embulk.spi.Schema;
23
23
  import org.embulk.spi.SchemaConfig;
24
- import org.embulk.spi.ColumnConfig;
25
24
  import org.embulk.spi.Column;
26
25
  import org.embulk.spi.ColumnVisitor;
26
+ import org.embulk.filter.column.ColumnConfig; // note: different from spi.ColumnConfig
27
27
 
28
28
  public class ColumnFilterPlugin implements FilterPlugin
29
29
  {
30
30
  public interface PluginTask extends Task
31
31
  {
32
32
  @Config("columns")
33
- public SchemaConfig getColumns();
33
+ public List<ColumnConfig> getColumns();
34
34
  }
35
35
 
36
36
  @Override
@@ -39,18 +39,15 @@ public class ColumnFilterPlugin implements FilterPlugin
39
39
  {
40
40
  PluginTask task = config.loadConfig(PluginTask.class);
41
41
 
42
- //Schema outputSchema = task.getColumns().toSchema();
43
42
  // Automatically get column type from inputSchema
44
- SchemaConfig schemaConfig = task.getColumns();
45
- List<ColumnConfig> outputColumnConfigs = schemaConfig.getColumns();
43
+ List<ColumnConfig> columnConfigs = task.getColumns();
46
44
  ImmutableList.Builder<Column> builder = ImmutableList.builder();
47
45
  int i = 0;
48
- for (ColumnConfig outputColumnConfig : outputColumnConfigs) {
49
- String outputColumnName = outputColumnConfig.getName();
46
+ for (ColumnConfig columnConfig : columnConfigs) {
47
+ String columnName = columnConfig.getName();
50
48
  for (Column inputColumn: inputSchema.getColumns()) {
51
- if (inputColumn.getName().equals(outputColumnName)) {
52
- Type outputColumnType = inputColumn.getType();
53
- Column outputColumn = new Column(i++, outputColumnName, outputColumnType);
49
+ if (inputColumn.getName().equals(columnName)) {
50
+ Column outputColumn = new Column(i++, columnName, inputColumn.getType());
54
51
  builder.add(outputColumn);
55
52
  break;
56
53
  }
@@ -67,11 +64,21 @@ public class ColumnFilterPlugin implements FilterPlugin
67
64
  {
68
65
  PluginTask task = taskSource.loadTask(PluginTask.class);
69
66
 
70
- HashMap<Column, Column> columnMap = new HashMap<Column, Column>();
67
+ HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
71
68
  for (Column outputColumn: outputSchema.getColumns()) {
72
69
  for (Column inputColumn: inputSchema.getColumns()) {
73
70
  if (inputColumn.getName().equals(outputColumn.getName())) {
74
- columnMap.put(outputColumn, inputColumn);
71
+ outputInputColumnMap.put(outputColumn, inputColumn);
72
+ break;
73
+ }
74
+ }
75
+ }
76
+
77
+ HashMap<Column, ColumnConfig> outputColumnConfigMap = new HashMap<Column, ColumnConfig>();
78
+ for (Column outputColumn: outputSchema.getColumns()) {
79
+ for (ColumnConfig columnConfig : task.getColumns()) {
80
+ if (columnConfig.getName().equals(outputColumn.getName())) {
81
+ outputColumnConfigMap.put(outputColumn, columnConfig);
75
82
  break;
76
83
  }
77
84
  }
@@ -111,9 +118,16 @@ public class ColumnFilterPlugin implements FilterPlugin
111
118
 
112
119
  @Override
113
120
  public void booleanColumn(Column outputColumn) {
114
- Column inputColumn = columnMap.get(outputColumn);
121
+ Column inputColumn = outputInputColumnMap.get(outputColumn);
115
122
  if (pageReader.isNull(inputColumn)) {
116
- pageBuilder.setNull(outputColumn);
123
+ ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
124
+ // ToDo: speed up
125
+ if (columnConfig.getDefault().isPresent()) {
126
+ boolean default_value = ((Boolean)columnConfig.getDefault().get()).booleanValue();
127
+ pageBuilder.setBoolean(outputColumn, default_value);
128
+ } else {
129
+ pageBuilder.setNull(outputColumn);
130
+ }
117
131
  } else {
118
132
  pageBuilder.setBoolean(outputColumn, pageReader.getBoolean(inputColumn));
119
133
  }
@@ -121,9 +135,16 @@ public class ColumnFilterPlugin implements FilterPlugin
121
135
 
122
136
  @Override
123
137
  public void longColumn(Column outputColumn) {
124
- Column inputColumn = columnMap.get(outputColumn);
138
+ Column inputColumn = outputInputColumnMap.get(outputColumn);
125
139
  if (pageReader.isNull(inputColumn)) {
126
- pageBuilder.setNull(outputColumn);
140
+ ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
141
+ // ToDo: speed up
142
+ if (columnConfig.getDefault().isPresent()) {
143
+ long default_value = ((Integer)columnConfig.getDefault().get()).longValue();
144
+ pageBuilder.setLong(outputColumn, default_value);
145
+ } else {
146
+ pageBuilder.setNull(outputColumn);
147
+ }
127
148
  } else {
128
149
  pageBuilder.setLong(outputColumn, pageReader.getLong(inputColumn));
129
150
  }
@@ -131,9 +152,15 @@ public class ColumnFilterPlugin implements FilterPlugin
131
152
 
132
153
  @Override
133
154
  public void doubleColumn(Column outputColumn) {
134
- Column inputColumn = columnMap.get(outputColumn);
155
+ Column inputColumn = outputInputColumnMap.get(outputColumn);
135
156
  if (pageReader.isNull(inputColumn)) {
136
- pageBuilder.setNull(outputColumn);
157
+ ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
158
+ if (columnConfig.getDefault().isPresent()) {
159
+ double default_value = ((Double)columnConfig.getDefault().get()).doubleValue();
160
+ pageBuilder.setDouble(outputColumn, default_value);
161
+ } else {
162
+ pageBuilder.setNull(outputColumn);
163
+ }
137
164
  } else {
138
165
  pageBuilder.setDouble(outputColumn, pageReader.getDouble(inputColumn));
139
166
  }
@@ -141,9 +168,15 @@ public class ColumnFilterPlugin implements FilterPlugin
141
168
 
142
169
  @Override
143
170
  public void stringColumn(Column outputColumn) {
144
- Column inputColumn = columnMap.get(outputColumn);
171
+ Column inputColumn = outputInputColumnMap.get(outputColumn);
145
172
  if (pageReader.isNull(inputColumn)) {
146
- pageBuilder.setNull(outputColumn);
173
+ ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
174
+ if (columnConfig.getDefault().isPresent()) {
175
+ String default_value = (String)columnConfig.getDefault().get();
176
+ pageBuilder.setString(outputColumn, default_value);
177
+ } else {
178
+ pageBuilder.setNull(outputColumn);
179
+ }
147
180
  } else {
148
181
  pageBuilder.setString(outputColumn, pageReader.getString(inputColumn));
149
182
  }
@@ -151,8 +184,9 @@ public class ColumnFilterPlugin implements FilterPlugin
151
184
 
152
185
  @Override
153
186
  public void timestampColumn(Column outputColumn) {
154
- Column inputColumn = columnMap.get(outputColumn);
187
+ Column inputColumn = outputInputColumnMap.get(outputColumn);
155
188
  if (pageReader.isNull(inputColumn)) {
189
+ // ToDo: default for timestamp
156
190
  pageBuilder.setNull(outputColumn);
157
191
  } else {
158
192
  pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
@@ -0,0 +1,16 @@
1
+ package org.embulk.filter.column;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDefault;
5
+ import org.embulk.config.Task;
6
+ import com.google.common.base.Optional;
7
+
8
+ public interface ColumnConfig extends Task
9
+ {
10
+ @Config("name")
11
+ public String getName();
12
+
13
+ @Config("default")
14
+ @ConfigDefault("null")
15
+ public Optional<Object> getDefault();
16
+ }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
@@ -50,6 +50,7 @@ files:
50
50
  - LICENSE.txt
51
51
  - README.md
52
52
  - build.gradle
53
+ - example.csv
53
54
  - example.yml
54
55
  - gradle/wrapper/gradle-wrapper.jar
55
56
  - gradle/wrapper/gradle-wrapper.properties
@@ -57,8 +58,9 @@ files:
57
58
  - gradlew.bat
58
59
  - lib/embulk/filter/column.rb
59
60
  - src/main/java/org/embulk/filter/ColumnFilterPlugin.java
61
+ - src/main/java/org/embulk/filter/column/ColumnConfig.java
60
62
  - src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
61
- - classpath/embulk-filter-column-0.1.3.jar
63
+ - classpath/embulk-filter-column-0.1.4.jar
62
64
  homepage: https://github.com/sonots/embulk-filter-column
63
65
  licenses:
64
66
  - MIT