embulk-filter-column 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 253e3fd65c6c834be802f0b4018864e73acc4776
4
- data.tar.gz: b28d9c6008091ffeb9092933f35dc8c7f95dd257
3
+ metadata.gz: 7e3f1644fcdf2eee67ab2101e46eb5f546b9c373
4
+ data.tar.gz: 825008e7b6b9c4b1c3ce5da34e42fd055b627148
5
5
  SHA512:
6
- metadata.gz: 4577f23c95ae14a8a4bedc419ef71aa60fcd5a9707e4bb55faac7098a3e2491aa0a71cbb0e2be0259291c9bc53599985bda8ac020fe77eb0b30e7ca26cfdef58
7
- data.tar.gz: fbd551878d849ff891ed96dd78a9a28efb58d071ab03f56d1ae7bc8d2359bd53228a7322645348d261bf66ad23ff7ff9809867251be35b1c3d142981bb3ba1eb
6
+ metadata.gz: 4823b5a3e833c0ae0ae6302b7043027ce7e9b8740910c9b1753d312540a01d22e32dfe42b7af65cdb5227173d988255336bcc73d2bc1a4239ca6044d62c164c6
7
+ data.tar.gz: 78fa9b4a98d493a292e65e7e3387db08d997683937dfd594a69d45aecc8304d129ded9e3f3dfa409d6591323452a1dcd00389a30f0419e7794276145bffaa9c0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.1.4
2
+
3
+ Enhancements:
4
+
5
+ * Add default option
6
+
1
7
  # 0.1.3
2
8
 
3
9
  Changes:
data/README.md CHANGED
@@ -4,7 +4,9 @@ A filter plugin for Embulk to filter out columns
4
4
 
5
5
  ## Configuration
6
6
 
7
- - **columns**: column names (array of hash, required)
7
+ - **columns**: columns (array of hash, required)
8
+ - **name**: name of column
9
+ - **default**: default value used if input is null
8
10
 
9
11
  ## Example
10
12
 
@@ -13,7 +15,7 @@ filters:
13
15
  - type: column
14
16
  columns:
15
17
  - {name: id}
16
- - {name: name}
18
+ - {name: name, default: 'foo'}
17
19
  ```
18
20
 
19
21
  reduces the output to only the `id` and `name` columns.
data/build.gradle CHANGED
@@ -12,7 +12,7 @@ configurations {
12
12
  provided
13
13
  }
14
14
 
15
- version = "0.1.3"
15
+ version = "0.1.4"
16
16
 
17
17
  dependencies {
18
18
  compile "org.embulk:embulk-core:0.6.12"
data/example.yml CHANGED
@@ -1,15 +1,29 @@
1
+ # in:
2
+ # type: random
3
+ # rows: 100
4
+ # schema:
5
+ # id: primary_key
6
+ # name: string
7
+ # score: integer
1
8
  in:
2
- type: random
3
- rows: 100
4
- schema:
5
- id: primary_key
6
- name: string
7
- score: integer
9
+ type: file
10
+ path_prefix: example.csv
11
+ parser:
12
+ type: csv
13
+ charset: UTF-8
14
+ newline: CRLF
15
+ null_string: 'NULL'
16
+ skip_header_lines: 1
17
+ comment_line_marker: '#'
18
+ columns:
19
+ - {name: id, type: long}
20
+ - {name: name, type: string}
21
+ - {name: score, type: double}
8
22
  filters:
9
23
  - type: column
10
24
  columns:
11
- - {name: name}
12
- - {name: foo}
25
+ - {name: name, default: "foo"}
26
+ - {name: foo, default: 1}
13
27
  - {name: id}
14
28
  out:
15
29
  type: stdout
@@ -21,16 +21,16 @@ import org.embulk.spi.PageOutput;
21
21
  import org.embulk.spi.PageReader;
22
22
  import org.embulk.spi.Schema;
23
23
  import org.embulk.spi.SchemaConfig;
24
- import org.embulk.spi.ColumnConfig;
25
24
  import org.embulk.spi.Column;
26
25
  import org.embulk.spi.ColumnVisitor;
26
+ import org.embulk.filter.column.ColumnConfig; // note: not the same as org.embulk.spi.ColumnConfig
27
27
 
28
28
  public class ColumnFilterPlugin implements FilterPlugin
29
29
  {
30
30
  public interface PluginTask extends Task
31
31
  {
32
32
  @Config("columns")
33
- public SchemaConfig getColumns();
33
+ public List<ColumnConfig> getColumns();
34
34
  }
35
35
 
36
36
  @Override
@@ -39,18 +39,15 @@ public class ColumnFilterPlugin implements FilterPlugin
39
39
  {
40
40
  PluginTask task = config.loadConfig(PluginTask.class);
41
41
 
42
- //Schema outputSchema = task.getColumns().toSchema();
43
42
  // Automatically get column type from inputSchema
44
- SchemaConfig schemaConfig = task.getColumns();
45
- List<ColumnConfig> outputColumnConfigs = schemaConfig.getColumns();
43
+ List<ColumnConfig> columnConfigs = task.getColumns();
46
44
  ImmutableList.Builder<Column> builder = ImmutableList.builder();
47
45
  int i = 0;
48
- for (ColumnConfig outputColumnConfig : outputColumnConfigs) {
49
- String outputColumnName = outputColumnConfig.getName();
46
+ for (ColumnConfig columnConfig : columnConfigs) {
47
+ String columnName = columnConfig.getName();
50
48
  for (Column inputColumn: inputSchema.getColumns()) {
51
- if (inputColumn.getName().equals(outputColumnName)) {
52
- Type outputColumnType = inputColumn.getType();
53
- Column outputColumn = new Column(i++, outputColumnName, outputColumnType);
49
+ if (inputColumn.getName().equals(columnName)) {
50
+ Column outputColumn = new Column(i++, columnName, inputColumn.getType());
54
51
  builder.add(outputColumn);
55
52
  break;
56
53
  }
@@ -67,11 +64,21 @@ public class ColumnFilterPlugin implements FilterPlugin
67
64
  {
68
65
  PluginTask task = taskSource.loadTask(PluginTask.class);
69
66
 
70
- HashMap<Column, Column> columnMap = new HashMap<Column, Column>();
67
+ HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
71
68
  for (Column outputColumn: outputSchema.getColumns()) {
72
69
  for (Column inputColumn: inputSchema.getColumns()) {
73
70
  if (inputColumn.getName().equals(outputColumn.getName())) {
74
- columnMap.put(outputColumn, inputColumn);
71
+ outputInputColumnMap.put(outputColumn, inputColumn);
72
+ break;
73
+ }
74
+ }
75
+ }
76
+
77
+ HashMap<Column, ColumnConfig> outputColumnConfigMap = new HashMap<Column, ColumnConfig>();
78
+ for (Column outputColumn: outputSchema.getColumns()) {
79
+ for (ColumnConfig columnConfig : task.getColumns()) {
80
+ if (columnConfig.getName().equals(outputColumn.getName())) {
81
+ outputColumnConfigMap.put(outputColumn, columnConfig);
75
82
  break;
76
83
  }
77
84
  }
@@ -111,9 +118,16 @@ public class ColumnFilterPlugin implements FilterPlugin
111
118
 
112
119
  @Override
113
120
  public void booleanColumn(Column outputColumn) {
114
- Column inputColumn = columnMap.get(outputColumn);
121
+ Column inputColumn = outputInputColumnMap.get(outputColumn);
115
122
  if (pageReader.isNull(inputColumn)) {
116
- pageBuilder.setNull(outputColumn);
123
+ ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
124
+ // ToDo: speed up
125
+ if (columnConfig.getDefault().isPresent()) {
126
+ boolean default_value = ((Boolean)columnConfig.getDefault().get()).booleanValue();
127
+ pageBuilder.setBoolean(outputColumn, default_value);
128
+ } else {
129
+ pageBuilder.setNull(outputColumn);
130
+ }
117
131
  } else {
118
132
  pageBuilder.setBoolean(outputColumn, pageReader.getBoolean(inputColumn));
119
133
  }
@@ -121,9 +135,16 @@ public class ColumnFilterPlugin implements FilterPlugin
121
135
 
122
136
  @Override
123
137
  public void longColumn(Column outputColumn) {
124
- Column inputColumn = columnMap.get(outputColumn);
138
+ Column inputColumn = outputInputColumnMap.get(outputColumn);
125
139
  if (pageReader.isNull(inputColumn)) {
126
- pageBuilder.setNull(outputColumn);
140
+ ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
141
+ // ToDo: speed up
142
+ if (columnConfig.getDefault().isPresent()) {
143
+ long default_value = ((Integer)columnConfig.getDefault().get()).longValue();
144
+ pageBuilder.setLong(outputColumn, default_value);
145
+ } else {
146
+ pageBuilder.setNull(outputColumn);
147
+ }
127
148
  } else {
128
149
  pageBuilder.setLong(outputColumn, pageReader.getLong(inputColumn));
129
150
  }
@@ -131,9 +152,15 @@ public class ColumnFilterPlugin implements FilterPlugin
131
152
 
132
153
  @Override
133
154
  public void doubleColumn(Column outputColumn) {
134
- Column inputColumn = columnMap.get(outputColumn);
155
+ Column inputColumn = outputInputColumnMap.get(outputColumn);
135
156
  if (pageReader.isNull(inputColumn)) {
136
- pageBuilder.setNull(outputColumn);
157
+ ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
158
+ if (columnConfig.getDefault().isPresent()) {
159
+ double default_value = ((Double)columnConfig.getDefault().get()).doubleValue();
160
+ pageBuilder.setDouble(outputColumn, default_value);
161
+ } else {
162
+ pageBuilder.setNull(outputColumn);
163
+ }
137
164
  } else {
138
165
  pageBuilder.setDouble(outputColumn, pageReader.getDouble(inputColumn));
139
166
  }
@@ -141,9 +168,15 @@ public class ColumnFilterPlugin implements FilterPlugin
141
168
 
142
169
  @Override
143
170
  public void stringColumn(Column outputColumn) {
144
- Column inputColumn = columnMap.get(outputColumn);
171
+ Column inputColumn = outputInputColumnMap.get(outputColumn);
145
172
  if (pageReader.isNull(inputColumn)) {
146
- pageBuilder.setNull(outputColumn);
173
+ ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
174
+ if (columnConfig.getDefault().isPresent()) {
175
+ String default_value = (String)columnConfig.getDefault().get();
176
+ pageBuilder.setString(outputColumn, default_value);
177
+ } else {
178
+ pageBuilder.setNull(outputColumn);
179
+ }
147
180
  } else {
148
181
  pageBuilder.setString(outputColumn, pageReader.getString(inputColumn));
149
182
  }
@@ -151,8 +184,9 @@ public class ColumnFilterPlugin implements FilterPlugin
151
184
 
152
185
  @Override
153
186
  public void timestampColumn(Column outputColumn) {
154
- Column inputColumn = columnMap.get(outputColumn);
187
+ Column inputColumn = outputInputColumnMap.get(outputColumn);
155
188
  if (pageReader.isNull(inputColumn)) {
189
+ // ToDo: default for timestamp
156
190
  pageBuilder.setNull(outputColumn);
157
191
  } else {
158
192
  pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
@@ -0,0 +1,16 @@
1
+ package org.embulk.filter.column;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDefault;
5
+ import org.embulk.config.Task;
6
+ import com.google.common.base.Optional;
7
+
8
+ public interface ColumnConfig extends Task
9
+ {
10
+ @Config("name")
11
+ public String getName();
12
+
13
+ @Config("default")
14
+ @ConfigDefault("null")
15
+ public Optional<Object> getDefault();
16
+ }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
@@ -50,6 +50,7 @@ files:
50
50
  - LICENSE.txt
51
51
  - README.md
52
52
  - build.gradle
53
+ - example.csv
53
54
  - example.yml
54
55
  - gradle/wrapper/gradle-wrapper.jar
55
56
  - gradle/wrapper/gradle-wrapper.properties
@@ -57,8 +58,9 @@ files:
57
58
  - gradlew.bat
58
59
  - lib/embulk/filter/column.rb
59
60
  - src/main/java/org/embulk/filter/ColumnFilterPlugin.java
61
+ - src/main/java/org/embulk/filter/column/ColumnConfig.java
60
62
  - src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
61
- - classpath/embulk-filter-column-0.1.3.jar
63
+ - classpath/embulk-filter-column-0.1.4.jar
62
64
  homepage: https://github.com/sonots/embulk-filter-column
63
65
  licenses:
64
66
  - MIT