embulk-filter-column 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +4 -2
- data/build.gradle +1 -1
- data/example.yml +22 -8
- data/src/main/java/org/embulk/filter/ColumnFilterPlugin.java +55 -21
- data/src/main/java/org/embulk/filter/column/ColumnConfig.java +16 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e3f1644fcdf2eee67ab2101e46eb5f546b9c373
|
4
|
+
data.tar.gz: 825008e7b6b9c4b1c3ce5da34e42fd055b627148
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4823b5a3e833c0ae0ae6302b7043027ce7e9b8740910c9b1753d312540a01d22e32dfe42b7af65cdb5227173d988255336bcc73d2bc1a4239ca6044d62c164c6
|
7
|
+
data.tar.gz: 78fa9b4a98d493a292e65e7e3387db08d997683937dfd594a69d45aecc8304d129ded9e3f3dfa409d6591323452a1dcd00389a30f0419e7794276145bffaa9c0
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -4,7 +4,9 @@ A filter plugin for Embulk to filter out columns
|
|
4
4
|
|
5
5
|
## Configuration
|
6
6
|
|
7
|
-
- **columns**:
|
7
|
+
- **columns**: columns (array of hash, required)
|
8
|
+
- **name**: name of column
|
9
|
+
- **default**: default value used if input is null
|
8
10
|
|
9
11
|
## Example
|
10
12
|
|
@@ -13,7 +15,7 @@ filters:
|
|
13
15
|
- type: column
|
14
16
|
columns:
|
15
17
|
- {name: id}
|
16
|
-
- {name: name}
|
18
|
+
- {name: name, default: 'foo'}
|
17
19
|
```
|
18
20
|
|
19
21
|
reduces the output to only the `id` and `name` columns.
|
data/build.gradle
CHANGED
data/example.yml
CHANGED
@@ -1,15 +1,29 @@
|
|
1
|
+
# in:
|
2
|
+
# type: random
|
3
|
+
# rows: 100
|
4
|
+
# schema:
|
5
|
+
# id: primary_key
|
6
|
+
# name: string
|
7
|
+
# score: integer
|
1
8
|
in:
|
2
|
-
type:
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
9
|
+
type: file
|
10
|
+
path_prefix: example.csv
|
11
|
+
parser:
|
12
|
+
type: csv
|
13
|
+
charset: UTF-8
|
14
|
+
newline: CRLF
|
15
|
+
null_string: 'NULL'
|
16
|
+
skip_header_lines: 1
|
17
|
+
comment_line_marker: '#'
|
18
|
+
columns:
|
19
|
+
- {name: id, type: long}
|
20
|
+
- {name: name, type: string}
|
21
|
+
- {name: score, type: double}
|
8
22
|
filters:
|
9
23
|
- type: column
|
10
24
|
columns:
|
11
|
-
- {name: name}
|
12
|
-
- {name: foo}
|
25
|
+
- {name: name, default: "foo"}
|
26
|
+
- {name: foo, default: 1}
|
13
27
|
- {name: id}
|
14
28
|
out:
|
15
29
|
type: stdout
|
@@ -21,16 +21,16 @@ import org.embulk.spi.PageOutput;
|
|
21
21
|
import org.embulk.spi.PageReader;
|
22
22
|
import org.embulk.spi.Schema;
|
23
23
|
import org.embulk.spi.SchemaConfig;
|
24
|
-
import org.embulk.spi.ColumnConfig;
|
25
24
|
import org.embulk.spi.Column;
|
26
25
|
import org.embulk.spi.ColumnVisitor;
|
26
|
+
import org.embulk.filter.column.ColumnConfig; // note: this is not the same class as org.embulk.spi.ColumnConfig
|
27
27
|
|
28
28
|
public class ColumnFilterPlugin implements FilterPlugin
|
29
29
|
{
|
30
30
|
public interface PluginTask extends Task
|
31
31
|
{
|
32
32
|
@Config("columns")
|
33
|
-
public
|
33
|
+
public List<ColumnConfig> getColumns();
|
34
34
|
}
|
35
35
|
|
36
36
|
@Override
|
@@ -39,18 +39,15 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
39
39
|
{
|
40
40
|
PluginTask task = config.loadConfig(PluginTask.class);
|
41
41
|
|
42
|
-
//Schema outputSchema = task.getColumns().toSchema();
|
43
42
|
// Automatically get column type from inputSchema
|
44
|
-
|
45
|
-
List<ColumnConfig> outputColumnConfigs = schemaConfig.getColumns();
|
43
|
+
List<ColumnConfig> columnConfigs = task.getColumns();
|
46
44
|
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
47
45
|
int i = 0;
|
48
|
-
for (ColumnConfig
|
49
|
-
String
|
46
|
+
for (ColumnConfig columnConfig : columnConfigs) {
|
47
|
+
String columnName = columnConfig.getName();
|
50
48
|
for (Column inputColumn: inputSchema.getColumns()) {
|
51
|
-
if (inputColumn.getName().equals(
|
52
|
-
|
53
|
-
Column outputColumn = new Column(i++, outputColumnName, outputColumnType);
|
49
|
+
if (inputColumn.getName().equals(columnName)) {
|
50
|
+
Column outputColumn = new Column(i++, columnName, inputColumn.getType());
|
54
51
|
builder.add(outputColumn);
|
55
52
|
break;
|
56
53
|
}
|
@@ -67,11 +64,21 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
67
64
|
{
|
68
65
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
69
66
|
|
70
|
-
HashMap<Column, Column>
|
67
|
+
HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
|
71
68
|
for (Column outputColumn: outputSchema.getColumns()) {
|
72
69
|
for (Column inputColumn: inputSchema.getColumns()) {
|
73
70
|
if (inputColumn.getName().equals(outputColumn.getName())) {
|
74
|
-
|
71
|
+
outputInputColumnMap.put(outputColumn, inputColumn);
|
72
|
+
break;
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
HashMap<Column, ColumnConfig> outputColumnConfigMap = new HashMap<Column, ColumnConfig>();
|
78
|
+
for (Column outputColumn: outputSchema.getColumns()) {
|
79
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
80
|
+
if (columnConfig.getName().equals(outputColumn.getName())) {
|
81
|
+
outputColumnConfigMap.put(outputColumn, columnConfig);
|
75
82
|
break;
|
76
83
|
}
|
77
84
|
}
|
@@ -111,9 +118,16 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
111
118
|
|
112
119
|
@Override
|
113
120
|
public void booleanColumn(Column outputColumn) {
|
114
|
-
Column inputColumn =
|
121
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
115
122
|
if (pageReader.isNull(inputColumn)) {
|
116
|
-
|
123
|
+
ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
|
124
|
+
// ToDo: speed up
|
125
|
+
if (columnConfig.getDefault().isPresent()) {
|
126
|
+
boolean default_value = ((Boolean)columnConfig.getDefault().get()).booleanValue();
|
127
|
+
pageBuilder.setBoolean(outputColumn, default_value);
|
128
|
+
} else {
|
129
|
+
pageBuilder.setNull(outputColumn);
|
130
|
+
}
|
117
131
|
} else {
|
118
132
|
pageBuilder.setBoolean(outputColumn, pageReader.getBoolean(inputColumn));
|
119
133
|
}
|
@@ -121,9 +135,16 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
121
135
|
|
122
136
|
@Override
|
123
137
|
public void longColumn(Column outputColumn) {
|
124
|
-
Column inputColumn =
|
138
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
125
139
|
if (pageReader.isNull(inputColumn)) {
|
126
|
-
|
140
|
+
ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
|
141
|
+
// ToDo: speed up
|
142
|
+
if (columnConfig.getDefault().isPresent()) {
|
143
|
+
long default_value = ((Integer)columnConfig.getDefault().get()).longValue();
|
144
|
+
pageBuilder.setLong(outputColumn, default_value);
|
145
|
+
} else {
|
146
|
+
pageBuilder.setNull(outputColumn);
|
147
|
+
}
|
127
148
|
} else {
|
128
149
|
pageBuilder.setLong(outputColumn, pageReader.getLong(inputColumn));
|
129
150
|
}
|
@@ -131,9 +152,15 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
131
152
|
|
132
153
|
@Override
|
133
154
|
public void doubleColumn(Column outputColumn) {
|
134
|
-
Column inputColumn =
|
155
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
135
156
|
if (pageReader.isNull(inputColumn)) {
|
136
|
-
|
157
|
+
ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
|
158
|
+
if (columnConfig.getDefault().isPresent()) {
|
159
|
+
double default_value = ((Double)columnConfig.getDefault().get()).doubleValue();
|
160
|
+
pageBuilder.setDouble(outputColumn, default_value);
|
161
|
+
} else {
|
162
|
+
pageBuilder.setNull(outputColumn);
|
163
|
+
}
|
137
164
|
} else {
|
138
165
|
pageBuilder.setDouble(outputColumn, pageReader.getDouble(inputColumn));
|
139
166
|
}
|
@@ -141,9 +168,15 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
141
168
|
|
142
169
|
@Override
|
143
170
|
public void stringColumn(Column outputColumn) {
|
144
|
-
Column inputColumn =
|
171
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
145
172
|
if (pageReader.isNull(inputColumn)) {
|
146
|
-
|
173
|
+
ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
|
174
|
+
if (columnConfig.getDefault().isPresent()) {
|
175
|
+
String default_value = (String)columnConfig.getDefault().get();
|
176
|
+
pageBuilder.setString(outputColumn, default_value);
|
177
|
+
} else {
|
178
|
+
pageBuilder.setNull(outputColumn);
|
179
|
+
}
|
147
180
|
} else {
|
148
181
|
pageBuilder.setString(outputColumn, pageReader.getString(inputColumn));
|
149
182
|
}
|
@@ -151,8 +184,9 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
151
184
|
|
152
185
|
@Override
|
153
186
|
public void timestampColumn(Column outputColumn) {
|
154
|
-
Column inputColumn =
|
187
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
155
188
|
if (pageReader.isNull(inputColumn)) {
|
189
|
+
// ToDo: default for timestamp
|
156
190
|
pageBuilder.setNull(outputColumn);
|
157
191
|
} else {
|
158
192
|
pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
|
@@ -0,0 +1,16 @@
|
|
1
|
+
package org.embulk.filter.column;
|
2
|
+
|
3
|
+
import org.embulk.config.Config;
|
4
|
+
import org.embulk.config.ConfigDefault;
|
5
|
+
import org.embulk.config.Task;
|
6
|
+
import com.google.common.base.Optional;
|
7
|
+
|
8
|
+
public interface ColumnConfig extends Task
|
9
|
+
{
|
10
|
+
@Config("name")
|
11
|
+
public String getName();
|
12
|
+
|
13
|
+
@Config("default")
|
14
|
+
@ConfigDefault("null")
|
15
|
+
public Optional<Object> getDefault();
|
16
|
+
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
@@ -50,6 +50,7 @@ files:
|
|
50
50
|
- LICENSE.txt
|
51
51
|
- README.md
|
52
52
|
- build.gradle
|
53
|
+
- example.csv
|
53
54
|
- example.yml
|
54
55
|
- gradle/wrapper/gradle-wrapper.jar
|
55
56
|
- gradle/wrapper/gradle-wrapper.properties
|
@@ -57,8 +58,9 @@ files:
|
|
57
58
|
- gradlew.bat
|
58
59
|
- lib/embulk/filter/column.rb
|
59
60
|
- src/main/java/org/embulk/filter/ColumnFilterPlugin.java
|
61
|
+
- src/main/java/org/embulk/filter/column/ColumnConfig.java
|
60
62
|
- src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
|
61
|
-
- classpath/embulk-filter-column-0.1.
|
63
|
+
- classpath/embulk-filter-column-0.1.4.jar
|
62
64
|
homepage: https://github.com/sonots/embulk-filter-column
|
63
65
|
licenses:
|
64
66
|
- MIT
|