embulk-filter-column 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +4 -2
- data/build.gradle +1 -1
- data/example.yml +22 -8
- data/src/main/java/org/embulk/filter/ColumnFilterPlugin.java +55 -21
- data/src/main/java/org/embulk/filter/column/ColumnConfig.java +16 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e3f1644fcdf2eee67ab2101e46eb5f546b9c373
|
4
|
+
data.tar.gz: 825008e7b6b9c4b1c3ce5da34e42fd055b627148
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4823b5a3e833c0ae0ae6302b7043027ce7e9b8740910c9b1753d312540a01d22e32dfe42b7af65cdb5227173d988255336bcc73d2bc1a4239ca6044d62c164c6
|
7
|
+
data.tar.gz: 78fa9b4a98d493a292e65e7e3387db08d997683937dfd594a69d45aecc8304d129ded9e3f3dfa409d6591323452a1dcd00389a30f0419e7794276145bffaa9c0
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -4,7 +4,9 @@ A filter plugin for Embulk to filter out columns
|
|
4
4
|
|
5
5
|
## Configuration
|
6
6
|
|
7
|
-
- **columns**:
|
7
|
+
- **columns**: columns (array of hash, required)
|
8
|
+
- **name**: name of column
|
9
|
+
- **default**: default value used if input is null
|
8
10
|
|
9
11
|
## Example
|
10
12
|
|
@@ -13,7 +15,7 @@ filters:
|
|
13
15
|
- type: column
|
14
16
|
columns:
|
15
17
|
- {name: id}
|
16
|
-
- {name: name}
|
18
|
+
- {name: name, default: 'foo'}
|
17
19
|
```
|
18
20
|
|
19
21
|
reduces columns to only the `id` and `name` columns, using `'foo'` as the value of `name` when the input is null.
|
data/build.gradle
CHANGED
data/example.yml
CHANGED
@@ -1,15 +1,29 @@
|
|
1
|
+
# in:
|
2
|
+
# type: random
|
3
|
+
# rows: 100
|
4
|
+
# schema:
|
5
|
+
# id: primary_key
|
6
|
+
# name: string
|
7
|
+
# score: integer
|
1
8
|
in:
|
2
|
-
type:
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
9
|
+
type: file
|
10
|
+
path_prefix: example.csv
|
11
|
+
parser:
|
12
|
+
type: csv
|
13
|
+
charset: UTF-8
|
14
|
+
newline: CRLF
|
15
|
+
null_string: 'NULL'
|
16
|
+
skip_header_lines: 1
|
17
|
+
comment_line_marker: '#'
|
18
|
+
columns:
|
19
|
+
- {name: id, type: long}
|
20
|
+
- {name: name, type: string}
|
21
|
+
- {name: score, type: double}
|
8
22
|
filters:
|
9
23
|
- type: column
|
10
24
|
columns:
|
11
|
-
- {name: name}
|
12
|
-
- {name: foo}
|
25
|
+
- {name: name, default: "foo"}
|
26
|
+
- {name: foo, default: 1}
|
13
27
|
- {name: id}
|
14
28
|
out:
|
15
29
|
type: stdout
|
@@ -21,16 +21,16 @@ import org.embulk.spi.PageOutput;
|
|
21
21
|
import org.embulk.spi.PageReader;
|
22
22
|
import org.embulk.spi.Schema;
|
23
23
|
import org.embulk.spi.SchemaConfig;
|
24
|
-
import org.embulk.spi.ColumnConfig;
|
25
24
|
import org.embulk.spi.Column;
|
26
25
|
import org.embulk.spi.ColumnVisitor;
|
26
|
+
import org.embulk.filter.column.ColumnConfig; // note: different from spi.ColumnConfig
|
27
27
|
|
28
28
|
public class ColumnFilterPlugin implements FilterPlugin
|
29
29
|
{
|
30
30
|
public interface PluginTask extends Task
|
31
31
|
{
|
32
32
|
@Config("columns")
|
33
|
-
public
|
33
|
+
public List<ColumnConfig> getColumns();
|
34
34
|
}
|
35
35
|
|
36
36
|
@Override
|
@@ -39,18 +39,15 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
39
39
|
{
|
40
40
|
PluginTask task = config.loadConfig(PluginTask.class);
|
41
41
|
|
42
|
-
//Schema outputSchema = task.getColumns().toSchema();
|
43
42
|
// Automatically get column type from inputSchema
|
44
|
-
|
45
|
-
List<ColumnConfig> outputColumnConfigs = schemaConfig.getColumns();
|
43
|
+
List<ColumnConfig> columnConfigs = task.getColumns();
|
46
44
|
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
47
45
|
int i = 0;
|
48
|
-
for (ColumnConfig
|
49
|
-
String
|
46
|
+
for (ColumnConfig columnConfig : columnConfigs) {
|
47
|
+
String columnName = columnConfig.getName();
|
50
48
|
for (Column inputColumn: inputSchema.getColumns()) {
|
51
|
-
if (inputColumn.getName().equals(
|
52
|
-
|
53
|
-
Column outputColumn = new Column(i++, outputColumnName, outputColumnType);
|
49
|
+
if (inputColumn.getName().equals(columnName)) {
|
50
|
+
Column outputColumn = new Column(i++, columnName, inputColumn.getType());
|
54
51
|
builder.add(outputColumn);
|
55
52
|
break;
|
56
53
|
}
|
@@ -67,11 +64,21 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
67
64
|
{
|
68
65
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
69
66
|
|
70
|
-
HashMap<Column, Column>
|
67
|
+
HashMap<Column, Column> outputInputColumnMap = new HashMap<Column, Column>();
|
71
68
|
for (Column outputColumn: outputSchema.getColumns()) {
|
72
69
|
for (Column inputColumn: inputSchema.getColumns()) {
|
73
70
|
if (inputColumn.getName().equals(outputColumn.getName())) {
|
74
|
-
|
71
|
+
outputInputColumnMap.put(outputColumn, inputColumn);
|
72
|
+
break;
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
HashMap<Column, ColumnConfig> outputColumnConfigMap = new HashMap<Column, ColumnConfig>();
|
78
|
+
for (Column outputColumn: outputSchema.getColumns()) {
|
79
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
80
|
+
if (columnConfig.getName().equals(outputColumn.getName())) {
|
81
|
+
outputColumnConfigMap.put(outputColumn, columnConfig);
|
75
82
|
break;
|
76
83
|
}
|
77
84
|
}
|
@@ -111,9 +118,16 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
111
118
|
|
112
119
|
@Override
|
113
120
|
public void booleanColumn(Column outputColumn) {
|
114
|
-
Column inputColumn =
|
121
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
115
122
|
if (pageReader.isNull(inputColumn)) {
|
116
|
-
|
123
|
+
ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
|
124
|
+
// ToDo: speed up
|
125
|
+
if (columnConfig.getDefault().isPresent()) {
|
126
|
+
boolean default_value = ((Boolean)columnConfig.getDefault().get()).booleanValue();
|
127
|
+
pageBuilder.setBoolean(outputColumn, default_value);
|
128
|
+
} else {
|
129
|
+
pageBuilder.setNull(outputColumn);
|
130
|
+
}
|
117
131
|
} else {
|
118
132
|
pageBuilder.setBoolean(outputColumn, pageReader.getBoolean(inputColumn));
|
119
133
|
}
|
@@ -121,9 +135,16 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
121
135
|
|
122
136
|
@Override
|
123
137
|
public void longColumn(Column outputColumn) {
|
124
|
-
Column inputColumn =
|
138
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
125
139
|
if (pageReader.isNull(inputColumn)) {
|
126
|
-
|
140
|
+
ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
|
141
|
+
// ToDo: speed up
|
142
|
+
if (columnConfig.getDefault().isPresent()) {
|
143
|
+
long default_value = ((Integer)columnConfig.getDefault().get()).longValue();
|
144
|
+
pageBuilder.setLong(outputColumn, default_value);
|
145
|
+
} else {
|
146
|
+
pageBuilder.setNull(outputColumn);
|
147
|
+
}
|
127
148
|
} else {
|
128
149
|
pageBuilder.setLong(outputColumn, pageReader.getLong(inputColumn));
|
129
150
|
}
|
@@ -131,9 +152,15 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
131
152
|
|
132
153
|
@Override
|
133
154
|
public void doubleColumn(Column outputColumn) {
|
134
|
-
Column inputColumn =
|
155
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
135
156
|
if (pageReader.isNull(inputColumn)) {
|
136
|
-
|
157
|
+
ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
|
158
|
+
if (columnConfig.getDefault().isPresent()) {
|
159
|
+
double default_value = ((Double)columnConfig.getDefault().get()).doubleValue();
|
160
|
+
pageBuilder.setDouble(outputColumn, default_value);
|
161
|
+
} else {
|
162
|
+
pageBuilder.setNull(outputColumn);
|
163
|
+
}
|
137
164
|
} else {
|
138
165
|
pageBuilder.setDouble(outputColumn, pageReader.getDouble(inputColumn));
|
139
166
|
}
|
@@ -141,9 +168,15 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
141
168
|
|
142
169
|
@Override
|
143
170
|
public void stringColumn(Column outputColumn) {
|
144
|
-
Column inputColumn =
|
171
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
145
172
|
if (pageReader.isNull(inputColumn)) {
|
146
|
-
|
173
|
+
ColumnConfig columnConfig = outputColumnConfigMap.get(outputColumn);
|
174
|
+
if (columnConfig.getDefault().isPresent()) {
|
175
|
+
String default_value = (String)columnConfig.getDefault().get();
|
176
|
+
pageBuilder.setString(outputColumn, default_value);
|
177
|
+
} else {
|
178
|
+
pageBuilder.setNull(outputColumn);
|
179
|
+
}
|
147
180
|
} else {
|
148
181
|
pageBuilder.setString(outputColumn, pageReader.getString(inputColumn));
|
149
182
|
}
|
@@ -151,8 +184,9 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
151
184
|
|
152
185
|
@Override
|
153
186
|
public void timestampColumn(Column outputColumn) {
|
154
|
-
Column inputColumn =
|
187
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
155
188
|
if (pageReader.isNull(inputColumn)) {
|
189
|
+
// ToDo: default for timestamp
|
156
190
|
pageBuilder.setNull(outputColumn);
|
157
191
|
} else {
|
158
192
|
pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
|
@@ -0,0 +1,16 @@
|
|
1
|
+
package org.embulk.filter.column;
|
2
|
+
|
3
|
+
import org.embulk.config.Config;
|
4
|
+
import org.embulk.config.ConfigDefault;
|
5
|
+
import org.embulk.config.Task;
|
6
|
+
import com.google.common.base.Optional;
|
7
|
+
|
8
|
+
public interface ColumnConfig extends Task
|
9
|
+
{
|
10
|
+
@Config("name")
|
11
|
+
public String getName();
|
12
|
+
|
13
|
+
@Config("default")
|
14
|
+
@ConfigDefault("null")
|
15
|
+
public Optional<Object> getDefault();
|
16
|
+
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
@@ -50,6 +50,7 @@ files:
|
|
50
50
|
- LICENSE.txt
|
51
51
|
- README.md
|
52
52
|
- build.gradle
|
53
|
+
- example.csv
|
53
54
|
- example.yml
|
54
55
|
- gradle/wrapper/gradle-wrapper.jar
|
55
56
|
- gradle/wrapper/gradle-wrapper.properties
|
@@ -57,8 +58,9 @@ files:
|
|
57
58
|
- gradlew.bat
|
58
59
|
- lib/embulk/filter/column.rb
|
59
60
|
- src/main/java/org/embulk/filter/ColumnFilterPlugin.java
|
61
|
+
- src/main/java/org/embulk/filter/column/ColumnConfig.java
|
60
62
|
- src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
|
61
|
-
- classpath/embulk-filter-column-0.1.
|
63
|
+
- classpath/embulk-filter-column-0.1.4.jar
|
62
64
|
homepage: https://github.com/sonots/embulk-filter-column
|
63
65
|
licenses:
|
64
66
|
- MIT
|