embulk-filter-row 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +6 -0
- data/README.md +124 -4
- data/build.gradle +15 -4
- data/classpath/embulk-filter-row-0.3.0.jar +0 -0
- data/example/and.yml +0 -7
- data/example/example.yml +0 -7
- data/example/where.yml +28 -0
- data/script/byaccj.sh +29 -0
- data/src/main/java/org/embulk/filter/row/{AbstractColumnVisitor.java → AbstractGuardColumnVisitor.java} +9 -17
- data/src/main/java/org/embulk/filter/row/BuildColumnVisitorImpl.java +98 -0
- data/src/main/java/org/embulk/filter/row/{ColumnVisitorAndImpl.java → GuardColumnVisitorAndImpl.java} +11 -45
- data/src/main/java/org/embulk/filter/row/{ColumnVisitorOrImpl.java → GuardColumnVisitorOrImpl.java} +11 -45
- data/src/main/java/org/embulk/filter/row/GuardColumnVisitorWhereImpl.java +28 -0
- data/src/main/java/org/embulk/filter/row/RowFilterPlugin.java +49 -16
- data/src/main/java/org/embulk/filter/row/where/Parser.java +831 -0
- data/src/main/java/org/embulk/filter/row/where/ParserExp.java +290 -0
- data/src/main/java/org/embulk/filter/row/where/ParserLiteral.java +277 -0
- data/src/main/java/org/embulk/filter/row/where/ParserNode.java +6 -0
- data/src/main/java/org/embulk/filter/row/where/ParserVal.java +78 -0
- data/src/main/java/org/embulk/filter/row/where/Yylex.java +833 -0
- data/src/main/java/org/embulk/filter/row/where/_lexer.l +108 -0
- data/src/main/java/org/embulk/filter/row/where/_parser.y +137 -0
- data/src/test/java/org/embulk/filter/row/where/TestParser.java +383 -0
- data/src/test/java/org/embulk/filter/row/where/TestYylex.java +256 -0
- metadata +19 -5
- data/classpath/embulk-filter-row-0.2.2.jar +0 -0
data/src/main/java/org/embulk/filter/row/{ColumnVisitorOrImpl.java → GuardColumnVisitorOrImpl.java}
RENAMED
@@ -9,32 +9,37 @@ import org.embulk.filter.row.condition.StringCondition;
|
|
9
9
|
import org.embulk.filter.row.condition.TimestampCondition;
|
10
10
|
|
11
11
|
import org.embulk.spi.Column;
|
12
|
+
import org.embulk.spi.ColumnVisitor;
|
12
13
|
import org.embulk.spi.Exec;
|
13
|
-
import org.embulk.spi.PageBuilder;
|
14
14
|
import org.embulk.spi.PageReader;
|
15
15
|
import org.embulk.spi.Schema;
|
16
16
|
import org.embulk.spi.time.Timestamp;
|
17
17
|
|
18
18
|
import org.slf4j.Logger;
|
19
19
|
|
20
|
+
import java.util.HashMap;
|
20
21
|
import java.util.List;
|
21
22
|
|
22
|
-
class
|
23
|
+
class GuardColumnVisitorOrImpl
|
24
|
+
extends AbstractGuardColumnVisitor
|
25
|
+
implements ColumnVisitor
|
23
26
|
{
|
24
27
|
private static final Logger logger = Exec.getLogger(RowFilterPlugin.class);
|
25
28
|
private boolean shouldAddRecord;
|
29
|
+
private HashMap<String, List<Condition>> conditionMap;
|
26
30
|
|
27
|
-
|
31
|
+
GuardColumnVisitorOrImpl(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader)
|
28
32
|
{
|
29
|
-
super(task, inputSchema, outputSchema, pageReader
|
33
|
+
super(task, inputSchema, outputSchema, pageReader);
|
34
|
+
this.conditionMap = buildConditionMap(task, outputSchema);
|
30
35
|
}
|
31
36
|
|
32
|
-
public boolean visitColumns(Schema
|
37
|
+
public boolean visitColumns(Schema inputSchema)
|
33
38
|
{
|
34
39
|
//Visitor objects are created for each thread :)
|
35
40
|
//System.out.println(String.format("thread_id:%d object_id:%d", Thread.currentThread().getId(), this.hashCode()));
|
36
41
|
shouldAddRecord = false;
|
37
|
-
for (Column column :
|
42
|
+
for (Column column : inputSchema.getColumns()) {
|
38
43
|
column.visit(this);
|
39
44
|
}
|
40
45
|
return shouldAddRecord;
|
@@ -43,12 +48,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
|
43
48
|
@Override
|
44
49
|
public void booleanColumn(Column column)
|
45
50
|
{
|
46
|
-
if (pageReader.isNull(column)) {
|
47
|
-
pageBuilder.setNull(column);
|
48
|
-
}
|
49
|
-
else {
|
50
|
-
pageBuilder.setBoolean(column, pageReader.getBoolean(column));
|
51
|
-
}
|
52
51
|
if (shouldAddRecord) {
|
53
52
|
return;
|
54
53
|
}
|
@@ -74,12 +73,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
|
74
73
|
@Override
|
75
74
|
public void longColumn(Column column)
|
76
75
|
{
|
77
|
-
if (pageReader.isNull(column)) {
|
78
|
-
pageBuilder.setNull(column);
|
79
|
-
}
|
80
|
-
else {
|
81
|
-
pageBuilder.setLong(column, pageReader.getLong(column));
|
82
|
-
}
|
83
76
|
if (shouldAddRecord) {
|
84
77
|
return;
|
85
78
|
}
|
@@ -105,12 +98,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
|
105
98
|
@Override
|
106
99
|
public void doubleColumn(Column column)
|
107
100
|
{
|
108
|
-
if (pageReader.isNull(column)) {
|
109
|
-
pageBuilder.setNull(column);
|
110
|
-
}
|
111
|
-
else {
|
112
|
-
pageBuilder.setDouble(column, pageReader.getDouble(column));
|
113
|
-
}
|
114
101
|
if (shouldAddRecord) {
|
115
102
|
return;
|
116
103
|
}
|
@@ -136,12 +123,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
|
136
123
|
@Override
|
137
124
|
public void stringColumn(Column column)
|
138
125
|
{
|
139
|
-
if (pageReader.isNull(column)) {
|
140
|
-
pageBuilder.setNull(column);
|
141
|
-
}
|
142
|
-
else {
|
143
|
-
pageBuilder.setString(column, pageReader.getString(column));
|
144
|
-
}
|
145
126
|
if (shouldAddRecord) {
|
146
127
|
return;
|
147
128
|
}
|
@@ -167,12 +148,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
|
167
148
|
@Override
|
168
149
|
public void timestampColumn(Column column)
|
169
150
|
{
|
170
|
-
if (pageReader.isNull(column)) {
|
171
|
-
pageBuilder.setNull(column);
|
172
|
-
}
|
173
|
-
else {
|
174
|
-
pageBuilder.setTimestamp(column, pageReader.getTimestamp(column));
|
175
|
-
}
|
176
151
|
if (shouldAddRecord) {
|
177
152
|
return;
|
178
153
|
}
|
@@ -198,14 +173,5 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
|
198
173
|
@Override
|
199
174
|
public void jsonColumn(Column column)
|
200
175
|
{
|
201
|
-
if (!shouldAddRecord) {
|
202
|
-
return;
|
203
|
-
}
|
204
|
-
if (pageReader.isNull(column)) {
|
205
|
-
pageBuilder.setNull(column);
|
206
|
-
}
|
207
|
-
else {
|
208
|
-
pageBuilder.setJson(column, pageReader.getJson(column));
|
209
|
-
}
|
210
176
|
}
|
211
177
|
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
package org.embulk.filter.row;
|
2
|
+
|
3
|
+
import org.embulk.filter.row.RowFilterPlugin.PluginTask;
|
4
|
+
|
5
|
+
import org.embulk.filter.row.where.ParserExp;
|
6
|
+
import org.embulk.spi.Exec;
|
7
|
+
import org.embulk.spi.PageReader;
|
8
|
+
import org.embulk.spi.Schema;
|
9
|
+
|
10
|
+
import org.slf4j.Logger;
|
11
|
+
|
12
|
+
class GuardColumnVisitorWhereImpl
|
13
|
+
extends AbstractGuardColumnVisitor
|
14
|
+
{
|
15
|
+
private static final Logger logger = Exec.getLogger(RowFilterPlugin.class);
|
16
|
+
ParserExp parserExp;
|
17
|
+
|
18
|
+
GuardColumnVisitorWhereImpl(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, ParserExp parserExp)
|
19
|
+
{
|
20
|
+
super(task, inputSchema, outputSchema, pageReader);
|
21
|
+
this.parserExp = parserExp;
|
22
|
+
}
|
23
|
+
|
24
|
+
public boolean visitColumns(Schema inputSchema)
|
25
|
+
{
|
26
|
+
return parserExp.eval(pageReader);
|
27
|
+
}
|
28
|
+
}
|
@@ -1,5 +1,7 @@
|
|
1
1
|
package org.embulk.filter.row;
|
2
2
|
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
|
3
5
|
import org.embulk.config.Config;
|
4
6
|
import org.embulk.config.ConfigDefault;
|
5
7
|
import org.embulk.config.ConfigException;
|
@@ -8,8 +10,10 @@ import org.embulk.config.Task;
|
|
8
10
|
import org.embulk.config.TaskSource;
|
9
11
|
|
10
12
|
import org.embulk.filter.row.condition.ConditionConfig;
|
13
|
+
import org.embulk.filter.row.where.Parser;
|
14
|
+
import org.embulk.filter.row.where.ParserExp;
|
11
15
|
|
12
|
-
import org.embulk.
|
16
|
+
import org.embulk.filter.row.where.ParserLiteral;
|
13
17
|
import org.embulk.spi.Exec;
|
14
18
|
import org.embulk.spi.FilterPlugin;
|
15
19
|
import org.embulk.spi.Page;
|
@@ -26,6 +30,7 @@ import java.util.List;
|
|
26
30
|
public class RowFilterPlugin implements FilterPlugin
|
27
31
|
{
|
28
32
|
private static final Logger logger = Exec.getLogger(RowFilterPlugin.class);
|
33
|
+
private ParserExp parserExp = null;
|
29
34
|
|
30
35
|
public RowFilterPlugin() {}
|
31
36
|
|
@@ -36,7 +41,12 @@ public class RowFilterPlugin implements FilterPlugin
|
|
36
41
|
public String getCondition();
|
37
42
|
|
38
43
|
@Config("conditions")
|
39
|
-
|
44
|
+
@ConfigDefault("null")
|
45
|
+
public Optional<List<ConditionConfig>> getConditions();
|
46
|
+
|
47
|
+
@Config("where")
|
48
|
+
@ConfigDefault("null")
|
49
|
+
public Optional<String> getWhere();
|
40
50
|
}
|
41
51
|
|
42
52
|
@Override
|
@@ -44,6 +54,7 @@ public class RowFilterPlugin implements FilterPlugin
|
|
44
54
|
FilterPlugin.Control control)
|
45
55
|
{
|
46
56
|
PluginTask task = config.loadConfig(PluginTask.class);
|
57
|
+
ParserLiteral.setJRuby(task.getJRuby());
|
47
58
|
|
48
59
|
configure(task, inputSchema);
|
49
60
|
Schema outputSchema = inputSchema;
|
@@ -53,14 +64,24 @@ public class RowFilterPlugin implements FilterPlugin
|
|
53
64
|
|
54
65
|
void configure(PluginTask task, Schema inputSchema) throws ConfigException
|
55
66
|
{
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
67
|
+
if (task.getConditions().isPresent()) {
|
68
|
+
for (ConditionConfig conditionConfig : task.getConditions().get()) {
|
69
|
+
String columnName = conditionConfig.getColumn();
|
70
|
+
inputSchema.lookupColumn(columnName); // throw SchemaConfigException if not found
|
71
|
+
}
|
60
72
|
|
61
|
-
|
62
|
-
|
63
|
-
|
73
|
+
String condition = task.getCondition().toLowerCase();
|
74
|
+
if (!condition.equals("or") && !condition.equals("and")) {
|
75
|
+
throw new ConfigException("condition must be either of \"or\" or \"and\".");
|
76
|
+
}
|
77
|
+
}
|
78
|
+
else if (task.getWhere().isPresent()) {
|
79
|
+
String where = task.getWhere().get();
|
80
|
+
Parser parser = new Parser(inputSchema);
|
81
|
+
parserExp = parser.parse(where); // throw ConfigException if something wrong
|
82
|
+
}
|
83
|
+
else {
|
84
|
+
throw new ConfigException("Either of `conditions` or `where` must be set.");
|
64
85
|
}
|
65
86
|
}
|
66
87
|
|
@@ -70,14 +91,24 @@ public class RowFilterPlugin implements FilterPlugin
|
|
70
91
|
{
|
71
92
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
72
93
|
final boolean orCondition = task.getCondition().toLowerCase().equals("or");
|
94
|
+
final PageReader pageReader = new PageReader(inputSchema);
|
95
|
+
final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
73
96
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
97
|
+
final AbstractGuardColumnVisitor guradVisitor;
|
98
|
+
if (task.getWhere().isPresent()) {
|
99
|
+
guradVisitor = new GuardColumnVisitorWhereImpl(task, inputSchema, outputSchema, pageReader, parserExp);
|
100
|
+
}
|
101
|
+
else if (orCondition) {
|
102
|
+
guradVisitor = new GuardColumnVisitorOrImpl(task, inputSchema, outputSchema, pageReader);
|
103
|
+
}
|
104
|
+
else {
|
105
|
+
guradVisitor = new GuardColumnVisitorAndImpl(task, inputSchema, outputSchema, pageReader);
|
106
|
+
}
|
107
|
+
|
108
|
+
final BuildColumnVisitorImpl buildVisitor;
|
109
|
+
buildVisitor = new BuildColumnVisitorImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
|
80
110
|
|
111
|
+
return new PageOutput() {
|
81
112
|
@Override
|
82
113
|
public void finish()
|
83
114
|
{
|
@@ -96,7 +127,9 @@ public class RowFilterPlugin implements FilterPlugin
|
|
96
127
|
pageReader.setPage(page);
|
97
128
|
|
98
129
|
while (pageReader.nextRecord()) {
|
99
|
-
if (
|
130
|
+
if (guradVisitor.visitColumns(inputSchema)) {
|
131
|
+
// output.add(page); did not work, double release() error occurred. We need to copy from reader to builder...
|
132
|
+
outputSchema.visitColumns(buildVisitor);
|
100
133
|
pageBuilder.addRecord();
|
101
134
|
}
|
102
135
|
}
|