embulk-filter-row 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +6 -0
- data/README.md +124 -4
- data/build.gradle +15 -4
- data/classpath/embulk-filter-row-0.3.0.jar +0 -0
- data/example/and.yml +0 -7
- data/example/example.yml +0 -7
- data/example/where.yml +28 -0
- data/script/byaccj.sh +29 -0
- data/src/main/java/org/embulk/filter/row/{AbstractColumnVisitor.java → AbstractGuardColumnVisitor.java} +9 -17
- data/src/main/java/org/embulk/filter/row/BuildColumnVisitorImpl.java +98 -0
- data/src/main/java/org/embulk/filter/row/{ColumnVisitorAndImpl.java → GuardColumnVisitorAndImpl.java} +11 -45
- data/src/main/java/org/embulk/filter/row/{ColumnVisitorOrImpl.java → GuardColumnVisitorOrImpl.java} +11 -45
- data/src/main/java/org/embulk/filter/row/GuardColumnVisitorWhereImpl.java +28 -0
- data/src/main/java/org/embulk/filter/row/RowFilterPlugin.java +49 -16
- data/src/main/java/org/embulk/filter/row/where/Parser.java +831 -0
- data/src/main/java/org/embulk/filter/row/where/ParserExp.java +290 -0
- data/src/main/java/org/embulk/filter/row/where/ParserLiteral.java +277 -0
- data/src/main/java/org/embulk/filter/row/where/ParserNode.java +6 -0
- data/src/main/java/org/embulk/filter/row/where/ParserVal.java +78 -0
- data/src/main/java/org/embulk/filter/row/where/Yylex.java +833 -0
- data/src/main/java/org/embulk/filter/row/where/_lexer.l +108 -0
- data/src/main/java/org/embulk/filter/row/where/_parser.y +137 -0
- data/src/test/java/org/embulk/filter/row/where/TestParser.java +383 -0
- data/src/test/java/org/embulk/filter/row/where/TestYylex.java +256 -0
- metadata +19 -5
- data/classpath/embulk-filter-row-0.2.2.jar +0 -0
data/src/main/java/org/embulk/filter/row/{ColumnVisitorOrImpl.java → GuardColumnVisitorOrImpl.java}
RENAMED
@@ -9,32 +9,37 @@ import org.embulk.filter.row.condition.StringCondition;
|
|
9
9
|
import org.embulk.filter.row.condition.TimestampCondition;
|
10
10
|
|
11
11
|
import org.embulk.spi.Column;
|
12
|
+
import org.embulk.spi.ColumnVisitor;
|
12
13
|
import org.embulk.spi.Exec;
|
13
|
-
import org.embulk.spi.PageBuilder;
|
14
14
|
import org.embulk.spi.PageReader;
|
15
15
|
import org.embulk.spi.Schema;
|
16
16
|
import org.embulk.spi.time.Timestamp;
|
17
17
|
|
18
18
|
import org.slf4j.Logger;
|
19
19
|
|
20
|
+
import java.util.HashMap;
|
20
21
|
import java.util.List;
|
21
22
|
|
22
|
-
class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
23
|
+
class GuardColumnVisitorOrImpl
|
24
|
+
extends AbstractGuardColumnVisitor
|
25
|
+
implements ColumnVisitor
|
23
26
|
{
|
24
27
|
private static final Logger logger = Exec.getLogger(RowFilterPlugin.class);
|
25
28
|
private boolean shouldAddRecord;
|
29
|
+
private HashMap<String, List<Condition>> conditionMap;
|
26
30
|
|
27
|
-
|
31
|
+
GuardColumnVisitorOrImpl(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader)
|
28
32
|
{
|
29
|
-
super(task, inputSchema, outputSchema, pageReader
|
33
|
+
super(task, inputSchema, outputSchema, pageReader);
|
34
|
+
this.conditionMap = buildConditionMap(task, outputSchema);
|
30
35
|
}
|
31
36
|
|
32
|
-
public boolean visitColumns(Schema
|
37
|
+
public boolean visitColumns(Schema inputSchema)
|
33
38
|
{
|
34
39
|
//Visitor objects are created for each thread :)
|
35
40
|
//System.out.println(String.format("thread_id:%d object_id:%d", Thread.currentThread().getId(), this.hashCode()));
|
36
41
|
shouldAddRecord = false;
|
37
|
-
for (Column column :
|
42
|
+
for (Column column : inputSchema.getColumns()) {
|
38
43
|
column.visit(this);
|
39
44
|
}
|
40
45
|
return shouldAddRecord;
|
@@ -43,12 +48,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
|
43
48
|
@Override
|
44
49
|
public void booleanColumn(Column column)
|
45
50
|
{
|
46
|
-
if (pageReader.isNull(column)) {
|
47
|
-
pageBuilder.setNull(column);
|
48
|
-
}
|
49
|
-
else {
|
50
|
-
pageBuilder.setBoolean(column, pageReader.getBoolean(column));
|
51
|
-
}
|
52
51
|
if (shouldAddRecord) {
|
53
52
|
return;
|
54
53
|
}
|
@@ -74,12 +73,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
|
74
73
|
@Override
|
75
74
|
public void longColumn(Column column)
|
76
75
|
{
|
77
|
-
if (pageReader.isNull(column)) {
|
78
|
-
pageBuilder.setNull(column);
|
79
|
-
}
|
80
|
-
else {
|
81
|
-
pageBuilder.setLong(column, pageReader.getLong(column));
|
82
|
-
}
|
83
76
|
if (shouldAddRecord) {
|
84
77
|
return;
|
85
78
|
}
|
@@ -105,12 +98,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
|
105
98
|
@Override
|
106
99
|
public void doubleColumn(Column column)
|
107
100
|
{
|
108
|
-
if (pageReader.isNull(column)) {
|
109
|
-
pageBuilder.setNull(column);
|
110
|
-
}
|
111
|
-
else {
|
112
|
-
pageBuilder.setDouble(column, pageReader.getDouble(column));
|
113
|
-
}
|
114
101
|
if (shouldAddRecord) {
|
115
102
|
return;
|
116
103
|
}
|
@@ -136,12 +123,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
|
136
123
|
@Override
|
137
124
|
public void stringColumn(Column column)
|
138
125
|
{
|
139
|
-
if (pageReader.isNull(column)) {
|
140
|
-
pageBuilder.setNull(column);
|
141
|
-
}
|
142
|
-
else {
|
143
|
-
pageBuilder.setString(column, pageReader.getString(column));
|
144
|
-
}
|
145
126
|
if (shouldAddRecord) {
|
146
127
|
return;
|
147
128
|
}
|
@@ -167,12 +148,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
|
167
148
|
@Override
|
168
149
|
public void timestampColumn(Column column)
|
169
150
|
{
|
170
|
-
if (pageReader.isNull(column)) {
|
171
|
-
pageBuilder.setNull(column);
|
172
|
-
}
|
173
|
-
else {
|
174
|
-
pageBuilder.setTimestamp(column, pageReader.getTimestamp(column));
|
175
|
-
}
|
176
151
|
if (shouldAddRecord) {
|
177
152
|
return;
|
178
153
|
}
|
@@ -198,14 +173,5 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
|
|
198
173
|
@Override
|
199
174
|
public void jsonColumn(Column column)
|
200
175
|
{
|
201
|
-
if (!shouldAddRecord) {
|
202
|
-
return;
|
203
|
-
}
|
204
|
-
if (pageReader.isNull(column)) {
|
205
|
-
pageBuilder.setNull(column);
|
206
|
-
}
|
207
|
-
else {
|
208
|
-
pageBuilder.setJson(column, pageReader.getJson(column));
|
209
|
-
}
|
210
176
|
}
|
211
177
|
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
package org.embulk.filter.row;
|
2
|
+
|
3
|
+
import org.embulk.filter.row.RowFilterPlugin.PluginTask;
|
4
|
+
|
5
|
+
import org.embulk.filter.row.where.ParserExp;
|
6
|
+
import org.embulk.spi.Exec;
|
7
|
+
import org.embulk.spi.PageReader;
|
8
|
+
import org.embulk.spi.Schema;
|
9
|
+
|
10
|
+
import org.slf4j.Logger;
|
11
|
+
|
12
|
+
class GuardColumnVisitorWhereImpl
|
13
|
+
extends AbstractGuardColumnVisitor
|
14
|
+
{
|
15
|
+
private static final Logger logger = Exec.getLogger(RowFilterPlugin.class);
|
16
|
+
ParserExp parserExp;
|
17
|
+
|
18
|
+
GuardColumnVisitorWhereImpl(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, ParserExp parserExp)
|
19
|
+
{
|
20
|
+
super(task, inputSchema, outputSchema, pageReader);
|
21
|
+
this.parserExp = parserExp;
|
22
|
+
}
|
23
|
+
|
24
|
+
public boolean visitColumns(Schema inputSchema)
|
25
|
+
{
|
26
|
+
return parserExp.eval(pageReader);
|
27
|
+
}
|
28
|
+
}
|
@@ -1,5 +1,7 @@
|
|
1
1
|
package org.embulk.filter.row;
|
2
2
|
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
|
3
5
|
import org.embulk.config.Config;
|
4
6
|
import org.embulk.config.ConfigDefault;
|
5
7
|
import org.embulk.config.ConfigException;
|
@@ -8,8 +10,10 @@ import org.embulk.config.Task;
|
|
8
10
|
import org.embulk.config.TaskSource;
|
9
11
|
|
10
12
|
import org.embulk.filter.row.condition.ConditionConfig;
|
13
|
+
import org.embulk.filter.row.where.Parser;
|
14
|
+
import org.embulk.filter.row.where.ParserExp;
|
11
15
|
|
12
|
-
import org.embulk.
|
16
|
+
import org.embulk.filter.row.where.ParserLiteral;
|
13
17
|
import org.embulk.spi.Exec;
|
14
18
|
import org.embulk.spi.FilterPlugin;
|
15
19
|
import org.embulk.spi.Page;
|
@@ -26,6 +30,7 @@ import java.util.List;
|
|
26
30
|
public class RowFilterPlugin implements FilterPlugin
|
27
31
|
{
|
28
32
|
private static final Logger logger = Exec.getLogger(RowFilterPlugin.class);
|
33
|
+
private ParserExp parserExp = null;
|
29
34
|
|
30
35
|
public RowFilterPlugin() {}
|
31
36
|
|
@@ -36,7 +41,12 @@ public class RowFilterPlugin implements FilterPlugin
|
|
36
41
|
public String getCondition();
|
37
42
|
|
38
43
|
@Config("conditions")
|
39
|
-
|
44
|
+
@ConfigDefault("null")
|
45
|
+
public Optional<List<ConditionConfig>> getConditions();
|
46
|
+
|
47
|
+
@Config("where")
|
48
|
+
@ConfigDefault("null")
|
49
|
+
public Optional<String> getWhere();
|
40
50
|
}
|
41
51
|
|
42
52
|
@Override
|
@@ -44,6 +54,7 @@ public class RowFilterPlugin implements FilterPlugin
|
|
44
54
|
FilterPlugin.Control control)
|
45
55
|
{
|
46
56
|
PluginTask task = config.loadConfig(PluginTask.class);
|
57
|
+
ParserLiteral.setJRuby(task.getJRuby());
|
47
58
|
|
48
59
|
configure(task, inputSchema);
|
49
60
|
Schema outputSchema = inputSchema;
|
@@ -53,14 +64,24 @@ public class RowFilterPlugin implements FilterPlugin
|
|
53
64
|
|
54
65
|
void configure(PluginTask task, Schema inputSchema) throws ConfigException
|
55
66
|
{
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
67
|
+
if (task.getConditions().isPresent()) {
|
68
|
+
for (ConditionConfig conditionConfig : task.getConditions().get()) {
|
69
|
+
String columnName = conditionConfig.getColumn();
|
70
|
+
inputSchema.lookupColumn(columnName); // throw SchemaConfigException if not found
|
71
|
+
}
|
60
72
|
|
61
|
-
|
62
|
-
|
63
|
-
|
73
|
+
String condition = task.getCondition().toLowerCase();
|
74
|
+
if (!condition.equals("or") && !condition.equals("and")) {
|
75
|
+
throw new ConfigException("condition must be either of \"or\" or \"and\".");
|
76
|
+
}
|
77
|
+
}
|
78
|
+
else if (task.getWhere().isPresent()) {
|
79
|
+
String where = task.getWhere().get();
|
80
|
+
Parser parser = new Parser(inputSchema);
|
81
|
+
parserExp = parser.parse(where); // throw ConfigException if something wrong
|
82
|
+
}
|
83
|
+
else {
|
84
|
+
throw new ConfigException("Either of `conditions` or `where` must be set.");
|
64
85
|
}
|
65
86
|
}
|
66
87
|
|
@@ -70,14 +91,24 @@ public class RowFilterPlugin implements FilterPlugin
|
|
70
91
|
{
|
71
92
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
72
93
|
final boolean orCondition = task.getCondition().toLowerCase().equals("or");
|
94
|
+
final PageReader pageReader = new PageReader(inputSchema);
|
95
|
+
final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
73
96
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
97
|
+
final AbstractGuardColumnVisitor guradVisitor;
|
98
|
+
if (task.getWhere().isPresent()) {
|
99
|
+
guradVisitor = new GuardColumnVisitorWhereImpl(task, inputSchema, outputSchema, pageReader, parserExp);
|
100
|
+
}
|
101
|
+
else if (orCondition) {
|
102
|
+
guradVisitor = new GuardColumnVisitorOrImpl(task, inputSchema, outputSchema, pageReader);
|
103
|
+
}
|
104
|
+
else {
|
105
|
+
guradVisitor = new GuardColumnVisitorAndImpl(task, inputSchema, outputSchema, pageReader);
|
106
|
+
}
|
107
|
+
|
108
|
+
final BuildColumnVisitorImpl buildVisitor;
|
109
|
+
buildVisitor = new BuildColumnVisitorImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
|
80
110
|
|
111
|
+
return new PageOutput() {
|
81
112
|
@Override
|
82
113
|
public void finish()
|
83
114
|
{
|
@@ -96,7 +127,9 @@ public class RowFilterPlugin implements FilterPlugin
|
|
96
127
|
pageReader.setPage(page);
|
97
128
|
|
98
129
|
while (pageReader.nextRecord()) {
|
99
|
-
if (
|
130
|
+
if (guradVisitor.visitColumns(inputSchema)) {
|
131
|
+
// output.add(page); did not work, double release() error occurred. We need to copy from reader to builder...
|
132
|
+
outputSchema.visitColumns(buildVisitor);
|
100
133
|
pageBuilder.addRecord();
|
101
134
|
}
|
102
135
|
}
|