embulk-filter-row 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (28)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/CHANGELOG.md +6 -0
  4. data/README.md +124 -4
  5. data/build.gradle +15 -4
  6. data/classpath/embulk-filter-row-0.3.0.jar +0 -0
  7. data/example/and.yml +0 -7
  8. data/example/example.yml +0 -7
  9. data/example/where.yml +28 -0
  10. data/script/byaccj.sh +29 -0
  11. data/src/main/java/org/embulk/filter/row/{AbstractColumnVisitor.java → AbstractGuardColumnVisitor.java} +9 -17
  12. data/src/main/java/org/embulk/filter/row/BuildColumnVisitorImpl.java +98 -0
  13. data/src/main/java/org/embulk/filter/row/{ColumnVisitorAndImpl.java → GuardColumnVisitorAndImpl.java} +11 -45
  14. data/src/main/java/org/embulk/filter/row/{ColumnVisitorOrImpl.java → GuardColumnVisitorOrImpl.java} +11 -45
  15. data/src/main/java/org/embulk/filter/row/GuardColumnVisitorWhereImpl.java +28 -0
  16. data/src/main/java/org/embulk/filter/row/RowFilterPlugin.java +49 -16
  17. data/src/main/java/org/embulk/filter/row/where/Parser.java +831 -0
  18. data/src/main/java/org/embulk/filter/row/where/ParserExp.java +290 -0
  19. data/src/main/java/org/embulk/filter/row/where/ParserLiteral.java +277 -0
  20. data/src/main/java/org/embulk/filter/row/where/ParserNode.java +6 -0
  21. data/src/main/java/org/embulk/filter/row/where/ParserVal.java +78 -0
  22. data/src/main/java/org/embulk/filter/row/where/Yylex.java +833 -0
  23. data/src/main/java/org/embulk/filter/row/where/_lexer.l +108 -0
  24. data/src/main/java/org/embulk/filter/row/where/_parser.y +137 -0
  25. data/src/test/java/org/embulk/filter/row/where/TestParser.java +383 -0
  26. data/src/test/java/org/embulk/filter/row/where/TestYylex.java +256 -0
  27. metadata +19 -5
  28. data/classpath/embulk-filter-row-0.2.2.jar +0 -0
@@ -9,32 +9,37 @@ import org.embulk.filter.row.condition.StringCondition;
9
9
  import org.embulk.filter.row.condition.TimestampCondition;
10
10
 
11
11
  import org.embulk.spi.Column;
12
+ import org.embulk.spi.ColumnVisitor;
12
13
  import org.embulk.spi.Exec;
13
- import org.embulk.spi.PageBuilder;
14
14
  import org.embulk.spi.PageReader;
15
15
  import org.embulk.spi.Schema;
16
16
  import org.embulk.spi.time.Timestamp;
17
17
 
18
18
  import org.slf4j.Logger;
19
19
 
20
+ import java.util.HashMap;
20
21
  import java.util.List;
21
22
 
22
- class ColumnVisitorOrImpl extends AbstractColumnVisitor
23
+ class GuardColumnVisitorOrImpl
24
+ extends AbstractGuardColumnVisitor
25
+ implements ColumnVisitor
23
26
  {
24
27
  private static final Logger logger = Exec.getLogger(RowFilterPlugin.class);
25
28
  private boolean shouldAddRecord;
29
+ private HashMap<String, List<Condition>> conditionMap;
26
30
 
27
- ColumnVisitorOrImpl(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, PageBuilder pageBuilder)
31
+ GuardColumnVisitorOrImpl(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader)
28
32
  {
29
- super(task, inputSchema, outputSchema, pageReader, pageBuilder);
33
+ super(task, inputSchema, outputSchema, pageReader);
34
+ this.conditionMap = buildConditionMap(task, outputSchema);
30
35
  }
31
36
 
32
- public boolean visitColumns(Schema schema)
37
+ public boolean visitColumns(Schema inputSchema)
33
38
  {
34
39
  //Visitor objects are created for each thread :)
35
40
  //System.out.println(String.format("thread_id:%d object_id:%d", Thread.currentThread().getId(), this.hashCode()));
36
41
  shouldAddRecord = false;
37
- for (Column column : schema.getColumns()) {
42
+ for (Column column : inputSchema.getColumns()) {
38
43
  column.visit(this);
39
44
  }
40
45
  return shouldAddRecord;
@@ -43,12 +48,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
43
48
  @Override
44
49
  public void booleanColumn(Column column)
45
50
  {
46
- if (pageReader.isNull(column)) {
47
- pageBuilder.setNull(column);
48
- }
49
- else {
50
- pageBuilder.setBoolean(column, pageReader.getBoolean(column));
51
- }
52
51
  if (shouldAddRecord) {
53
52
  return;
54
53
  }
@@ -74,12 +73,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
74
73
  @Override
75
74
  public void longColumn(Column column)
76
75
  {
77
- if (pageReader.isNull(column)) {
78
- pageBuilder.setNull(column);
79
- }
80
- else {
81
- pageBuilder.setLong(column, pageReader.getLong(column));
82
- }
83
76
  if (shouldAddRecord) {
84
77
  return;
85
78
  }
@@ -105,12 +98,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
105
98
  @Override
106
99
  public void doubleColumn(Column column)
107
100
  {
108
- if (pageReader.isNull(column)) {
109
- pageBuilder.setNull(column);
110
- }
111
- else {
112
- pageBuilder.setDouble(column, pageReader.getDouble(column));
113
- }
114
101
  if (shouldAddRecord) {
115
102
  return;
116
103
  }
@@ -136,12 +123,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
136
123
  @Override
137
124
  public void stringColumn(Column column)
138
125
  {
139
- if (pageReader.isNull(column)) {
140
- pageBuilder.setNull(column);
141
- }
142
- else {
143
- pageBuilder.setString(column, pageReader.getString(column));
144
- }
145
126
  if (shouldAddRecord) {
146
127
  return;
147
128
  }
@@ -167,12 +148,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
167
148
  @Override
168
149
  public void timestampColumn(Column column)
169
150
  {
170
- if (pageReader.isNull(column)) {
171
- pageBuilder.setNull(column);
172
- }
173
- else {
174
- pageBuilder.setTimestamp(column, pageReader.getTimestamp(column));
175
- }
176
151
  if (shouldAddRecord) {
177
152
  return;
178
153
  }
@@ -198,14 +173,5 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
198
173
  @Override
199
174
  public void jsonColumn(Column column)
200
175
  {
201
- if (!shouldAddRecord) {
202
- return;
203
- }
204
- if (pageReader.isNull(column)) {
205
- pageBuilder.setNull(column);
206
- }
207
- else {
208
- pageBuilder.setJson(column, pageReader.getJson(column));
209
- }
210
176
  }
211
177
  }
@@ -0,0 +1,28 @@
1
+ package org.embulk.filter.row;
2
+
3
+ import org.embulk.filter.row.RowFilterPlugin.PluginTask;
4
+
5
+ import org.embulk.filter.row.where.ParserExp;
6
+ import org.embulk.spi.Exec;
7
+ import org.embulk.spi.PageReader;
8
+ import org.embulk.spi.Schema;
9
+
10
+ import org.slf4j.Logger;
11
+
12
+ class GuardColumnVisitorWhereImpl
13
+ extends AbstractGuardColumnVisitor
14
+ {
15
+ private static final Logger logger = Exec.getLogger(RowFilterPlugin.class);
16
+ ParserExp parserExp;
17
+
18
+ GuardColumnVisitorWhereImpl(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, ParserExp parserExp)
19
+ {
20
+ super(task, inputSchema, outputSchema, pageReader);
21
+ this.parserExp = parserExp;
22
+ }
23
+
24
+ public boolean visitColumns(Schema inputSchema)
25
+ {
26
+ return parserExp.eval(pageReader);
27
+ }
28
+ }
@@ -1,5 +1,7 @@
1
1
  package org.embulk.filter.row;
2
2
 
3
+ import com.google.common.base.Optional;
4
+
3
5
  import org.embulk.config.Config;
4
6
  import org.embulk.config.ConfigDefault;
5
7
  import org.embulk.config.ConfigException;
@@ -8,8 +10,10 @@ import org.embulk.config.Task;
8
10
  import org.embulk.config.TaskSource;
9
11
 
10
12
  import org.embulk.filter.row.condition.ConditionConfig;
13
+ import org.embulk.filter.row.where.Parser;
14
+ import org.embulk.filter.row.where.ParserExp;
11
15
 
12
- import org.embulk.spi.ColumnVisitor;
16
+ import org.embulk.filter.row.where.ParserLiteral;
13
17
  import org.embulk.spi.Exec;
14
18
  import org.embulk.spi.FilterPlugin;
15
19
  import org.embulk.spi.Page;
@@ -26,6 +30,7 @@ import java.util.List;
26
30
  public class RowFilterPlugin implements FilterPlugin
27
31
  {
28
32
  private static final Logger logger = Exec.getLogger(RowFilterPlugin.class);
33
+ private ParserExp parserExp = null;
29
34
 
30
35
  public RowFilterPlugin() {}
31
36
 
@@ -36,7 +41,12 @@ public class RowFilterPlugin implements FilterPlugin
36
41
  public String getCondition();
37
42
 
38
43
  @Config("conditions")
39
- public List<ConditionConfig> getConditions();
44
+ @ConfigDefault("null")
45
+ public Optional<List<ConditionConfig>> getConditions();
46
+
47
+ @Config("where")
48
+ @ConfigDefault("null")
49
+ public Optional<String> getWhere();
40
50
  }
41
51
 
42
52
  @Override
@@ -44,6 +54,7 @@ public class RowFilterPlugin implements FilterPlugin
44
54
  FilterPlugin.Control control)
45
55
  {
46
56
  PluginTask task = config.loadConfig(PluginTask.class);
57
+ ParserLiteral.setJRuby(task.getJRuby());
47
58
 
48
59
  configure(task, inputSchema);
49
60
  Schema outputSchema = inputSchema;
@@ -53,14 +64,24 @@ public class RowFilterPlugin implements FilterPlugin
53
64
 
54
65
  void configure(PluginTask task, Schema inputSchema) throws ConfigException
55
66
  {
56
- for (ConditionConfig conditionConfig : task.getConditions()) {
57
- String columnName = conditionConfig.getColumn();
58
- inputSchema.lookupColumn(columnName); // throw SchemaConfigException if not found
59
- }
67
+ if (task.getConditions().isPresent()) {
68
+ for (ConditionConfig conditionConfig : task.getConditions().get()) {
69
+ String columnName = conditionConfig.getColumn();
70
+ inputSchema.lookupColumn(columnName); // throw SchemaConfigException if not found
71
+ }
60
72
 
61
- String condition = task.getCondition().toLowerCase();
62
- if (!condition.equals("or") && !condition.equals("and")) {
63
- throw new ConfigException("condition must be either of \"or\" or \"and\".");
73
+ String condition = task.getCondition().toLowerCase();
74
+ if (!condition.equals("or") && !condition.equals("and")) {
75
+ throw new ConfigException("condition must be either of \"or\" or \"and\".");
76
+ }
77
+ }
78
+ else if (task.getWhere().isPresent()) {
79
+ String where = task.getWhere().get();
80
+ Parser parser = new Parser(inputSchema);
81
+ parserExp = parser.parse(where); // throw ConfigException if something wrong
82
+ }
83
+ else {
84
+ throw new ConfigException("Either of `conditions` or `where` must be set.");
64
85
  }
65
86
  }
66
87
 
@@ -70,14 +91,24 @@ public class RowFilterPlugin implements FilterPlugin
70
91
  {
71
92
  final PluginTask task = taskSource.loadTask(PluginTask.class);
72
93
  final boolean orCondition = task.getCondition().toLowerCase().equals("or");
94
+ final PageReader pageReader = new PageReader(inputSchema);
95
+ final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
73
96
 
74
- return new PageOutput() {
75
- private PageReader pageReader = new PageReader(inputSchema);
76
- private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
77
- private AbstractColumnVisitor visitor = orCondition ?
78
- new ColumnVisitorOrImpl(task, inputSchema, outputSchema, pageReader, pageBuilder) :
79
- new ColumnVisitorAndImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
97
+ final AbstractGuardColumnVisitor guradVisitor;
98
+ if (task.getWhere().isPresent()) {
99
+ guradVisitor = new GuardColumnVisitorWhereImpl(task, inputSchema, outputSchema, pageReader, parserExp);
100
+ }
101
+ else if (orCondition) {
102
+ guradVisitor = new GuardColumnVisitorOrImpl(task, inputSchema, outputSchema, pageReader);
103
+ }
104
+ else {
105
+ guradVisitor = new GuardColumnVisitorAndImpl(task, inputSchema, outputSchema, pageReader);
106
+ }
107
+
108
+ final BuildColumnVisitorImpl buildVisitor;
109
+ buildVisitor = new BuildColumnVisitorImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
80
110
 
111
+ return new PageOutput() {
81
112
  @Override
82
113
  public void finish()
83
114
  {
@@ -96,7 +127,9 @@ public class RowFilterPlugin implements FilterPlugin
96
127
  pageReader.setPage(page);
97
128
 
98
129
  while (pageReader.nextRecord()) {
99
- if (visitor.visitColumns(inputSchema)) {
130
+ if (guradVisitor.visitColumns(inputSchema)) {
131
+ // Calling output.add(page) directly did not work (a double release() error occurred), so each accepted record is copied from the reader to the builder.
132
+ outputSchema.visitColumns(buildVisitor);
100
133
  pageBuilder.addRecord();
101
134
  }
102
135
  }