embulk-filter-row 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/CHANGELOG.md +6 -0
  4. data/README.md +124 -4
  5. data/build.gradle +15 -4
  6. data/classpath/embulk-filter-row-0.3.0.jar +0 -0
  7. data/example/and.yml +0 -7
  8. data/example/example.yml +0 -7
  9. data/example/where.yml +28 -0
  10. data/script/byaccj.sh +29 -0
  11. data/src/main/java/org/embulk/filter/row/{AbstractColumnVisitor.java → AbstractGuardColumnVisitor.java} +9 -17
  12. data/src/main/java/org/embulk/filter/row/BuildColumnVisitorImpl.java +98 -0
  13. data/src/main/java/org/embulk/filter/row/{ColumnVisitorAndImpl.java → GuardColumnVisitorAndImpl.java} +11 -45
  14. data/src/main/java/org/embulk/filter/row/{ColumnVisitorOrImpl.java → GuardColumnVisitorOrImpl.java} +11 -45
  15. data/src/main/java/org/embulk/filter/row/GuardColumnVisitorWhereImpl.java +28 -0
  16. data/src/main/java/org/embulk/filter/row/RowFilterPlugin.java +49 -16
  17. data/src/main/java/org/embulk/filter/row/where/Parser.java +831 -0
  18. data/src/main/java/org/embulk/filter/row/where/ParserExp.java +290 -0
  19. data/src/main/java/org/embulk/filter/row/where/ParserLiteral.java +277 -0
  20. data/src/main/java/org/embulk/filter/row/where/ParserNode.java +6 -0
  21. data/src/main/java/org/embulk/filter/row/where/ParserVal.java +78 -0
  22. data/src/main/java/org/embulk/filter/row/where/Yylex.java +833 -0
  23. data/src/main/java/org/embulk/filter/row/where/_lexer.l +108 -0
  24. data/src/main/java/org/embulk/filter/row/where/_parser.y +137 -0
  25. data/src/test/java/org/embulk/filter/row/where/TestParser.java +383 -0
  26. data/src/test/java/org/embulk/filter/row/where/TestYylex.java +256 -0
  27. metadata +19 -5
  28. data/classpath/embulk-filter-row-0.2.2.jar +0 -0
@@ -9,32 +9,37 @@ import org.embulk.filter.row.condition.StringCondition;
9
9
  import org.embulk.filter.row.condition.TimestampCondition;
10
10
 
11
11
  import org.embulk.spi.Column;
12
+ import org.embulk.spi.ColumnVisitor;
12
13
  import org.embulk.spi.Exec;
13
- import org.embulk.spi.PageBuilder;
14
14
  import org.embulk.spi.PageReader;
15
15
  import org.embulk.spi.Schema;
16
16
  import org.embulk.spi.time.Timestamp;
17
17
 
18
18
  import org.slf4j.Logger;
19
19
 
20
+ import java.util.HashMap;
20
21
  import java.util.List;
21
22
 
22
- class ColumnVisitorOrImpl extends AbstractColumnVisitor
23
+ class GuardColumnVisitorOrImpl
24
+ extends AbstractGuardColumnVisitor
25
+ implements ColumnVisitor
23
26
  {
24
27
  private static final Logger logger = Exec.getLogger(RowFilterPlugin.class);
25
28
  private boolean shouldAddRecord;
29
+ private HashMap<String, List<Condition>> conditionMap;
26
30
 
27
- ColumnVisitorOrImpl(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, PageBuilder pageBuilder)
31
+ GuardColumnVisitorOrImpl(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader)
28
32
  {
29
- super(task, inputSchema, outputSchema, pageReader, pageBuilder);
33
+ super(task, inputSchema, outputSchema, pageReader);
34
+ this.conditionMap = buildConditionMap(task, outputSchema);
30
35
  }
31
36
 
32
- public boolean visitColumns(Schema schema)
37
+ public boolean visitColumns(Schema inputSchema)
33
38
  {
34
39
  //Visitor objects are created for each thread :)
35
40
  //System.out.println(String.format("thread_id:%d object_id:%d", Thread.currentThread().getId(), this.hashCode()));
36
41
  shouldAddRecord = false;
37
- for (Column column : schema.getColumns()) {
42
+ for (Column column : inputSchema.getColumns()) {
38
43
  column.visit(this);
39
44
  }
40
45
  return shouldAddRecord;
@@ -43,12 +48,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
43
48
  @Override
44
49
  public void booleanColumn(Column column)
45
50
  {
46
- if (pageReader.isNull(column)) {
47
- pageBuilder.setNull(column);
48
- }
49
- else {
50
- pageBuilder.setBoolean(column, pageReader.getBoolean(column));
51
- }
52
51
  if (shouldAddRecord) {
53
52
  return;
54
53
  }
@@ -74,12 +73,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
74
73
  @Override
75
74
  public void longColumn(Column column)
76
75
  {
77
- if (pageReader.isNull(column)) {
78
- pageBuilder.setNull(column);
79
- }
80
- else {
81
- pageBuilder.setLong(column, pageReader.getLong(column));
82
- }
83
76
  if (shouldAddRecord) {
84
77
  return;
85
78
  }
@@ -105,12 +98,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
105
98
  @Override
106
99
  public void doubleColumn(Column column)
107
100
  {
108
- if (pageReader.isNull(column)) {
109
- pageBuilder.setNull(column);
110
- }
111
- else {
112
- pageBuilder.setDouble(column, pageReader.getDouble(column));
113
- }
114
101
  if (shouldAddRecord) {
115
102
  return;
116
103
  }
@@ -136,12 +123,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
136
123
  @Override
137
124
  public void stringColumn(Column column)
138
125
  {
139
- if (pageReader.isNull(column)) {
140
- pageBuilder.setNull(column);
141
- }
142
- else {
143
- pageBuilder.setString(column, pageReader.getString(column));
144
- }
145
126
  if (shouldAddRecord) {
146
127
  return;
147
128
  }
@@ -167,12 +148,6 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
167
148
  @Override
168
149
  public void timestampColumn(Column column)
169
150
  {
170
- if (pageReader.isNull(column)) {
171
- pageBuilder.setNull(column);
172
- }
173
- else {
174
- pageBuilder.setTimestamp(column, pageReader.getTimestamp(column));
175
- }
176
151
  if (shouldAddRecord) {
177
152
  return;
178
153
  }
@@ -198,14 +173,5 @@ class ColumnVisitorOrImpl extends AbstractColumnVisitor
198
173
  @Override
199
174
  public void jsonColumn(Column column)
200
175
  {
201
- if (!shouldAddRecord) {
202
- return;
203
- }
204
- if (pageReader.isNull(column)) {
205
- pageBuilder.setNull(column);
206
- }
207
- else {
208
- pageBuilder.setJson(column, pageReader.getJson(column));
209
- }
210
176
  }
211
177
  }
@@ -0,0 +1,28 @@
1
+ package org.embulk.filter.row;
2
+
3
+ import org.embulk.filter.row.RowFilterPlugin.PluginTask;
4
+
5
+ import org.embulk.filter.row.where.ParserExp;
6
+ import org.embulk.spi.Exec;
7
+ import org.embulk.spi.PageReader;
8
+ import org.embulk.spi.Schema;
9
+
10
+ import org.slf4j.Logger;
11
+
12
+ class GuardColumnVisitorWhereImpl
13
+ extends AbstractGuardColumnVisitor
14
+ {
15
+ private static final Logger logger = Exec.getLogger(RowFilterPlugin.class);
16
+ ParserExp parserExp;
17
+
18
+ GuardColumnVisitorWhereImpl(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, ParserExp parserExp)
19
+ {
20
+ super(task, inputSchema, outputSchema, pageReader);
21
+ this.parserExp = parserExp;
22
+ }
23
+
24
+ public boolean visitColumns(Schema inputSchema)
25
+ {
26
+ return parserExp.eval(pageReader);
27
+ }
28
+ }
@@ -1,5 +1,7 @@
1
1
  package org.embulk.filter.row;
2
2
 
3
+ import com.google.common.base.Optional;
4
+
3
5
  import org.embulk.config.Config;
4
6
  import org.embulk.config.ConfigDefault;
5
7
  import org.embulk.config.ConfigException;
@@ -8,8 +10,10 @@ import org.embulk.config.Task;
8
10
  import org.embulk.config.TaskSource;
9
11
 
10
12
  import org.embulk.filter.row.condition.ConditionConfig;
13
+ import org.embulk.filter.row.where.Parser;
14
+ import org.embulk.filter.row.where.ParserExp;
11
15
 
12
- import org.embulk.spi.ColumnVisitor;
16
+ import org.embulk.filter.row.where.ParserLiteral;
13
17
  import org.embulk.spi.Exec;
14
18
  import org.embulk.spi.FilterPlugin;
15
19
  import org.embulk.spi.Page;
@@ -26,6 +30,7 @@ import java.util.List;
26
30
  public class RowFilterPlugin implements FilterPlugin
27
31
  {
28
32
  private static final Logger logger = Exec.getLogger(RowFilterPlugin.class);
33
+ private ParserExp parserExp = null;
29
34
 
30
35
  public RowFilterPlugin() {}
31
36
 
@@ -36,7 +41,12 @@ public class RowFilterPlugin implements FilterPlugin
36
41
  public String getCondition();
37
42
 
38
43
  @Config("conditions")
39
- public List<ConditionConfig> getConditions();
44
+ @ConfigDefault("null")
45
+ public Optional<List<ConditionConfig>> getConditions();
46
+
47
+ @Config("where")
48
+ @ConfigDefault("null")
49
+ public Optional<String> getWhere();
40
50
  }
41
51
 
42
52
  @Override
@@ -44,6 +54,7 @@ public class RowFilterPlugin implements FilterPlugin
44
54
  FilterPlugin.Control control)
45
55
  {
46
56
  PluginTask task = config.loadConfig(PluginTask.class);
57
+ ParserLiteral.setJRuby(task.getJRuby());
47
58
 
48
59
  configure(task, inputSchema);
49
60
  Schema outputSchema = inputSchema;
@@ -53,14 +64,24 @@ public class RowFilterPlugin implements FilterPlugin
53
64
 
54
65
  void configure(PluginTask task, Schema inputSchema) throws ConfigException
55
66
  {
56
- for (ConditionConfig conditionConfig : task.getConditions()) {
57
- String columnName = conditionConfig.getColumn();
58
- inputSchema.lookupColumn(columnName); // throw SchemaConfigException if not found
59
- }
67
+ if (task.getConditions().isPresent()) {
68
+ for (ConditionConfig conditionConfig : task.getConditions().get()) {
69
+ String columnName = conditionConfig.getColumn();
70
+ inputSchema.lookupColumn(columnName); // throw SchemaConfigException if not found
71
+ }
60
72
 
61
- String condition = task.getCondition().toLowerCase();
62
- if (!condition.equals("or") && !condition.equals("and")) {
63
- throw new ConfigException("condition must be either of \"or\" or \"and\".");
73
+ String condition = task.getCondition().toLowerCase();
74
+ if (!condition.equals("or") && !condition.equals("and")) {
75
+ throw new ConfigException("condition must be either of \"or\" or \"and\".");
76
+ }
77
+ }
78
+ else if (task.getWhere().isPresent()) {
79
+ String where = task.getWhere().get();
80
+ Parser parser = new Parser(inputSchema);
81
+ parserExp = parser.parse(where); // throw ConfigException if something wrong
82
+ }
83
+ else {
84
+ throw new ConfigException("Either of `conditions` or `where` must be set.");
64
85
  }
65
86
  }
66
87
 
@@ -70,14 +91,24 @@ public class RowFilterPlugin implements FilterPlugin
70
91
  {
71
92
  final PluginTask task = taskSource.loadTask(PluginTask.class);
72
93
  final boolean orCondition = task.getCondition().toLowerCase().equals("or");
94
+ final PageReader pageReader = new PageReader(inputSchema);
95
+ final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
73
96
 
74
- return new PageOutput() {
75
- private PageReader pageReader = new PageReader(inputSchema);
76
- private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
77
- private AbstractColumnVisitor visitor = orCondition ?
78
- new ColumnVisitorOrImpl(task, inputSchema, outputSchema, pageReader, pageBuilder) :
79
- new ColumnVisitorAndImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
97
+ final AbstractGuardColumnVisitor guradVisitor;
98
+ if (task.getWhere().isPresent()) {
99
+ guradVisitor = new GuardColumnVisitorWhereImpl(task, inputSchema, outputSchema, pageReader, parserExp);
100
+ }
101
+ else if (orCondition) {
102
+ guradVisitor = new GuardColumnVisitorOrImpl(task, inputSchema, outputSchema, pageReader);
103
+ }
104
+ else {
105
+ guradVisitor = new GuardColumnVisitorAndImpl(task, inputSchema, outputSchema, pageReader);
106
+ }
107
+
108
+ final BuildColumnVisitorImpl buildVisitor;
109
+ buildVisitor = new BuildColumnVisitorImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
80
110
 
111
+ return new PageOutput() {
81
112
  @Override
82
113
  public void finish()
83
114
  {
@@ -96,7 +127,9 @@ public class RowFilterPlugin implements FilterPlugin
96
127
  pageReader.setPage(page);
97
128
 
98
129
  while (pageReader.nextRecord()) {
99
- if (visitor.visitColumns(inputSchema)) {
130
+ if (guradVisitor.visitColumns(inputSchema)) {
131
+ // output.add(page); did not work, double release() error occurred. We need to copy from reader to builder...
132
+ outputSchema.visitColumns(buildVisitor);
100
133
  pageBuilder.addRecord();
101
134
  }
102
135
  }