embulk-filter-row 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/CHANGELOG.md +6 -0
  4. data/README.md +124 -4
  5. data/build.gradle +15 -4
  6. data/classpath/embulk-filter-row-0.3.0.jar +0 -0
  7. data/example/and.yml +0 -7
  8. data/example/example.yml +0 -7
  9. data/example/where.yml +28 -0
  10. data/script/byaccj.sh +29 -0
  11. data/src/main/java/org/embulk/filter/row/{AbstractColumnVisitor.java → AbstractGuardColumnVisitor.java} +9 -17
  12. data/src/main/java/org/embulk/filter/row/BuildColumnVisitorImpl.java +98 -0
  13. data/src/main/java/org/embulk/filter/row/{ColumnVisitorAndImpl.java → GuardColumnVisitorAndImpl.java} +11 -45
  14. data/src/main/java/org/embulk/filter/row/{ColumnVisitorOrImpl.java → GuardColumnVisitorOrImpl.java} +11 -45
  15. data/src/main/java/org/embulk/filter/row/GuardColumnVisitorWhereImpl.java +28 -0
  16. data/src/main/java/org/embulk/filter/row/RowFilterPlugin.java +49 -16
  17. data/src/main/java/org/embulk/filter/row/where/Parser.java +831 -0
  18. data/src/main/java/org/embulk/filter/row/where/ParserExp.java +290 -0
  19. data/src/main/java/org/embulk/filter/row/where/ParserLiteral.java +277 -0
  20. data/src/main/java/org/embulk/filter/row/where/ParserNode.java +6 -0
  21. data/src/main/java/org/embulk/filter/row/where/ParserVal.java +78 -0
  22. data/src/main/java/org/embulk/filter/row/where/Yylex.java +833 -0
  23. data/src/main/java/org/embulk/filter/row/where/_lexer.l +108 -0
  24. data/src/main/java/org/embulk/filter/row/where/_parser.y +137 -0
  25. data/src/test/java/org/embulk/filter/row/where/TestParser.java +383 -0
  26. data/src/test/java/org/embulk/filter/row/where/TestYylex.java +256 -0
  27. metadata +19 -5
  28. data/classpath/embulk-filter-row-0.2.2.jar +0 -0
@@ -0,0 +1,290 @@
1
+ package org.embulk.filter.row.where;
2
+
3
+ import org.embulk.config.ConfigException;
4
+ import org.embulk.spi.PageReader;
5
+ import org.embulk.spi.time.Timestamp;
6
+
7
+ // Operation Node of AST (Abstract Syntax Tree)
8
+ public abstract class ParserExp extends ParserNode
9
+ {
10
+ public abstract boolean eval(PageReader pageReader);
11
+ }
12
+
13
+ abstract class BinaryOpExp extends ParserExp
14
+ {
15
+ protected ParserLiteral left;
16
+ protected ParserLiteral right;
17
+ protected int operator;
18
+
19
+ public BinaryOpExp(ParserLiteral left, ParserLiteral right, int operator)
20
+ {
21
+ this.left = left;
22
+ this.right = right;
23
+ this.operator = operator;
24
+ }
25
+
26
+ public BinaryOpExp(ParserVal left, ParserVal right, int operator)
27
+ {
28
+ this((ParserLiteral)(left.obj), (ParserLiteral)(right.obj), operator);
29
+ }
30
+ }
31
+
32
+ class BooleanOpExp extends BinaryOpExp
33
+ {
34
+ public BooleanOpExp(ParserLiteral left, ParserLiteral right, int operator)
35
+ {
36
+ super(left, right, operator);
37
+ if (! left.isBoolean()) {
38
+ throw new ConfigException(String.format("\"%s\" is not a Boolean column", ((IdentifierLiteral)left).name));
39
+ }
40
+ if (! right.isBoolean()) {
41
+ throw new ConfigException(String.format("\"%s\" is not a Boolean column", ((IdentifierLiteral)right).name));
42
+ }
43
+ }
44
+
45
+ public BooleanOpExp(ParserVal left, ParserVal right, int operator)
46
+ {
47
+ this((ParserLiteral)(left.obj), (ParserLiteral)(right.obj), operator);
48
+ }
49
+
50
+ public boolean eval(PageReader pageReader)
51
+ {
52
+ boolean l = left.getBoolean(pageReader);
53
+ boolean r = right.getBoolean(pageReader);
54
+ if (operator == Parser.EQ) {
55
+ return l == r;
56
+ }
57
+ else if (operator == Parser.NEQ) {
58
+ return l != r;
59
+ }
60
+ else {
61
+ assert(false);
62
+ return false;
63
+ }
64
+ }
65
+ }
66
+
67
+ class NumberOpExp extends BinaryOpExp
68
+ {
69
+ public NumberOpExp(ParserLiteral left, ParserLiteral right, int operator)
70
+ {
71
+ super(left, right, operator);
72
+ if (! left.isNumber()) {
73
+ throw new ConfigException(String.format("\"%s\" is not a Number column", ((IdentifierLiteral)left).name));
74
+ }
75
+ if (! right.isNumber()) {
76
+ throw new ConfigException(String.format("\"%s\" is not a Number column", ((IdentifierLiteral)right).name));
77
+ }
78
+ }
79
+
80
+ public NumberOpExp(ParserVal left, ParserVal right, int operator)
81
+ {
82
+ this((ParserLiteral)(left.obj), (ParserLiteral)(right.obj), operator);
83
+ }
84
+
85
+ public boolean eval(PageReader pageReader)
86
+ {
87
+ double l = left.getNumber(pageReader);
88
+ double r = right.getNumber(pageReader);
89
+ if (operator == Parser.EQ) {
90
+ return l == r;
91
+ }
92
+ else if (operator == Parser.NEQ) {
93
+ return l != r;
94
+ }
95
+ else if (operator == Parser.GT) {
96
+ return l > r;
97
+ }
98
+ else if (operator == Parser.GE) {
99
+ return l >= r;
100
+ }
101
+ else if (operator == Parser.LT) {
102
+ return l < r;
103
+ }
104
+ else if (operator == Parser.LE) {
105
+ return l <= r;
106
+ }
107
+ else {
108
+ assert(false);
109
+ return false;
110
+ }
111
+ }
112
+ }
113
+
114
+ class TimestampOpExp extends BinaryOpExp
115
+ {
116
+ public TimestampOpExp(ParserLiteral left, ParserLiteral right, int operator)
117
+ {
118
+ super(left, right, operator);
119
+ if (! left.isTimestamp()) {
120
+ throw new ConfigException(String.format("\"%s\" is not a Timestamp column", ((IdentifierLiteral)left).name));
121
+ }
122
+ if (! right.isTimestamp()) {
123
+ throw new ConfigException(String.format("\"%s\" is not a Timestamp column", ((IdentifierLiteral)right).name));
124
+ }
125
+ }
126
+
127
+ public TimestampOpExp(ParserVal left, ParserVal right, int operator)
128
+ {
129
+ this((ParserLiteral)(left.obj), (ParserLiteral)(right.obj), operator);
130
+ }
131
+
132
+ public boolean eval(PageReader pageReader)
133
+ {
134
+ Timestamp l = left.getTimestamp(pageReader);
135
+ Timestamp r = right.getTimestamp(pageReader);
136
+ if (operator == Parser.EQ) {
137
+ return l.equals(r);
138
+ }
139
+ else if (operator == Parser.NEQ) {
140
+ return ! l.equals(r);
141
+ }
142
+ else if (operator == Parser.GT) {
143
+ return l.compareTo(r) > 0;
144
+ }
145
+ else if (operator == Parser.GE) {
146
+ return l.compareTo(r) >= 0;
147
+ }
148
+ else if (operator == Parser.LT) {
149
+ return l.compareTo(r) < 0;
150
+ }
151
+ else if (operator == Parser.LE) {
152
+ return l.compareTo(r) <= 0;
153
+ }
154
+ else {
155
+ assert(false);
156
+ return false;
157
+ }
158
+ }
159
+ }
160
+
161
+ class StringOpExp extends BinaryOpExp
162
+ {
163
+ public StringOpExp(ParserLiteral left, ParserLiteral right, int operator)
164
+ {
165
+ super(left, right, operator);
166
+ if (! left.isString()) {
167
+ throw new ConfigException(String.format("\"%s\" is not a String column", ((IdentifierLiteral)left).name));
168
+ }
169
+ if (! right.isString()) {
170
+ throw new ConfigException(String.format("\"%s\" is not a String column", ((IdentifierLiteral)right).name));
171
+ }
172
+ }
173
+
174
+ public StringOpExp(ParserVal left, ParserVal right, int operator)
175
+ {
176
+ this((ParserLiteral)(left.obj), (ParserLiteral)(right.obj), operator);
177
+ }
178
+
179
+ public boolean eval(PageReader pageReader)
180
+ {
181
+ String l = left.getString(pageReader);
182
+ String r = right.getString(pageReader);
183
+ if (operator == Parser.EQ) {
184
+ return l.equals(r);
185
+ }
186
+ else if (operator == Parser.NEQ) {
187
+ return ! l.equals(r);
188
+ }
189
+ else if (operator == Parser.START_WITH) {
190
+ return l.startsWith(r);
191
+ }
192
+ else if (operator == Parser.END_WITH) {
193
+ return l.endsWith(r);
194
+ }
195
+ else if (operator == Parser.INCLUDE) {
196
+ return l.contains(r);
197
+ }
198
+ else {
199
+ assert(false);
200
+ return false;
201
+ }
202
+ }
203
+ }
204
+
205
+ class NullOpExp extends ParserExp
206
+ {
207
+ protected ParserLiteral val;
208
+ protected int operator;
209
+
210
+ public NullOpExp(ParserLiteral val, int operator)
211
+ {
212
+ this.val = val;
213
+ this.operator = operator;
214
+ }
215
+
216
+ public NullOpExp(ParserVal val, int operator)
217
+ {
218
+ this((ParserLiteral)(val.obj), operator);
219
+ }
220
+
221
+ public boolean eval(PageReader pageReader)
222
+ {
223
+ boolean isNull = val.isNull(pageReader);
224
+ if (operator == Parser.EQ) {
225
+ return isNull;
226
+ }
227
+ else if (operator == Parser.NEQ) {
228
+ return ! isNull;
229
+ }
230
+ else {
231
+ assert(false);
232
+ return false;
233
+ }
234
+ }
235
+ }
236
+
237
+ class LogicalOpExp extends ParserExp
238
+ {
239
+ protected ParserExp left;
240
+ protected ParserExp right;
241
+ protected int operator;
242
+
243
+ public LogicalOpExp(ParserExp left, ParserExp right, int operator)
244
+ {
245
+ this.left = left;
246
+ this.right = right;
247
+ this.operator = operator;
248
+ }
249
+
250
+ public LogicalOpExp(ParserVal left, ParserVal right, int operator)
251
+ {
252
+ this((ParserExp)(left.obj), (ParserExp)(right.obj), operator);
253
+ }
254
+
255
+ public boolean eval(PageReader pageReader)
256
+ {
257
+ boolean l = left.eval(pageReader);
258
+ boolean r = right.eval(pageReader);
259
+ if (operator == Parser.OR) {
260
+ return l || r;
261
+ }
262
+ else if (operator == Parser.AND) {
263
+ return l && r;
264
+ }
265
+ else {
266
+ assert(false);
267
+ return false;
268
+ }
269
+ }
270
+ }
271
+
272
+ class NegateOpExp extends ParserExp
273
+ {
274
+ protected ParserExp exp;
275
+
276
+ public NegateOpExp(ParserExp exp)
277
+ {
278
+ this.exp = exp;
279
+ }
280
+
281
+ public NegateOpExp(ParserVal exp)
282
+ {
283
+ this((ParserExp)(exp.obj));
284
+ }
285
+
286
+ public boolean eval(PageReader pageReader)
287
+ {
288
+ return ! exp.eval(pageReader);
289
+ }
290
+ }
@@ -0,0 +1,277 @@
1
+ package org.embulk.filter.row.where;
2
+
3
+ import com.google.common.base.Throwables;
4
+ import org.embulk.config.ConfigException;
5
+ import org.embulk.spi.Column;
6
+ import org.embulk.spi.PageReader;
7
+ import org.embulk.spi.Schema;
8
+ import org.embulk.spi.time.Timestamp;
9
+ import org.embulk.spi.time.TimestampParseException;
10
+ import org.embulk.spi.time.TimestampParser;
11
+ import org.embulk.spi.type.BooleanType;
12
+ import org.embulk.spi.type.DoubleType;
13
+ import org.embulk.spi.type.JsonType;
14
+ import org.embulk.spi.type.LongType;
15
+ import org.embulk.spi.type.StringType;
16
+ import org.embulk.spi.type.TimestampType;
17
+ import org.embulk.spi.type.Type;
18
+ import org.joda.time.DateTimeZone;
19
+ import org.jruby.embed.ScriptingContainer;
20
+ import org.msgpack.value.Value;
21
+
22
+ // Literal Node of AST (Abstract Syntax Tree)
23
+ public abstract class ParserLiteral extends ParserNode
24
+ {
25
+ static ScriptingContainer jruby;
26
+
27
+ public static void setJRuby(ScriptingContainer jruby)
28
+ {
29
+ ParserLiteral.jruby = jruby;
30
+ }
31
+
32
+ public boolean isBoolean()
33
+ {
34
+ return false;
35
+ }
36
+ public boolean isNumber()
37
+ {
38
+ return false;
39
+ }
40
+ public boolean isString()
41
+ {
42
+ return false;
43
+ }
44
+ public boolean isTimestamp()
45
+ {
46
+ return false;
47
+ }
48
+ public boolean isJson()
49
+ {
50
+ return false;
51
+ }
52
+
53
+ public boolean isNull(PageReader pageReader)
54
+ {
55
+ throw new RuntimeException();
56
+ }
57
+ public boolean getBoolean(PageReader pageReader)
58
+ {
59
+ throw new RuntimeException();
60
+ }
61
+ public double getNumber(PageReader pageReader)
62
+ {
63
+ throw new RuntimeException();
64
+ }
65
+ public String getString(PageReader pageReader)
66
+ {
67
+ throw new RuntimeException();
68
+ }
69
+ public Timestamp getTimestamp(PageReader pageReader)
70
+ {
71
+ throw new RuntimeException();
72
+ }
73
+ public Value getJson(PageReader pageReader)
74
+ {
75
+ throw new RuntimeException();
76
+ }
77
+ }
78
+
79
+ class BooleanLiteral extends ParserLiteral
80
+ {
81
+ public boolean val;
82
+
83
+ public BooleanLiteral(boolean val)
84
+ {
85
+ this.val = val;
86
+ }
87
+
88
+ public boolean isBoolean()
89
+ {
90
+ return true;
91
+ }
92
+
93
+ public boolean getBoolean(PageReader pageReader)
94
+ {
95
+ return val;
96
+ }
97
+ }
98
+
99
+ class NumberLiteral extends ParserLiteral
100
+ {
101
+ protected double val;
102
+
103
+ public NumberLiteral(double val)
104
+ {
105
+ this.val = val;
106
+ }
107
+
108
+ public NumberLiteral(String str)
109
+ {
110
+ this.val = Double.parseDouble(str);
111
+ }
112
+
113
+ public boolean isNumber()
114
+ {
115
+ return true;
116
+ }
117
+
118
+ public double getNumber(PageReader pageReader)
119
+ {
120
+ return val;
121
+ }
122
+ }
123
+
124
+ class StringLiteral extends ParserLiteral
125
+ {
126
+ protected String val;
127
+
128
+ public StringLiteral(String val)
129
+ {
130
+ this.val = val;
131
+ }
132
+
133
+ public boolean isString()
134
+ {
135
+ return true;
136
+ }
137
+
138
+ public String getString(PageReader pageReader)
139
+ {
140
+ return val;
141
+ }
142
+ }
143
+
144
+ class TimestampLiteral extends ParserLiteral
145
+ {
146
+ protected Timestamp val;
147
+ private static final DateTimeZone default_timezone = DateTimeZone.forID("UTC");
148
+
149
+ public TimestampLiteral(ParserVal val)
150
+ {
151
+ if (val.obj.getClass() == StringLiteral.class) {
152
+ initTimestampLiteral(((StringLiteral)val.obj).val);
153
+ }
154
+ else if (val.obj.getClass() == NumberLiteral.class) {
155
+ initTimestampLiteral(((NumberLiteral)(val.obj)).val);
156
+ }
157
+ else {
158
+ throw new RuntimeException();
159
+ }
160
+ }
161
+
162
+ public void initTimestampLiteral(String str)
163
+ {
164
+ String[] formats = {
165
+ "%Y-%m-%d %H:%M:%S.%N %z",
166
+ "%Y-%m-%d %H:%M:%S.%N",
167
+ "%Y-%m-%d %H:%M:%S %z",
168
+ "%Y-%m-%d %H:%M:%S",
169
+ "%Y-%m-%d %z",
170
+ "%Y-%m-%d",
171
+ };
172
+ Timestamp val = null;
173
+ TimestampParseException ex = null;
174
+ for (String format : formats) {
175
+ try {
176
+ TimestampParser timestampParser = new TimestampParser(jruby, format, default_timezone);
177
+ this.val = timestampParser.parse(str);
178
+ break;
179
+ }
180
+ catch (TimestampParseException e) {
181
+ ex = e;
182
+ }
183
+ }
184
+ if (this.val == null) {
185
+ throw Throwables.propagate(ex);
186
+ }
187
+ }
188
+
189
+ public void initTimestampLiteral(double epoch)
190
+ {
191
+ int epochSecond = (int) epoch;
192
+ long nanoAdjustment = (long) ((epoch - epochSecond) * 1000000000);
193
+ this.val = Timestamp.ofEpochSecond(epochSecond, nanoAdjustment);
194
+ }
195
+
196
+ public boolean isTimestamp()
197
+ {
198
+ return true;
199
+ }
200
+
201
+ public Timestamp getTimestamp(PageReader pageReader)
202
+ {
203
+ return val;
204
+ }
205
+ }
206
+
207
+ class IdentifierLiteral extends ParserLiteral
208
+ {
209
+ protected String name;
210
+ protected Column column;
211
+
212
+ public IdentifierLiteral(String name, Schema schema)
213
+ {
214
+ this.name = name;
215
+ this.column = schema.lookupColumn(name); // throw SchemaConfigException
216
+ // ToDo: Support filtering value with type: json
217
+ if (column.getType() instanceof JsonType) {
218
+ throw new ConfigException(String.format("Identifier for a json column '%s' is not supported", name));
219
+ }
220
+ }
221
+
222
+ public boolean isBoolean()
223
+ {
224
+ return (column.getType() instanceof BooleanType);
225
+ }
226
+ public boolean isNumber()
227
+ {
228
+ return (column.getType() instanceof LongType) || (column.getType() instanceof DoubleType);
229
+ }
230
+ public boolean isString()
231
+ {
232
+ return (column.getType() instanceof StringType);
233
+ }
234
+ public boolean isTimestamp()
235
+ {
236
+ return (column.getType() instanceof TimestampType);
237
+ }
238
+ public boolean isJson()
239
+ {
240
+ return (column.getType() instanceof JsonType);
241
+ }
242
+
243
+ public boolean isNull(PageReader pageReader)
244
+ {
245
+ return pageReader.isNull(column);
246
+ }
247
+
248
+ public boolean getBoolean(PageReader pageReader)
249
+ {
250
+ return pageReader.getBoolean(column);
251
+ }
252
+
253
+ public double getNumber(PageReader pageReader)
254
+ {
255
+ if (column.getType() instanceof LongType) {
256
+ return (double) pageReader.getLong(column);
257
+ }
258
+ else {
259
+ return pageReader.getDouble(column);
260
+ }
261
+ }
262
+
263
+ public String getString(PageReader pageReader)
264
+ {
265
+ return pageReader.getString(column);
266
+ }
267
+
268
+ public Timestamp getTimestamp(PageReader pageReader)
269
+ {
270
+ return pageReader.getTimestamp(column);
271
+ }
272
+
273
+ public Value getJson(PageReader pageReader)
274
+ {
275
+ return pageReader.getJson(column);
276
+ }
277
+ }
@@ -0,0 +1,6 @@
1
+ package org.embulk.filter.row.where;
2
+
3
/**
 * Node of the AST (Abstract Syntax Tree).
 *
 * Common root type of the tree; it declares no members of its own.
 */
public abstract class ParserNode
{
    // Intentionally empty: serves only as the shared base type.
}
@@ -0,0 +1,78 @@
1
+ //#############################################
2
+ //## file: Parser.java
3
+ //## Generated by Byacc/j
4
+ //#############################################
5
+ package org.embulk.filter.row.where;
6
+
7
/**
 * BYACC/J semantic value for the parser {@code Parser}.
 *
 * Plays the role of the yacc/C 'union' directive: one holder type with a
 * slot per kind of value. Each constructor fills exactly one slot and leaves
 * the others at their Java defaults.
 */
public class ParserVal
{
    /** Integer slot of this 'union'. */
    public int ival;

    /** Double slot of this 'union'. */
    public double dval;

    /** String slot of this 'union'. */
    public String sval;

    /** Object slot of this 'union'. */
    public Object obj;

    // ---------------------------------------------
    // Constructors
    // ---------------------------------------------

    /** Creates a value with every slot left at its default. */
    public ParserVal()
    {
    }

    /**
     * Creates a value holding an int.
     *
     * @param val stored in {@link #ival}
     */
    public ParserVal(int val)
    {
        this.ival = val;
    }

    /**
     * Creates a value holding a double.
     *
     * @param val stored in {@link #dval}
     */
    public ParserVal(double val)
    {
        this.dval = val;
    }

    /**
     * Creates a value holding a String.
     *
     * @param val stored in {@link #sval}
     */
    public ParserVal(String val)
    {
        this.sval = val;
    }

    /**
     * Creates a value holding an arbitrary Object.
     *
     * @param val stored in {@link #obj}
     */
    public ParserVal(Object val)
    {
        this.obj = val;
    }
} // end class
75
+
76
+ //#############################################
77
+ //## E N D O F F I L E
78
+ //#############################################