embulk-filter-row 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (28) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/CHANGELOG.md +6 -0
  4. data/README.md +124 -4
  5. data/build.gradle +15 -4
  6. data/classpath/embulk-filter-row-0.3.0.jar +0 -0
  7. data/example/and.yml +0 -7
  8. data/example/example.yml +0 -7
  9. data/example/where.yml +28 -0
  10. data/script/byaccj.sh +29 -0
  11. data/src/main/java/org/embulk/filter/row/{AbstractColumnVisitor.java → AbstractGuardColumnVisitor.java} +9 -17
  12. data/src/main/java/org/embulk/filter/row/BuildColumnVisitorImpl.java +98 -0
  13. data/src/main/java/org/embulk/filter/row/{ColumnVisitorAndImpl.java → GuardColumnVisitorAndImpl.java} +11 -45
  14. data/src/main/java/org/embulk/filter/row/{ColumnVisitorOrImpl.java → GuardColumnVisitorOrImpl.java} +11 -45
  15. data/src/main/java/org/embulk/filter/row/GuardColumnVisitorWhereImpl.java +28 -0
  16. data/src/main/java/org/embulk/filter/row/RowFilterPlugin.java +49 -16
  17. data/src/main/java/org/embulk/filter/row/where/Parser.java +831 -0
  18. data/src/main/java/org/embulk/filter/row/where/ParserExp.java +290 -0
  19. data/src/main/java/org/embulk/filter/row/where/ParserLiteral.java +277 -0
  20. data/src/main/java/org/embulk/filter/row/where/ParserNode.java +6 -0
  21. data/src/main/java/org/embulk/filter/row/where/ParserVal.java +78 -0
  22. data/src/main/java/org/embulk/filter/row/where/Yylex.java +833 -0
  23. data/src/main/java/org/embulk/filter/row/where/_lexer.l +108 -0
  24. data/src/main/java/org/embulk/filter/row/where/_parser.y +137 -0
  25. data/src/test/java/org/embulk/filter/row/where/TestParser.java +383 -0
  26. data/src/test/java/org/embulk/filter/row/where/TestYylex.java +256 -0
  27. metadata +19 -5
  28. data/classpath/embulk-filter-row-0.2.2.jar +0 -0
@@ -0,0 +1,290 @@
1
+ package org.embulk.filter.row.where;
2
+
3
+ import org.embulk.config.ConfigException;
4
+ import org.embulk.spi.PageReader;
5
+ import org.embulk.spi.time.Timestamp;
6
+
7
+ // Operation Node of AST (Abstract Syntax Tree)
8
+ public abstract class ParserExp extends ParserNode
9
+ {
10
+ public abstract boolean eval(PageReader pageReader);
11
+ }
12
+
13
+ abstract class BinaryOpExp extends ParserExp
14
+ {
15
+ protected ParserLiteral left;
16
+ protected ParserLiteral right;
17
+ protected int operator;
18
+
19
+ public BinaryOpExp(ParserLiteral left, ParserLiteral right, int operator)
20
+ {
21
+ this.left = left;
22
+ this.right = right;
23
+ this.operator = operator;
24
+ }
25
+
26
+ public BinaryOpExp(ParserVal left, ParserVal right, int operator)
27
+ {
28
+ this((ParserLiteral)(left.obj), (ParserLiteral)(right.obj), operator);
29
+ }
30
+ }
31
+
32
+ class BooleanOpExp extends BinaryOpExp
33
+ {
34
+ public BooleanOpExp(ParserLiteral left, ParserLiteral right, int operator)
35
+ {
36
+ super(left, right, operator);
37
+ if (! left.isBoolean()) {
38
+ throw new ConfigException(String.format("\"%s\" is not a Boolean column", ((IdentifierLiteral)left).name));
39
+ }
40
+ if (! right.isBoolean()) {
41
+ throw new ConfigException(String.format("\"%s\" is not a Boolean column", ((IdentifierLiteral)right).name));
42
+ }
43
+ }
44
+
45
+ public BooleanOpExp(ParserVal left, ParserVal right, int operator)
46
+ {
47
+ this((ParserLiteral)(left.obj), (ParserLiteral)(right.obj), operator);
48
+ }
49
+
50
+ public boolean eval(PageReader pageReader)
51
+ {
52
+ boolean l = left.getBoolean(pageReader);
53
+ boolean r = right.getBoolean(pageReader);
54
+ if (operator == Parser.EQ) {
55
+ return l == r;
56
+ }
57
+ else if (operator == Parser.NEQ) {
58
+ return l != r;
59
+ }
60
+ else {
61
+ assert(false);
62
+ return false;
63
+ }
64
+ }
65
+ }
66
+
67
+ class NumberOpExp extends BinaryOpExp
68
+ {
69
+ public NumberOpExp(ParserLiteral left, ParserLiteral right, int operator)
70
+ {
71
+ super(left, right, operator);
72
+ if (! left.isNumber()) {
73
+ throw new ConfigException(String.format("\"%s\" is not a Number column", ((IdentifierLiteral)left).name));
74
+ }
75
+ if (! right.isNumber()) {
76
+ throw new ConfigException(String.format("\"%s\" is not a Number column", ((IdentifierLiteral)right).name));
77
+ }
78
+ }
79
+
80
+ public NumberOpExp(ParserVal left, ParserVal right, int operator)
81
+ {
82
+ this((ParserLiteral)(left.obj), (ParserLiteral)(right.obj), operator);
83
+ }
84
+
85
+ public boolean eval(PageReader pageReader)
86
+ {
87
+ double l = left.getNumber(pageReader);
88
+ double r = right.getNumber(pageReader);
89
+ if (operator == Parser.EQ) {
90
+ return l == r;
91
+ }
92
+ else if (operator == Parser.NEQ) {
93
+ return l != r;
94
+ }
95
+ else if (operator == Parser.GT) {
96
+ return l > r;
97
+ }
98
+ else if (operator == Parser.GE) {
99
+ return l >= r;
100
+ }
101
+ else if (operator == Parser.LT) {
102
+ return l < r;
103
+ }
104
+ else if (operator == Parser.LE) {
105
+ return l <= r;
106
+ }
107
+ else {
108
+ assert(false);
109
+ return false;
110
+ }
111
+ }
112
+ }
113
+
114
+ class TimestampOpExp extends BinaryOpExp
115
+ {
116
+ public TimestampOpExp(ParserLiteral left, ParserLiteral right, int operator)
117
+ {
118
+ super(left, right, operator);
119
+ if (! left.isTimestamp()) {
120
+ throw new ConfigException(String.format("\"%s\" is not a Timestamp column", ((IdentifierLiteral)left).name));
121
+ }
122
+ if (! right.isTimestamp()) {
123
+ throw new ConfigException(String.format("\"%s\" is not a Timestamp column", ((IdentifierLiteral)right).name));
124
+ }
125
+ }
126
+
127
+ public TimestampOpExp(ParserVal left, ParserVal right, int operator)
128
+ {
129
+ this((ParserLiteral)(left.obj), (ParserLiteral)(right.obj), operator);
130
+ }
131
+
132
+ public boolean eval(PageReader pageReader)
133
+ {
134
+ Timestamp l = left.getTimestamp(pageReader);
135
+ Timestamp r = right.getTimestamp(pageReader);
136
+ if (operator == Parser.EQ) {
137
+ return l.equals(r);
138
+ }
139
+ else if (operator == Parser.NEQ) {
140
+ return ! l.equals(r);
141
+ }
142
+ else if (operator == Parser.GT) {
143
+ return l.compareTo(r) > 0;
144
+ }
145
+ else if (operator == Parser.GE) {
146
+ return l.compareTo(r) >= 0;
147
+ }
148
+ else if (operator == Parser.LT) {
149
+ return l.compareTo(r) < 0;
150
+ }
151
+ else if (operator == Parser.LE) {
152
+ return l.compareTo(r) <= 0;
153
+ }
154
+ else {
155
+ assert(false);
156
+ return false;
157
+ }
158
+ }
159
+ }
160
+
161
+ class StringOpExp extends BinaryOpExp
162
+ {
163
+ public StringOpExp(ParserLiteral left, ParserLiteral right, int operator)
164
+ {
165
+ super(left, right, operator);
166
+ if (! left.isString()) {
167
+ throw new ConfigException(String.format("\"%s\" is not a String column", ((IdentifierLiteral)left).name));
168
+ }
169
+ if (! right.isString()) {
170
+ throw new ConfigException(String.format("\"%s\" is not a String column", ((IdentifierLiteral)right).name));
171
+ }
172
+ }
173
+
174
+ public StringOpExp(ParserVal left, ParserVal right, int operator)
175
+ {
176
+ this((ParserLiteral)(left.obj), (ParserLiteral)(right.obj), operator);
177
+ }
178
+
179
+ public boolean eval(PageReader pageReader)
180
+ {
181
+ String l = left.getString(pageReader);
182
+ String r = right.getString(pageReader);
183
+ if (operator == Parser.EQ) {
184
+ return l.equals(r);
185
+ }
186
+ else if (operator == Parser.NEQ) {
187
+ return ! l.equals(r);
188
+ }
189
+ else if (operator == Parser.START_WITH) {
190
+ return l.startsWith(r);
191
+ }
192
+ else if (operator == Parser.END_WITH) {
193
+ return l.endsWith(r);
194
+ }
195
+ else if (operator == Parser.INCLUDE) {
196
+ return l.contains(r);
197
+ }
198
+ else {
199
+ assert(false);
200
+ return false;
201
+ }
202
+ }
203
+ }
204
+
205
+ class NullOpExp extends ParserExp
206
+ {
207
+ protected ParserLiteral val;
208
+ protected int operator;
209
+
210
+ public NullOpExp(ParserLiteral val, int operator)
211
+ {
212
+ this.val = val;
213
+ this.operator = operator;
214
+ }
215
+
216
+ public NullOpExp(ParserVal val, int operator)
217
+ {
218
+ this((ParserLiteral)(val.obj), operator);
219
+ }
220
+
221
+ public boolean eval(PageReader pageReader)
222
+ {
223
+ boolean isNull = val.isNull(pageReader);
224
+ if (operator == Parser.EQ) {
225
+ return isNull;
226
+ }
227
+ else if (operator == Parser.NEQ) {
228
+ return ! isNull;
229
+ }
230
+ else {
231
+ assert(false);
232
+ return false;
233
+ }
234
+ }
235
+ }
236
+
237
+ class LogicalOpExp extends ParserExp
238
+ {
239
+ protected ParserExp left;
240
+ protected ParserExp right;
241
+ protected int operator;
242
+
243
+ public LogicalOpExp(ParserExp left, ParserExp right, int operator)
244
+ {
245
+ this.left = left;
246
+ this.right = right;
247
+ this.operator = operator;
248
+ }
249
+
250
+ public LogicalOpExp(ParserVal left, ParserVal right, int operator)
251
+ {
252
+ this((ParserExp)(left.obj), (ParserExp)(right.obj), operator);
253
+ }
254
+
255
+ public boolean eval(PageReader pageReader)
256
+ {
257
+ boolean l = left.eval(pageReader);
258
+ boolean r = right.eval(pageReader);
259
+ if (operator == Parser.OR) {
260
+ return l || r;
261
+ }
262
+ else if (operator == Parser.AND) {
263
+ return l && r;
264
+ }
265
+ else {
266
+ assert(false);
267
+ return false;
268
+ }
269
+ }
270
+ }
271
+
272
+ class NegateOpExp extends ParserExp
273
+ {
274
+ protected ParserExp exp;
275
+
276
+ public NegateOpExp(ParserExp exp)
277
+ {
278
+ this.exp = exp;
279
+ }
280
+
281
+ public NegateOpExp(ParserVal exp)
282
+ {
283
+ this((ParserExp)(exp.obj));
284
+ }
285
+
286
+ public boolean eval(PageReader pageReader)
287
+ {
288
+ return ! exp.eval(pageReader);
289
+ }
290
+ }
@@ -0,0 +1,277 @@
1
+ package org.embulk.filter.row.where;
2
+
3
+ import com.google.common.base.Throwables;
4
+ import org.embulk.config.ConfigException;
5
+ import org.embulk.spi.Column;
6
+ import org.embulk.spi.PageReader;
7
+ import org.embulk.spi.Schema;
8
+ import org.embulk.spi.time.Timestamp;
9
+ import org.embulk.spi.time.TimestampParseException;
10
+ import org.embulk.spi.time.TimestampParser;
11
+ import org.embulk.spi.type.BooleanType;
12
+ import org.embulk.spi.type.DoubleType;
13
+ import org.embulk.spi.type.JsonType;
14
+ import org.embulk.spi.type.LongType;
15
+ import org.embulk.spi.type.StringType;
16
+ import org.embulk.spi.type.TimestampType;
17
+ import org.embulk.spi.type.Type;
18
+ import org.joda.time.DateTimeZone;
19
+ import org.jruby.embed.ScriptingContainer;
20
+ import org.msgpack.value.Value;
21
+
22
+ // Literal Node of AST (Abstract Syntax Tree)
23
+ public abstract class ParserLiteral extends ParserNode
24
+ {
25
+ static ScriptingContainer jruby;
26
+
27
+ public static void setJRuby(ScriptingContainer jruby)
28
+ {
29
+ ParserLiteral.jruby = jruby;
30
+ }
31
+
32
+ public boolean isBoolean()
33
+ {
34
+ return false;
35
+ }
36
+ public boolean isNumber()
37
+ {
38
+ return false;
39
+ }
40
+ public boolean isString()
41
+ {
42
+ return false;
43
+ }
44
+ public boolean isTimestamp()
45
+ {
46
+ return false;
47
+ }
48
+ public boolean isJson()
49
+ {
50
+ return false;
51
+ }
52
+
53
+ public boolean isNull(PageReader pageReader)
54
+ {
55
+ throw new RuntimeException();
56
+ }
57
+ public boolean getBoolean(PageReader pageReader)
58
+ {
59
+ throw new RuntimeException();
60
+ }
61
+ public double getNumber(PageReader pageReader)
62
+ {
63
+ throw new RuntimeException();
64
+ }
65
+ public String getString(PageReader pageReader)
66
+ {
67
+ throw new RuntimeException();
68
+ }
69
+ public Timestamp getTimestamp(PageReader pageReader)
70
+ {
71
+ throw new RuntimeException();
72
+ }
73
+ public Value getJson(PageReader pageReader)
74
+ {
75
+ throw new RuntimeException();
76
+ }
77
+ }
78
+
79
+ class BooleanLiteral extends ParserLiteral
80
+ {
81
+ public boolean val;
82
+
83
+ public BooleanLiteral(boolean val)
84
+ {
85
+ this.val = val;
86
+ }
87
+
88
+ public boolean isBoolean()
89
+ {
90
+ return true;
91
+ }
92
+
93
+ public boolean getBoolean(PageReader pageReader)
94
+ {
95
+ return val;
96
+ }
97
+ }
98
+
99
+ class NumberLiteral extends ParserLiteral
100
+ {
101
+ protected double val;
102
+
103
+ public NumberLiteral(double val)
104
+ {
105
+ this.val = val;
106
+ }
107
+
108
+ public NumberLiteral(String str)
109
+ {
110
+ this.val = Double.parseDouble(str);
111
+ }
112
+
113
+ public boolean isNumber()
114
+ {
115
+ return true;
116
+ }
117
+
118
+ public double getNumber(PageReader pageReader)
119
+ {
120
+ return val;
121
+ }
122
+ }
123
+
124
+ class StringLiteral extends ParserLiteral
125
+ {
126
+ protected String val;
127
+
128
+ public StringLiteral(String val)
129
+ {
130
+ this.val = val;
131
+ }
132
+
133
+ public boolean isString()
134
+ {
135
+ return true;
136
+ }
137
+
138
+ public String getString(PageReader pageReader)
139
+ {
140
+ return val;
141
+ }
142
+ }
143
+
144
+ class TimestampLiteral extends ParserLiteral
145
+ {
146
+ protected Timestamp val;
147
+ private static final DateTimeZone default_timezone = DateTimeZone.forID("UTC");
148
+
149
+ public TimestampLiteral(ParserVal val)
150
+ {
151
+ if (val.obj.getClass() == StringLiteral.class) {
152
+ initTimestampLiteral(((StringLiteral)val.obj).val);
153
+ }
154
+ else if (val.obj.getClass() == NumberLiteral.class) {
155
+ initTimestampLiteral(((NumberLiteral)(val.obj)).val);
156
+ }
157
+ else {
158
+ throw new RuntimeException();
159
+ }
160
+ }
161
+
162
+ public void initTimestampLiteral(String str)
163
+ {
164
+ String[] formats = {
165
+ "%Y-%m-%d %H:%M:%S.%N %z",
166
+ "%Y-%m-%d %H:%M:%S.%N",
167
+ "%Y-%m-%d %H:%M:%S %z",
168
+ "%Y-%m-%d %H:%M:%S",
169
+ "%Y-%m-%d %z",
170
+ "%Y-%m-%d",
171
+ };
172
+ Timestamp val = null;
173
+ TimestampParseException ex = null;
174
+ for (String format : formats) {
175
+ try {
176
+ TimestampParser timestampParser = new TimestampParser(jruby, format, default_timezone);
177
+ this.val = timestampParser.parse(str);
178
+ break;
179
+ }
180
+ catch (TimestampParseException e) {
181
+ ex = e;
182
+ }
183
+ }
184
+ if (this.val == null) {
185
+ throw Throwables.propagate(ex);
186
+ }
187
+ }
188
+
189
+ public void initTimestampLiteral(double epoch)
190
+ {
191
+ int epochSecond = (int) epoch;
192
+ long nanoAdjustment = (long) ((epoch - epochSecond) * 1000000000);
193
+ this.val = Timestamp.ofEpochSecond(epochSecond, nanoAdjustment);
194
+ }
195
+
196
+ public boolean isTimestamp()
197
+ {
198
+ return true;
199
+ }
200
+
201
+ public Timestamp getTimestamp(PageReader pageReader)
202
+ {
203
+ return val;
204
+ }
205
+ }
206
+
207
+ class IdentifierLiteral extends ParserLiteral
208
+ {
209
+ protected String name;
210
+ protected Column column;
211
+
212
+ public IdentifierLiteral(String name, Schema schema)
213
+ {
214
+ this.name = name;
215
+ this.column = schema.lookupColumn(name); // throw SchemaConfigException
216
+ // ToDo: Support filtering value with type: json
217
+ if (column.getType() instanceof JsonType) {
218
+ throw new ConfigException(String.format("Identifier for a json column '%s' is not supported", name));
219
+ }
220
+ }
221
+
222
+ public boolean isBoolean()
223
+ {
224
+ return (column.getType() instanceof BooleanType);
225
+ }
226
+ public boolean isNumber()
227
+ {
228
+ return (column.getType() instanceof LongType) || (column.getType() instanceof DoubleType);
229
+ }
230
+ public boolean isString()
231
+ {
232
+ return (column.getType() instanceof StringType);
233
+ }
234
+ public boolean isTimestamp()
235
+ {
236
+ return (column.getType() instanceof TimestampType);
237
+ }
238
+ public boolean isJson()
239
+ {
240
+ return (column.getType() instanceof JsonType);
241
+ }
242
+
243
+ public boolean isNull(PageReader pageReader)
244
+ {
245
+ return pageReader.isNull(column);
246
+ }
247
+
248
+ public boolean getBoolean(PageReader pageReader)
249
+ {
250
+ return pageReader.getBoolean(column);
251
+ }
252
+
253
+ public double getNumber(PageReader pageReader)
254
+ {
255
+ if (column.getType() instanceof LongType) {
256
+ return (double) pageReader.getLong(column);
257
+ }
258
+ else {
259
+ return pageReader.getDouble(column);
260
+ }
261
+ }
262
+
263
+ public String getString(PageReader pageReader)
264
+ {
265
+ return pageReader.getString(column);
266
+ }
267
+
268
+ public Timestamp getTimestamp(PageReader pageReader)
269
+ {
270
+ return pageReader.getTimestamp(column);
271
+ }
272
+
273
+ public Value getJson(PageReader pageReader)
274
+ {
275
+ return pageReader.getJson(column);
276
+ }
277
+ }
@@ -0,0 +1,6 @@
1
+ package org.embulk.filter.row.where;
2
+
3
/**
 * Node of the AST (Abstract Syntax Tree).
 *
 * Declares no members; it only serves as the common supertype of the
 * expression and literal nodes.
 */
public abstract class ParserNode
{
}
@@ -0,0 +1,78 @@
1
+ //#############################################
2
+ //## file: Parser.java
3
+ //## Generated by Byacc/j
4
+ //#############################################
5
+ package org.embulk.filter.row.where;
6
+
7
/**
 * BYACC/J semantic value for the parser {@code Parser}.
 *
 * Plays the role of the yacc/C {@code union} directive: one holder with a
 * slot per value kind. Each constructor fills exactly one slot and leaves the
 * others at their Java defaults.
 */
public class ParserVal
{
    /** Integer slot of this 'union'. */
    public int ival;

    /** Double slot of this 'union'. */
    public double dval;

    /** String slot of this 'union'. */
    public String sval;

    /** Object slot of this 'union'. */
    public Object obj;

    /**
     * Creates a value with no slot set.
     */
    public ParserVal()
    {
    }

    /**
     * Creates a value carrying an int.
     *
     * @param val stored in {@link #ival}
     */
    public ParserVal(int val)
    {
        this.ival = val;
    }

    /**
     * Creates a value carrying a double.
     *
     * @param val stored in {@link #dval}
     */
    public ParserVal(double val)
    {
        this.dval = val;
    }

    /**
     * Creates a value carrying a string.
     *
     * @param val stored in {@link #sval}
     */
    public ParserVal(String val)
    {
        this.sval = val;
    }

    /**
     * Creates a value carrying an arbitrary object.
     *
     * @param val stored in {@link #obj}
     */
    public ParserVal(Object val)
    {
        this.obj = val;
    }
}
75
+
76
+ //#############################################
77
+ //## E N D O F F I L E
78
+ //#############################################