embulk-filter-row 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +16 -13
- data/build.gradle +3 -3
- data/src/main/java/org/embulk/filter/row/AbstractGuardColumnVisitor.java +1 -1
- data/src/main/java/org/embulk/filter/row/RowFilterPlugin.java +0 -1
- data/src/main/java/org/embulk/filter/row/condition/ConditionFactory.java +2 -5
- data/src/main/java/org/embulk/filter/row/where/ParserLiteral.java +1 -8
- data/src/test/java/org/embulk/filter/row/condition/TestConditionFactory.java +19 -31
- data/src/test/java/org/embulk/filter/row/where/TestParser.java +0 -4
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54615ee2184dfcb3791aa6d92922f1f64a9905a5
|
4
|
+
data.tar.gz: 5558f2a44121b9fdbfcee09d4f1d048ef934d20c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 64168aceba7a817f404a6520c3e6ba5e499e9797bb6dcf97ca71eec26684a40f93f94a687b3e8284799c4ab6e09343cef416f68159adca30546c919da8f29390
|
7
|
+
data.tar.gz: be28eba1b0afa8edbeae6c2857ee645ce9319b2cf10eac6ce57720321789121676ce3706dc280051bd887b51a05b61d58be6913e7638670fb1b210e92a31a852
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@ A filter plugin for Embulk to filter out rows
|
|
6
6
|
|
7
7
|
## Configuration
|
8
8
|
|
9
|
-
|
9
|
+
Requirement: version >= 0.3.0
|
10
10
|
|
11
11
|
* **where**: Select only rows which match the conditions written in SQL-like syntax. See [SQL-like Syntax](#sql-like-syntax)
|
12
12
|
|
@@ -23,13 +23,13 @@ filters:
|
|
23
23
|
- type: row
|
24
24
|
where: |-
|
25
25
|
(
|
26
|
-
|
27
|
-
|
26
|
+
string_column START_WITH 'str' AND
|
27
|
+
number_column > 1.0
|
28
28
|
)
|
29
29
|
OR
|
30
30
|
(
|
31
|
-
|
32
|
-
"
|
31
|
+
time_column = TIMESTAMP '2016-01-01 +0900' AND
|
32
|
+
"true_column" = true
|
33
33
|
)
|
34
34
|
```
|
35
35
|
|
@@ -37,20 +37,22 @@ See [SQL-like Syntax](#sql-like-syntax) for more details
|
|
37
37
|
|
38
38
|
# SQL-like Syntax
|
39
39
|
|
40
|
-
|
40
|
+
```sql
|
41
|
+
where: column1 = 'str'
|
42
|
+
```
|
41
43
|
|
42
44
|
```sql
|
43
45
|
where: |-
|
44
46
|
(
|
45
|
-
|
46
|
-
|
47
|
+
string_column START_WITH 'str' AND
|
48
|
+
number_column > 1.0
|
47
49
|
|
48
50
|
)
|
49
51
|
OR
|
50
52
|
(
|
51
|
-
|
52
|
-
"
|
53
|
-
|
53
|
+
time_column = TIMESTAMP '2016-01-01 +0900' AND
|
54
|
+
"true_column" = true AND
|
55
|
+
string_column REGEXP '^reg'
|
54
56
|
)
|
55
57
|
```
|
56
58
|
|
@@ -227,8 +229,9 @@ filters:
|
|
227
229
|
## Comparisons
|
228
230
|
|
229
231
|
* [embulk-filter-calcite](https://github.com/muga/embulk-filter-calcite)
|
230
|
-
* embulk-filter-calcite is a pretty nice plugin which enables us to write SQL query to filter embulk records
|
231
|
-
*
|
232
|
+
* embulk-filter-calcite is a pretty nice plugin which enables us to write SQL queries to filter embulk records, supporting not only `WHERE` but also `SELECT`.
|
233
|
+
* Based on [my benchmark (Japanese)](http://qiita.com/sonots/items/a70482d29862de87624d), embulk-filter-row was faster than embulk-filter-calcite.
|
234
|
+
* Choose whichever one best fits your needs.
|
232
235
|
|
233
236
|
## ToDo
|
234
237
|
|
data/build.gradle
CHANGED
@@ -15,13 +15,13 @@ configurations {
|
|
15
15
|
provided
|
16
16
|
}
|
17
17
|
|
18
|
-
version = "0.
|
18
|
+
version = "0.5.0"
|
19
19
|
sourceCompatibility = 1.7
|
20
20
|
targetCompatibility = 1.7
|
21
21
|
|
22
22
|
dependencies {
|
23
|
-
compile "org.embulk:embulk-core:0.8
|
24
|
-
provided "org.embulk:embulk-core:0.8
|
23
|
+
compile "org.embulk:embulk-core:0.8.29+"
|
24
|
+
provided "org.embulk:embulk-core:0.8.29+"
|
25
25
|
|
26
26
|
compile "org.jruby.joni:joni:2.1.11"
|
27
27
|
compile "org.jruby.jcodings:jcodings:1.0.18"
|
@@ -40,7 +40,7 @@ abstract class AbstractGuardColumnVisitor
|
|
40
40
|
String columnName = conditionConfig.getColumn();
|
41
41
|
for (Column column : outputSchema.getColumns()) {
|
42
42
|
if (columnName.equals(column.getName())) {
|
43
|
-
ConditionFactory factory = new ConditionFactory(
|
43
|
+
ConditionFactory factory = new ConditionFactory(column, conditionConfig);
|
44
44
|
Condition condition = factory.createCondition();
|
45
45
|
conditionMap.get(columnName).add(condition);
|
46
46
|
break;
|
@@ -55,7 +55,6 @@ public class RowFilterPlugin implements FilterPlugin
|
|
55
55
|
FilterPlugin.Control control)
|
56
56
|
{
|
57
57
|
PluginTask task = config.loadConfig(PluginTask.class);
|
58
|
-
ParserLiteral.setJRuby(task.getJRuby());
|
59
58
|
|
60
59
|
configure(task, inputSchema);
|
61
60
|
Schema outputSchema = inputSchema;
|
@@ -15,11 +15,9 @@ import org.embulk.spi.type.StringType;
|
|
15
15
|
import org.embulk.spi.type.TimestampType;
|
16
16
|
import org.embulk.spi.type.Type;
|
17
17
|
import org.joda.time.DateTimeZone;
|
18
|
-
import org.jruby.embed.ScriptingContainer;
|
19
18
|
|
20
19
|
public class ConditionFactory
|
21
20
|
{
|
22
|
-
private final ScriptingContainer jruby;
|
23
21
|
private Column column;
|
24
22
|
private String columnName;
|
25
23
|
private Type columnType;
|
@@ -27,9 +25,8 @@ public class ConditionFactory
|
|
27
25
|
private String operator;
|
28
26
|
private boolean not;
|
29
27
|
|
30
|
-
public ConditionFactory(
|
28
|
+
public ConditionFactory(Column column, ConditionConfig conditionConfig)
|
31
29
|
{
|
32
|
-
this.jruby = jruby;
|
33
30
|
this.column = column;
|
34
31
|
this.columnName = column.getName();
|
35
32
|
this.columnType = column.getType();
|
@@ -140,7 +137,7 @@ public class ConditionFactory
|
|
140
137
|
String format = (String) conditionConfig.getFormat().get();
|
141
138
|
DateTimeZone timezone = DateTimeZone.forID((String) conditionConfig.getTimezone().get());
|
142
139
|
|
143
|
-
TimestampParser parser = new TimestampParser(
|
140
|
+
TimestampParser parser = new TimestampParser(format, timezone);
|
144
141
|
try {
|
145
142
|
Timestamp timestamp = parser.parse(argument);
|
146
143
|
return new TimestampCondition(operator, timestamp, not);
|
@@ -16,20 +16,13 @@ import org.embulk.spi.type.StringType;
|
|
16
16
|
import org.embulk.spi.type.TimestampType;
|
17
17
|
import org.embulk.spi.type.Type;
|
18
18
|
import org.joda.time.DateTimeZone;
|
19
|
-
import org.jruby.embed.ScriptingContainer;
|
20
19
|
import org.msgpack.value.Value;
|
21
20
|
|
22
21
|
// Literal Node of AST (Abstract Syntax Tree)
|
23
22
|
public abstract class ParserLiteral extends ParserNode
|
24
23
|
{
|
25
|
-
protected static ScriptingContainer jruby;
|
26
24
|
protected String yytext;
|
27
25
|
|
28
|
-
public static void setJRuby(ScriptingContainer jruby)
|
29
|
-
{
|
30
|
-
ParserLiteral.jruby = jruby;
|
31
|
-
}
|
32
|
-
|
33
26
|
public boolean isBoolean()
|
34
27
|
{
|
35
28
|
return false;
|
@@ -185,7 +178,7 @@ class TimestampLiteral extends ParserLiteral
|
|
185
178
|
TimestampParseException ex = null;
|
186
179
|
for (String format : formats) {
|
187
180
|
try {
|
188
|
-
TimestampParser timestampParser = new TimestampParser(
|
181
|
+
TimestampParser timestampParser = new TimestampParser(format, default_timezone);
|
189
182
|
this.val = timestampParser.parse(literal.val);
|
190
183
|
break;
|
191
184
|
}
|
@@ -6,7 +6,6 @@ import org.embulk.config.ConfigException;
|
|
6
6
|
import org.embulk.config.TaskSource;
|
7
7
|
import org.embulk.spi.Column;
|
8
8
|
|
9
|
-
import org.jruby.embed.ScriptingContainer;
|
10
9
|
import org.junit.Test;
|
11
10
|
|
12
11
|
import static org.embulk.spi.type.Types.BOOLEAN;
|
@@ -63,11 +62,8 @@ public class TestConditionFactory
|
|
63
62
|
}
|
64
63
|
}
|
65
64
|
|
66
|
-
private final ScriptingContainer jruby;
|
67
|
-
|
68
65
|
public TestConditionFactory()
|
69
66
|
{
|
70
|
-
jruby = new ScriptingContainer();
|
71
67
|
}
|
72
68
|
|
73
69
|
@Test
|
@@ -83,7 +79,7 @@ public class TestConditionFactory
|
|
83
79
|
return Optional.of("IS NULL");
|
84
80
|
}
|
85
81
|
};
|
86
|
-
condition = (BooleanCondition) new ConditionFactory(
|
82
|
+
condition = (BooleanCondition) new ConditionFactory(column, config).createCondition();
|
87
83
|
assertTrue(condition.compare(null));
|
88
84
|
|
89
85
|
config = new DefaultConditionConfig() {
|
@@ -97,7 +93,7 @@ public class TestConditionFactory
|
|
97
93
|
}
|
98
94
|
};
|
99
95
|
try {
|
100
|
-
condition = (BooleanCondition) new ConditionFactory(
|
96
|
+
condition = (BooleanCondition) new ConditionFactory(column, config).createCondition();
|
101
97
|
fail("Argument is required");
|
102
98
|
}
|
103
99
|
catch (ConfigException e) {
|
@@ -113,7 +109,7 @@ public class TestConditionFactory
|
|
113
109
|
return Optional.of((Object) new Boolean(true));
|
114
110
|
}
|
115
111
|
};
|
116
|
-
condition = (BooleanCondition) new ConditionFactory(
|
112
|
+
condition = (BooleanCondition) new ConditionFactory(column, config).createCondition();
|
117
113
|
assertTrue(condition.compare(new Boolean(true)));
|
118
114
|
|
119
115
|
config = new DefaultConditionConfig() {
|
@@ -127,7 +123,7 @@ public class TestConditionFactory
|
|
127
123
|
}
|
128
124
|
};
|
129
125
|
try {
|
130
|
-
condition = (BooleanCondition) new ConditionFactory(
|
126
|
+
condition = (BooleanCondition) new ConditionFactory(column, config).createCondition();
|
131
127
|
fail("Argument type mismatch");
|
132
128
|
}
|
133
129
|
catch (ConfigException e) {
|
@@ -147,7 +143,7 @@ public class TestConditionFactory
|
|
147
143
|
return Optional.of("IS NULL");
|
148
144
|
}
|
149
145
|
};
|
150
|
-
condition = (DoubleCondition) new ConditionFactory(
|
146
|
+
condition = (DoubleCondition) new ConditionFactory(column, config).createCondition();
|
151
147
|
assertTrue(condition.compare(null));
|
152
148
|
|
153
149
|
config = new DefaultConditionConfig() {
|
@@ -161,7 +157,7 @@ public class TestConditionFactory
|
|
161
157
|
}
|
162
158
|
};
|
163
159
|
try {
|
164
|
-
condition = (DoubleCondition) new ConditionFactory(
|
160
|
+
condition = (DoubleCondition) new ConditionFactory(column, config).createCondition();
|
165
161
|
fail("Argument is required");
|
166
162
|
}
|
167
163
|
catch (ConfigException e) {
|
@@ -177,7 +173,7 @@ public class TestConditionFactory
|
|
177
173
|
return Optional.of((Object) new Double(10));
|
178
174
|
}
|
179
175
|
};
|
180
|
-
condition = (DoubleCondition) new ConditionFactory(
|
176
|
+
condition = (DoubleCondition) new ConditionFactory(column, config).createCondition();
|
181
177
|
assertTrue(condition.compare(new Double(10)));
|
182
178
|
|
183
179
|
config = new DefaultConditionConfig() {
|
@@ -191,7 +187,7 @@ public class TestConditionFactory
|
|
191
187
|
}
|
192
188
|
};
|
193
189
|
try {
|
194
|
-
condition = (DoubleCondition) new ConditionFactory(
|
190
|
+
condition = (DoubleCondition) new ConditionFactory(column, config).createCondition();
|
195
191
|
fail("Argument type mismatch");
|
196
192
|
}
|
197
193
|
catch (ConfigException e) {
|
@@ -211,7 +207,7 @@ public class TestConditionFactory
|
|
211
207
|
return Optional.of("IS NULL");
|
212
208
|
}
|
213
209
|
};
|
214
|
-
condition = (LongCondition) new ConditionFactory(
|
210
|
+
condition = (LongCondition) new ConditionFactory(column, config).createCondition();
|
215
211
|
assertTrue(condition.compare(null));
|
216
212
|
|
217
213
|
config = new DefaultConditionConfig() {
|
@@ -225,7 +221,7 @@ public class TestConditionFactory
|
|
225
221
|
}
|
226
222
|
};
|
227
223
|
try {
|
228
|
-
condition = (LongCondition) new ConditionFactory(
|
224
|
+
condition = (LongCondition) new ConditionFactory(column, config).createCondition();
|
229
225
|
fail("Argument is required");
|
230
226
|
}
|
231
227
|
catch (ConfigException e) {
|
@@ -241,7 +237,7 @@ public class TestConditionFactory
|
|
241
237
|
return Optional.of((Object) new Long(10));
|
242
238
|
}
|
243
239
|
};
|
244
|
-
condition = (LongCondition) new ConditionFactory(
|
240
|
+
condition = (LongCondition) new ConditionFactory(column, config).createCondition();
|
245
241
|
assertTrue(condition.compare(new Long(10)));
|
246
242
|
|
247
243
|
config = new DefaultConditionConfig() {
|
@@ -255,7 +251,7 @@ public class TestConditionFactory
|
|
255
251
|
}
|
256
252
|
};
|
257
253
|
try {
|
258
|
-
condition = (LongCondition) new ConditionFactory(
|
254
|
+
condition = (LongCondition) new ConditionFactory(column, config).createCondition();
|
259
255
|
fail("Argument type mismatch");
|
260
256
|
}
|
261
257
|
catch (ConfigException e) {
|
@@ -275,7 +271,7 @@ public class TestConditionFactory
|
|
275
271
|
return Optional.of("IS NULL");
|
276
272
|
}
|
277
273
|
};
|
278
|
-
condition = (StringCondition) new ConditionFactory(
|
274
|
+
condition = (StringCondition) new ConditionFactory(column, config).createCondition();
|
279
275
|
assertTrue(condition.compare(null));
|
280
276
|
|
281
277
|
config = new DefaultConditionConfig() {
|
@@ -289,7 +285,7 @@ public class TestConditionFactory
|
|
289
285
|
}
|
290
286
|
};
|
291
287
|
try {
|
292
|
-
condition = (StringCondition) new ConditionFactory(
|
288
|
+
condition = (StringCondition) new ConditionFactory(column, config).createCondition();
|
293
289
|
fail("Argument is required");
|
294
290
|
}
|
295
291
|
catch (ConfigException e) {
|
@@ -305,7 +301,7 @@ public class TestConditionFactory
|
|
305
301
|
return Optional.of((Object) "foo");
|
306
302
|
}
|
307
303
|
};
|
308
|
-
condition = (StringCondition) new ConditionFactory(
|
304
|
+
condition = (StringCondition) new ConditionFactory(column, config).createCondition();
|
309
305
|
assertTrue(condition.compare("foo"));
|
310
306
|
|
311
307
|
config = new DefaultConditionConfig() {
|
@@ -319,7 +315,7 @@ public class TestConditionFactory
|
|
319
315
|
}
|
320
316
|
};
|
321
317
|
try {
|
322
|
-
condition = (StringCondition) new ConditionFactory(
|
318
|
+
condition = (StringCondition) new ConditionFactory(column, config).createCondition();
|
323
319
|
fail("Argument type mismatch");
|
324
320
|
}
|
325
321
|
catch (ConfigException e) {
|
@@ -339,7 +335,7 @@ public class TestConditionFactory
|
|
339
335
|
return Optional.of("IS NULL");
|
340
336
|
}
|
341
337
|
};
|
342
|
-
condition = (TimestampCondition) new ConditionFactory(
|
338
|
+
condition = (TimestampCondition) new ConditionFactory(column, config).createCondition();
|
343
339
|
assertTrue(condition.compare(null));
|
344
340
|
|
345
341
|
config = new DefaultConditionConfig() {
|
@@ -353,20 +349,12 @@ public class TestConditionFactory
|
|
353
349
|
}
|
354
350
|
};
|
355
351
|
try {
|
356
|
-
condition = (TimestampCondition) new ConditionFactory(
|
352
|
+
condition = (TimestampCondition) new ConditionFactory(column, config).createCondition();
|
357
353
|
fail("Argument is required");
|
358
354
|
}
|
359
355
|
catch (ConfigException e) {
|
360
356
|
}
|
361
357
|
|
362
|
-
//ToDo: How to create jruby object correctly?
|
363
|
-
//config = new DefaultConditionConfig() {
|
364
|
-
// public Optional<String> getOperator() { return Optional.of("=="); }
|
365
|
-
// public Optional<Object> getArgument() { return Optional.of((Object)"2015-07-15"); }
|
366
|
-
// public Optional<String> getFormat() { return Optional.of("%Y-%m-%d"); }
|
367
|
-
//};
|
368
|
-
//condition = (TimestampCondition)new ConditionFactory(jruby, column, config).createCondition();
|
369
|
-
|
370
358
|
config = new DefaultConditionConfig() {
|
371
359
|
public Optional<String> getOperator()
|
372
360
|
{
|
@@ -378,7 +366,7 @@ public class TestConditionFactory
|
|
378
366
|
}
|
379
367
|
};
|
380
368
|
try {
|
381
|
-
condition = (TimestampCondition) new ConditionFactory(
|
369
|
+
condition = (TimestampCondition) new ConditionFactory(column, config).createCondition();
|
382
370
|
fail("Argument type mismatch");
|
383
371
|
}
|
384
372
|
catch (ConfigException e) {
|
@@ -11,7 +11,6 @@ import org.embulk.spi.SchemaConfigException;
|
|
11
11
|
import org.embulk.spi.time.Timestamp;
|
12
12
|
|
13
13
|
import org.embulk.spi.time.TimestampParseException;
|
14
|
-
import org.jruby.embed.ScriptingContainer;
|
15
14
|
import org.junit.BeforeClass;
|
16
15
|
import org.junit.Rule;
|
17
16
|
import org.junit.Test;
|
@@ -32,7 +31,6 @@ import static org.junit.Assert.assertTrue;
|
|
32
31
|
public class TestParser
|
33
32
|
{
|
34
33
|
private static EmbulkTestRuntime runtime = new EmbulkTestRuntime(); // very slow
|
35
|
-
private static ScriptingContainer jruby = new ScriptingContainer();
|
36
34
|
|
37
35
|
private static PageReader buildPageReader(Schema schema, final Object... objects)
|
38
36
|
{
|
@@ -51,8 +49,6 @@ public class TestParser
|
|
51
49
|
@BeforeClass
|
52
50
|
public static void setupBeforeClass()
|
53
51
|
{
|
54
|
-
ParserLiteral.setJRuby(jruby);
|
55
|
-
|
56
52
|
// {"k1":{"k1":"v"},"k2":{"k2":"v"}}
|
57
53
|
Value k1 = ValueFactory.newString("k1");
|
58
54
|
Value k2 = ValueFactory.newString("k2");
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-row
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-08-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -96,7 +96,7 @@ files:
|
|
96
96
|
- src/test/java/org/embulk/filter/row/condition/TestTimestampCondition.java
|
97
97
|
- src/test/java/org/embulk/filter/row/where/TestParser.java
|
98
98
|
- src/test/java/org/embulk/filter/row/where/TestYylex.java
|
99
|
-
- classpath/embulk-filter-row-0.
|
99
|
+
- classpath/embulk-filter-row-0.5.0.jar
|
100
100
|
- classpath/jcodings-1.0.18.jar
|
101
101
|
- classpath/joni-2.1.11.jar
|
102
102
|
homepage: https://github.com/sonots/embulk-filter-row
|