embulk-filter-row 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +16 -13
- data/build.gradle +3 -3
- data/src/main/java/org/embulk/filter/row/AbstractGuardColumnVisitor.java +1 -1
- data/src/main/java/org/embulk/filter/row/RowFilterPlugin.java +0 -1
- data/src/main/java/org/embulk/filter/row/condition/ConditionFactory.java +2 -5
- data/src/main/java/org/embulk/filter/row/where/ParserLiteral.java +1 -8
- data/src/test/java/org/embulk/filter/row/condition/TestConditionFactory.java +19 -31
- data/src/test/java/org/embulk/filter/row/where/TestParser.java +0 -4
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54615ee2184dfcb3791aa6d92922f1f64a9905a5
|
4
|
+
data.tar.gz: 5558f2a44121b9fdbfcee09d4f1d048ef934d20c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 64168aceba7a817f404a6520c3e6ba5e499e9797bb6dcf97ca71eec26684a40f93f94a687b3e8284799c4ab6e09343cef416f68159adca30546c919da8f29390
|
7
|
+
data.tar.gz: be28eba1b0afa8edbeae6c2857ee645ce9319b2cf10eac6ce57720321789121676ce3706dc280051bd887b51a05b61d58be6913e7638670fb1b210e92a31a852
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@ A filter plugin for Embulk to filter out rows
|
|
6
6
|
|
7
7
|
## Configuration
|
8
8
|
|
9
|
-
|
9
|
+
Requirement: version >= 0.3.0
|
10
10
|
|
11
11
|
* **where**: Select only rows which match the conditions written in SQL-like syntax. See [SQL-like Syntax](#sql-like-syntax)
|
12
12
|
|
@@ -23,13 +23,13 @@ filters:
|
|
23
23
|
- type: row
|
24
24
|
where: |-
|
25
25
|
(
|
26
|
-
|
27
|
-
|
26
|
+
string_column START_WITH 'str' AND
|
27
|
+
number_column > 1.0
|
28
28
|
)
|
29
29
|
OR
|
30
30
|
(
|
31
|
-
|
32
|
-
"
|
31
|
+
time_column = TIMESTAMP '2016-01-01 +0900' AND
|
32
|
+
"true_column" = true
|
33
33
|
)
|
34
34
|
```
|
35
35
|
|
@@ -37,20 +37,22 @@ See [SQL-like Syntax](#sql-like-syntax) for more details
|
|
37
37
|
|
38
38
|
# SQL-like Syntax
|
39
39
|
|
40
|
-
|
40
|
+
```sql
|
41
|
+
where: column1 = 'str'
|
42
|
+
```
|
41
43
|
|
42
44
|
```sql
|
43
45
|
where: |-
|
44
46
|
(
|
45
|
-
|
46
|
-
|
47
|
+
string_column START_WITH 'str' AND
|
48
|
+
number_column > 1.0
|
47
49
|
|
48
50
|
)
|
49
51
|
OR
|
50
52
|
(
|
51
|
-
|
52
|
-
"
|
53
|
-
|
53
|
+
time_column = TIMESTAMP '2016-01-01 +0900' AND
|
54
|
+
"true_column" = true AND
|
55
|
+
string_column REGEXP '^reg'
|
54
56
|
)
|
55
57
|
```
|
56
58
|
|
@@ -227,8 +229,9 @@ filters:
|
|
227
229
|
## Comparisons
|
228
230
|
|
229
231
|
* [embulk-filter-calcite](https://github.com/muga/embulk-filter-calcite)
|
230
|
-
* embulk-filter-calcite is a pretty nice plugin which enables us to write SQL query to filter embulk records
|
231
|
-
*
|
232
|
+
* embulk-filter-calcite is a pretty nice plugin which enables us to write SQL queries to filter embulk records, supporting not only `WHERE` but also `SELECT`.
|
233
|
+
* Based on [my benchmark (Japanese)](http://qiita.com/sonots/items/a70482d29862de87624d), embulk-filter-row was faster than embulk-filter-calcite.
|
234
|
+
* Choose whichever plugin better fits your needs.
|
232
235
|
|
233
236
|
## ToDo
|
234
237
|
|
data/build.gradle
CHANGED
@@ -15,13 +15,13 @@ configurations {
|
|
15
15
|
provided
|
16
16
|
}
|
17
17
|
|
18
|
-
version = "0.4.0"
|
18
|
+
version = "0.5.0"
|
19
19
|
sourceCompatibility = 1.7
|
20
20
|
targetCompatibility = 1.7
|
21
21
|
|
22
22
|
dependencies {
|
23
|
-
compile "org.embulk:embulk-core:0.8
|
24
|
-
provided "org.embulk:embulk-core:0.8
|
23
|
+
compile "org.embulk:embulk-core:0.8.29+"
|
24
|
+
provided "org.embulk:embulk-core:0.8.29+"
|
25
25
|
|
26
26
|
compile "org.jruby.joni:joni:2.1.11"
|
27
27
|
compile "org.jruby.jcodings:jcodings:1.0.18"
|
@@ -40,7 +40,7 @@ abstract class AbstractGuardColumnVisitor
|
|
40
40
|
String columnName = conditionConfig.getColumn();
|
41
41
|
for (Column column : outputSchema.getColumns()) {
|
42
42
|
if (columnName.equals(column.getName())) {
|
43
|
-
ConditionFactory factory = new ConditionFactory(
|
43
|
+
ConditionFactory factory = new ConditionFactory(column, conditionConfig);
|
44
44
|
Condition condition = factory.createCondition();
|
45
45
|
conditionMap.get(columnName).add(condition);
|
46
46
|
break;
|
@@ -55,7 +55,6 @@ public class RowFilterPlugin implements FilterPlugin
|
|
55
55
|
FilterPlugin.Control control)
|
56
56
|
{
|
57
57
|
PluginTask task = config.loadConfig(PluginTask.class);
|
58
|
-
ParserLiteral.setJRuby(task.getJRuby());
|
59
58
|
|
60
59
|
configure(task, inputSchema);
|
61
60
|
Schema outputSchema = inputSchema;
|
@@ -15,11 +15,9 @@ import org.embulk.spi.type.StringType;
|
|
15
15
|
import org.embulk.spi.type.TimestampType;
|
16
16
|
import org.embulk.spi.type.Type;
|
17
17
|
import org.joda.time.DateTimeZone;
|
18
|
-
import org.jruby.embed.ScriptingContainer;
|
19
18
|
|
20
19
|
public class ConditionFactory
|
21
20
|
{
|
22
|
-
private final ScriptingContainer jruby;
|
23
21
|
private Column column;
|
24
22
|
private String columnName;
|
25
23
|
private Type columnType;
|
@@ -27,9 +25,8 @@ public class ConditionFactory
|
|
27
25
|
private String operator;
|
28
26
|
private boolean not;
|
29
27
|
|
30
|
-
public ConditionFactory(
|
28
|
+
public ConditionFactory(Column column, ConditionConfig conditionConfig)
|
31
29
|
{
|
32
|
-
this.jruby = jruby;
|
33
30
|
this.column = column;
|
34
31
|
this.columnName = column.getName();
|
35
32
|
this.columnType = column.getType();
|
@@ -140,7 +137,7 @@ public class ConditionFactory
|
|
140
137
|
String format = (String) conditionConfig.getFormat().get();
|
141
138
|
DateTimeZone timezone = DateTimeZone.forID((String) conditionConfig.getTimezone().get());
|
142
139
|
|
143
|
-
TimestampParser parser = new TimestampParser(
|
140
|
+
TimestampParser parser = new TimestampParser(format, timezone);
|
144
141
|
try {
|
145
142
|
Timestamp timestamp = parser.parse(argument);
|
146
143
|
return new TimestampCondition(operator, timestamp, not);
|
@@ -16,20 +16,13 @@ import org.embulk.spi.type.StringType;
|
|
16
16
|
import org.embulk.spi.type.TimestampType;
|
17
17
|
import org.embulk.spi.type.Type;
|
18
18
|
import org.joda.time.DateTimeZone;
|
19
|
-
import org.jruby.embed.ScriptingContainer;
|
20
19
|
import org.msgpack.value.Value;
|
21
20
|
|
22
21
|
// Literal Node of AST (Abstract Syntax Tree)
|
23
22
|
public abstract class ParserLiteral extends ParserNode
|
24
23
|
{
|
25
|
-
protected static ScriptingContainer jruby;
|
26
24
|
protected String yytext;
|
27
25
|
|
28
|
-
public static void setJRuby(ScriptingContainer jruby)
|
29
|
-
{
|
30
|
-
ParserLiteral.jruby = jruby;
|
31
|
-
}
|
32
|
-
|
33
26
|
public boolean isBoolean()
|
34
27
|
{
|
35
28
|
return false;
|
@@ -185,7 +178,7 @@ class TimestampLiteral extends ParserLiteral
|
|
185
178
|
TimestampParseException ex = null;
|
186
179
|
for (String format : formats) {
|
187
180
|
try {
|
188
|
-
TimestampParser timestampParser = new TimestampParser(
|
181
|
+
TimestampParser timestampParser = new TimestampParser(format, default_timezone);
|
189
182
|
this.val = timestampParser.parse(literal.val);
|
190
183
|
break;
|
191
184
|
}
|
@@ -6,7 +6,6 @@ import org.embulk.config.ConfigException;
|
|
6
6
|
import org.embulk.config.TaskSource;
|
7
7
|
import org.embulk.spi.Column;
|
8
8
|
|
9
|
-
import org.jruby.embed.ScriptingContainer;
|
10
9
|
import org.junit.Test;
|
11
10
|
|
12
11
|
import static org.embulk.spi.type.Types.BOOLEAN;
|
@@ -63,11 +62,8 @@ public class TestConditionFactory
|
|
63
62
|
}
|
64
63
|
}
|
65
64
|
|
66
|
-
private final ScriptingContainer jruby;
|
67
|
-
|
68
65
|
public TestConditionFactory()
|
69
66
|
{
|
70
|
-
jruby = new ScriptingContainer();
|
71
67
|
}
|
72
68
|
|
73
69
|
@Test
|
@@ -83,7 +79,7 @@ public class TestConditionFactory
|
|
83
79
|
return Optional.of("IS NULL");
|
84
80
|
}
|
85
81
|
};
|
86
|
-
condition = (BooleanCondition) new ConditionFactory(
|
82
|
+
condition = (BooleanCondition) new ConditionFactory(column, config).createCondition();
|
87
83
|
assertTrue(condition.compare(null));
|
88
84
|
|
89
85
|
config = new DefaultConditionConfig() {
|
@@ -97,7 +93,7 @@ public class TestConditionFactory
|
|
97
93
|
}
|
98
94
|
};
|
99
95
|
try {
|
100
|
-
condition = (BooleanCondition) new ConditionFactory(
|
96
|
+
condition = (BooleanCondition) new ConditionFactory(column, config).createCondition();
|
101
97
|
fail("Argument is required");
|
102
98
|
}
|
103
99
|
catch (ConfigException e) {
|
@@ -113,7 +109,7 @@ public class TestConditionFactory
|
|
113
109
|
return Optional.of((Object) new Boolean(true));
|
114
110
|
}
|
115
111
|
};
|
116
|
-
condition = (BooleanCondition) new ConditionFactory(
|
112
|
+
condition = (BooleanCondition) new ConditionFactory(column, config).createCondition();
|
117
113
|
assertTrue(condition.compare(new Boolean(true)));
|
118
114
|
|
119
115
|
config = new DefaultConditionConfig() {
|
@@ -127,7 +123,7 @@ public class TestConditionFactory
|
|
127
123
|
}
|
128
124
|
};
|
129
125
|
try {
|
130
|
-
condition = (BooleanCondition) new ConditionFactory(
|
126
|
+
condition = (BooleanCondition) new ConditionFactory(column, config).createCondition();
|
131
127
|
fail("Argument type mismatch");
|
132
128
|
}
|
133
129
|
catch (ConfigException e) {
|
@@ -147,7 +143,7 @@ public class TestConditionFactory
|
|
147
143
|
return Optional.of("IS NULL");
|
148
144
|
}
|
149
145
|
};
|
150
|
-
condition = (DoubleCondition) new ConditionFactory(
|
146
|
+
condition = (DoubleCondition) new ConditionFactory(column, config).createCondition();
|
151
147
|
assertTrue(condition.compare(null));
|
152
148
|
|
153
149
|
config = new DefaultConditionConfig() {
|
@@ -161,7 +157,7 @@ public class TestConditionFactory
|
|
161
157
|
}
|
162
158
|
};
|
163
159
|
try {
|
164
|
-
condition = (DoubleCondition) new ConditionFactory(
|
160
|
+
condition = (DoubleCondition) new ConditionFactory(column, config).createCondition();
|
165
161
|
fail("Argument is required");
|
166
162
|
}
|
167
163
|
catch (ConfigException e) {
|
@@ -177,7 +173,7 @@ public class TestConditionFactory
|
|
177
173
|
return Optional.of((Object) new Double(10));
|
178
174
|
}
|
179
175
|
};
|
180
|
-
condition = (DoubleCondition) new ConditionFactory(
|
176
|
+
condition = (DoubleCondition) new ConditionFactory(column, config).createCondition();
|
181
177
|
assertTrue(condition.compare(new Double(10)));
|
182
178
|
|
183
179
|
config = new DefaultConditionConfig() {
|
@@ -191,7 +187,7 @@ public class TestConditionFactory
|
|
191
187
|
}
|
192
188
|
};
|
193
189
|
try {
|
194
|
-
condition = (DoubleCondition) new ConditionFactory(
|
190
|
+
condition = (DoubleCondition) new ConditionFactory(column, config).createCondition();
|
195
191
|
fail("Argument type mismatch");
|
196
192
|
}
|
197
193
|
catch (ConfigException e) {
|
@@ -211,7 +207,7 @@ public class TestConditionFactory
|
|
211
207
|
return Optional.of("IS NULL");
|
212
208
|
}
|
213
209
|
};
|
214
|
-
condition = (LongCondition) new ConditionFactory(
|
210
|
+
condition = (LongCondition) new ConditionFactory(column, config).createCondition();
|
215
211
|
assertTrue(condition.compare(null));
|
216
212
|
|
217
213
|
config = new DefaultConditionConfig() {
|
@@ -225,7 +221,7 @@ public class TestConditionFactory
|
|
225
221
|
}
|
226
222
|
};
|
227
223
|
try {
|
228
|
-
condition = (LongCondition) new ConditionFactory(
|
224
|
+
condition = (LongCondition) new ConditionFactory(column, config).createCondition();
|
229
225
|
fail("Argument is required");
|
230
226
|
}
|
231
227
|
catch (ConfigException e) {
|
@@ -241,7 +237,7 @@ public class TestConditionFactory
|
|
241
237
|
return Optional.of((Object) new Long(10));
|
242
238
|
}
|
243
239
|
};
|
244
|
-
condition = (LongCondition) new ConditionFactory(
|
240
|
+
condition = (LongCondition) new ConditionFactory(column, config).createCondition();
|
245
241
|
assertTrue(condition.compare(new Long(10)));
|
246
242
|
|
247
243
|
config = new DefaultConditionConfig() {
|
@@ -255,7 +251,7 @@ public class TestConditionFactory
|
|
255
251
|
}
|
256
252
|
};
|
257
253
|
try {
|
258
|
-
condition = (LongCondition) new ConditionFactory(
|
254
|
+
condition = (LongCondition) new ConditionFactory(column, config).createCondition();
|
259
255
|
fail("Argument type mismatch");
|
260
256
|
}
|
261
257
|
catch (ConfigException e) {
|
@@ -275,7 +271,7 @@ public class TestConditionFactory
|
|
275
271
|
return Optional.of("IS NULL");
|
276
272
|
}
|
277
273
|
};
|
278
|
-
condition = (StringCondition) new ConditionFactory(
|
274
|
+
condition = (StringCondition) new ConditionFactory(column, config).createCondition();
|
279
275
|
assertTrue(condition.compare(null));
|
280
276
|
|
281
277
|
config = new DefaultConditionConfig() {
|
@@ -289,7 +285,7 @@ public class TestConditionFactory
|
|
289
285
|
}
|
290
286
|
};
|
291
287
|
try {
|
292
|
-
condition = (StringCondition) new ConditionFactory(
|
288
|
+
condition = (StringCondition) new ConditionFactory(column, config).createCondition();
|
293
289
|
fail("Argument is required");
|
294
290
|
}
|
295
291
|
catch (ConfigException e) {
|
@@ -305,7 +301,7 @@ public class TestConditionFactory
|
|
305
301
|
return Optional.of((Object) "foo");
|
306
302
|
}
|
307
303
|
};
|
308
|
-
condition = (StringCondition) new ConditionFactory(
|
304
|
+
condition = (StringCondition) new ConditionFactory(column, config).createCondition();
|
309
305
|
assertTrue(condition.compare("foo"));
|
310
306
|
|
311
307
|
config = new DefaultConditionConfig() {
|
@@ -319,7 +315,7 @@ public class TestConditionFactory
|
|
319
315
|
}
|
320
316
|
};
|
321
317
|
try {
|
322
|
-
condition = (StringCondition) new ConditionFactory(
|
318
|
+
condition = (StringCondition) new ConditionFactory(column, config).createCondition();
|
323
319
|
fail("Argument type mismatch");
|
324
320
|
}
|
325
321
|
catch (ConfigException e) {
|
@@ -339,7 +335,7 @@ public class TestConditionFactory
|
|
339
335
|
return Optional.of("IS NULL");
|
340
336
|
}
|
341
337
|
};
|
342
|
-
condition = (TimestampCondition) new ConditionFactory(
|
338
|
+
condition = (TimestampCondition) new ConditionFactory(column, config).createCondition();
|
343
339
|
assertTrue(condition.compare(null));
|
344
340
|
|
345
341
|
config = new DefaultConditionConfig() {
|
@@ -353,20 +349,12 @@ public class TestConditionFactory
|
|
353
349
|
}
|
354
350
|
};
|
355
351
|
try {
|
356
|
-
condition = (TimestampCondition) new ConditionFactory(
|
352
|
+
condition = (TimestampCondition) new ConditionFactory(column, config).createCondition();
|
357
353
|
fail("Argument is required");
|
358
354
|
}
|
359
355
|
catch (ConfigException e) {
|
360
356
|
}
|
361
357
|
|
362
|
-
//ToDo: How to create jruby object correctly?
|
363
|
-
//config = new DefaultConditionConfig() {
|
364
|
-
// public Optional<String> getOperator() { return Optional.of("=="); }
|
365
|
-
// public Optional<Object> getArgument() { return Optional.of((Object)"2015-07-15"); }
|
366
|
-
// public Optional<String> getFormat() { return Optional.of("%Y-%m-%d"); }
|
367
|
-
//};
|
368
|
-
//condition = (TimestampCondition)new ConditionFactory(jruby, column, config).createCondition();
|
369
|
-
|
370
358
|
config = new DefaultConditionConfig() {
|
371
359
|
public Optional<String> getOperator()
|
372
360
|
{
|
@@ -378,7 +366,7 @@ public class TestConditionFactory
|
|
378
366
|
}
|
379
367
|
};
|
380
368
|
try {
|
381
|
-
condition = (TimestampCondition) new ConditionFactory(
|
369
|
+
condition = (TimestampCondition) new ConditionFactory(column, config).createCondition();
|
382
370
|
fail("Argument type mismatch");
|
383
371
|
}
|
384
372
|
catch (ConfigException e) {
|
@@ -11,7 +11,6 @@ import org.embulk.spi.SchemaConfigException;
|
|
11
11
|
import org.embulk.spi.time.Timestamp;
|
12
12
|
|
13
13
|
import org.embulk.spi.time.TimestampParseException;
|
14
|
-
import org.jruby.embed.ScriptingContainer;
|
15
14
|
import org.junit.BeforeClass;
|
16
15
|
import org.junit.Rule;
|
17
16
|
import org.junit.Test;
|
@@ -32,7 +31,6 @@ import static org.junit.Assert.assertTrue;
|
|
32
31
|
public class TestParser
|
33
32
|
{
|
34
33
|
private static EmbulkTestRuntime runtime = new EmbulkTestRuntime(); // very slow
|
35
|
-
private static ScriptingContainer jruby = new ScriptingContainer();
|
36
34
|
|
37
35
|
private static PageReader buildPageReader(Schema schema, final Object... objects)
|
38
36
|
{
|
@@ -51,8 +49,6 @@ public class TestParser
|
|
51
49
|
@BeforeClass
|
52
50
|
public static void setupBeforeClass()
|
53
51
|
{
|
54
|
-
ParserLiteral.setJRuby(jruby);
|
55
|
-
|
56
52
|
// {"k1":{"k1":"v"},"k2":{"k2":"v"}}
|
57
53
|
Value k1 = ValueFactory.newString("k1");
|
58
54
|
Value k2 = ValueFactory.newString("k2");
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-row
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-08-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -96,7 +96,7 @@ files:
|
|
96
96
|
- src/test/java/org/embulk/filter/row/condition/TestTimestampCondition.java
|
97
97
|
- src/test/java/org/embulk/filter/row/where/TestParser.java
|
98
98
|
- src/test/java/org/embulk/filter/row/where/TestYylex.java
|
99
|
-
- classpath/embulk-filter-row-0.4.0.jar
|
99
|
+
- classpath/embulk-filter-row-0.5.0.jar
|
100
100
|
- classpath/jcodings-1.0.18.jar
|
101
101
|
- classpath/joni-2.1.11.jar
|
102
102
|
homepage: https://github.com/sonots/embulk-filter-row
|