embulk-filter-calcite 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.travis.yml +4 -0
- data/CHANGELOG.md +3 -0
- data/README.md +61 -0
- data/build.gradle +112 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +160 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/filter/calcite.rb +3 -0
- data/src/main/java/org/embulk/filter/calcite/CalciteFilterPlugin.java +309 -0
- data/src/main/java/org/embulk/filter/calcite/PageConverter.java +119 -0
- data/src/main/java/org/embulk/filter/calcite/adapter/page/PageEnumerator.java +56 -0
- data/src/main/java/org/embulk/filter/calcite/adapter/page/PageFieldType.java +44 -0
- data/src/main/java/org/embulk/filter/calcite/adapter/page/PageSchema.java +26 -0
- data/src/main/java/org/embulk/filter/calcite/adapter/page/PageSchemaFactory.java +26 -0
- data/src/main/java/org/embulk/filter/calcite/adapter/page/PageTable.java +69 -0
- data/src/main/java/org/embulk/filter/calcite/getter/FilterColumnGetterFactory.java +45 -0
- data/src/main/java/org/embulk/filter/calcite/getter/UTCTimestampColumnGetter.java +41 -0
- data/src/test/java/org/embulk/filter/calcite/TestCalciteFilterPlugin.java +96 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_int_ops_expected.csv +4 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_int_ops_filter.yml +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_int_ops_in.yml +18 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_int_ops_source.csv +5 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_simple_expected.csv +4 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_simple_filter.yml +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_simple_in.yml +18 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_simple_source.csv +5 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_string_ops_expected.csv +4 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_string_ops_filter.yml +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_string_ops_in.yml +18 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_string_ops_source.csv +5 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_int_cond_expected.csv +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_int_cond_filter.yml +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_int_cond_in.yml +18 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_int_cond_source.csv +5 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_string_cond_expected.csv +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_string_cond_filter.yml +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_string_cond_in.yml +18 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_string_cond_source.csv +5 -0
- metadata +137 -0
@@ -0,0 +1,119 @@
|
|
1
|
+
package org.embulk.filter.calcite;
|
2
|
+
|
3
|
+
import org.embulk.spi.Column;
|
4
|
+
import org.embulk.spi.ColumnVisitor;
|
5
|
+
import org.embulk.spi.PageReader;
|
6
|
+
import org.embulk.spi.Schema;
|
7
|
+
|
8
|
+
import java.math.BigDecimal;
|
9
|
+
import java.util.TimeZone;
|
10
|
+
|
11
|
+
/**
|
12
|
+
* This class converts Embulk's Page values into Calcite's row types. It refers to
|
13
|
+
* org.apache.calcite.adapter.csv.CsvEnumerator.
|
14
|
+
*/
|
15
|
+
public class PageConverter
|
16
|
+
implements ColumnVisitor
|
17
|
+
{
|
18
|
+
private final TimeZone defaultTimeZone;
|
19
|
+
private final Object[] row;
|
20
|
+
private PageReader pageReader;
|
21
|
+
|
22
|
+
public PageConverter(Schema schema, TimeZone defaultTimeZone)
|
23
|
+
{
|
24
|
+
this.defaultTimeZone = defaultTimeZone;
|
25
|
+
this.row = new Object[schema.getColumnCount()];
|
26
|
+
}
|
27
|
+
|
28
|
+
public Object[] getRow()
|
29
|
+
{
|
30
|
+
return row;
|
31
|
+
}
|
32
|
+
|
33
|
+
public void setPageReader(PageReader pageReader)
|
34
|
+
{
|
35
|
+
this.pageReader = pageReader;
|
36
|
+
}
|
37
|
+
|
38
|
+
@Override
|
39
|
+
public void booleanColumn(Column column)
|
40
|
+
{
|
41
|
+
// Embulk's boolean is converted into Java's boolean
|
42
|
+
int i = column.getIndex();
|
43
|
+
if (pageReader.isNull(i)) {
|
44
|
+
row[i] = null;
|
45
|
+
}
|
46
|
+
else {
|
47
|
+
row[i] = pageReader.getBoolean(i);
|
48
|
+
}
|
49
|
+
}
|
50
|
+
|
51
|
+
@Override
|
52
|
+
public void longColumn(Column column)
|
53
|
+
{
|
54
|
+
// Embulk's long is converted into long type
|
55
|
+
int i = column.getIndex();
|
56
|
+
if (pageReader.isNull(i)) {
|
57
|
+
row[i] = null;
|
58
|
+
}
|
59
|
+
else {
|
60
|
+
row[i] = pageReader.getLong(i);
|
61
|
+
}
|
62
|
+
}
|
63
|
+
|
64
|
+
@Override
|
65
|
+
public void doubleColumn(Column column)
|
66
|
+
{
|
67
|
+
// Embulk's double is converted into java.math.BigDecimal
|
68
|
+
int i = column.getIndex();
|
69
|
+
if (pageReader.isNull(i)) {
|
70
|
+
row[i] = null;
|
71
|
+
}
|
72
|
+
else {
|
73
|
+
row[i] = new BigDecimal(pageReader.getDouble(i));
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
@Override
|
78
|
+
public void stringColumn(Column column)
|
79
|
+
{
|
80
|
+
// Embulk's string is converted into java.lang.String
|
81
|
+
int i = column.getIndex();
|
82
|
+
if (pageReader.isNull(i)) {
|
83
|
+
row[i] = null;
|
84
|
+
}
|
85
|
+
else {
|
86
|
+
row[i] = pageReader.getString(i);
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
@Override
|
91
|
+
public void timestampColumn(Column column)
|
92
|
+
{
|
93
|
+
int i = column.getIndex();
|
94
|
+
if (pageReader.isNull(i)) {
|
95
|
+
row[i] = null;
|
96
|
+
}
|
97
|
+
else {
|
98
|
+
// Embulk's timestamp is converted into java.sql.Timestmap
|
99
|
+
org.embulk.spi.time.Timestamp timestamp = pageReader.getTimestamp(i);
|
100
|
+
long milliseconds = timestamp.getEpochSecond() * 1000 + timestamp.getNano() / 1000000;
|
101
|
+
java.sql.Timestamp ts = new java.sql.Timestamp(milliseconds);
|
102
|
+
ts.setNanos(timestamp.getNano());
|
103
|
+
row[i] = ts;
|
104
|
+
}
|
105
|
+
}
|
106
|
+
|
107
|
+
@Override
|
108
|
+
public void jsonColumn(Column column)
|
109
|
+
{
|
110
|
+
// Embulk's json is converted into Java's string
|
111
|
+
int i = column.getIndex();
|
112
|
+
if (pageReader.isNull(i)) {
|
113
|
+
row[i] = null;
|
114
|
+
}
|
115
|
+
else {
|
116
|
+
row[i] = pageReader.getJson(i).toJson();
|
117
|
+
}
|
118
|
+
}
|
119
|
+
}
|
@@ -0,0 +1,56 @@
|
|
1
|
+
package org.embulk.filter.calcite.adapter.page;
|
2
|
+
|
3
|
+
import org.apache.calcite.linq4j.Enumerator;
|
4
|
+
import org.embulk.filter.calcite.PageConverter;
|
5
|
+
import org.embulk.spi.Page;
|
6
|
+
import org.embulk.spi.PageReader;
|
7
|
+
import org.embulk.spi.Schema;
|
8
|
+
|
9
|
+
public class PageEnumerator
|
10
|
+
implements Enumerator<Object[]>
|
11
|
+
{
|
12
|
+
private final Schema schema;
|
13
|
+
private final PageConverter pageConverter;
|
14
|
+
private final PageReader pageReader;
|
15
|
+
|
16
|
+
public PageEnumerator(Schema schema, PageConverter pageConverter)
|
17
|
+
{
|
18
|
+
this.schema = schema;
|
19
|
+
this.pageReader = new PageReader(schema);
|
20
|
+
this.pageConverter = pageConverter;
|
21
|
+
}
|
22
|
+
|
23
|
+
public void setPage(Page page)
|
24
|
+
{
|
25
|
+
this.pageReader.setPage(page);
|
26
|
+
this.pageConverter.setPageReader(pageReader);
|
27
|
+
}
|
28
|
+
|
29
|
+
@Override
|
30
|
+
public Object[] current()
|
31
|
+
{
|
32
|
+
// this is called from org.apache.calcite.linq4j.EnumerableDefaults
|
33
|
+
schema.visitColumns(pageConverter);
|
34
|
+
return pageConverter.getRow();
|
35
|
+
}
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public boolean moveNext()
|
39
|
+
{
|
40
|
+
return pageReader.nextRecord();
|
41
|
+
}
|
42
|
+
|
43
|
+
@Override
|
44
|
+
public void reset()
|
45
|
+
{
|
46
|
+
throw new UnsupportedOperationException();
|
47
|
+
}
|
48
|
+
|
49
|
+
@Override
|
50
|
+
public void close()
|
51
|
+
{
|
52
|
+
if (pageReader != null) {
|
53
|
+
pageReader.close();
|
54
|
+
}
|
55
|
+
}
|
56
|
+
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
package org.embulk.filter.calcite.adapter.page;
|
2
|
+
|
3
|
+
import org.apache.calcite.adapter.java.JavaTypeFactory;
|
4
|
+
import org.apache.calcite.rel.type.RelDataType;
|
5
|
+
|
6
|
+
import java.util.HashMap;
|
7
|
+
import java.util.Map;
|
8
|
+
|
9
|
+
enum PageFieldType
|
10
|
+
{
|
11
|
+
STRING(String.class, "string"),
|
12
|
+
BOOLEAN(Boolean.class, Boolean.TYPE.getSimpleName()),
|
13
|
+
LONG(Long.class, Long.TYPE.getSimpleName()),
|
14
|
+
DOUBLE(Double.class, Double.TYPE.getSimpleName()),
|
15
|
+
TIMESTAMP(java.sql.Timestamp.class, "timestamp");
|
16
|
+
|
17
|
+
private static final Map<String, PageFieldType> MAP = new HashMap<>();
|
18
|
+
|
19
|
+
static
|
20
|
+
{
|
21
|
+
for (PageFieldType value : values()) {
|
22
|
+
MAP.put(value.simpleName, value);
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
private final Class clazz;
|
27
|
+
private final String simpleName;
|
28
|
+
|
29
|
+
private PageFieldType(Class clazz, String simpleName)
|
30
|
+
{
|
31
|
+
this.clazz = clazz;
|
32
|
+
this.simpleName = simpleName;
|
33
|
+
}
|
34
|
+
|
35
|
+
public RelDataType toType(JavaTypeFactory typeFactory)
|
36
|
+
{
|
37
|
+
return typeFactory.createJavaType(clazz);
|
38
|
+
}
|
39
|
+
|
40
|
+
public static PageFieldType of(String typeString)
|
41
|
+
{
|
42
|
+
return MAP.get(typeString);
|
43
|
+
}
|
44
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
package org.embulk.filter.calcite.adapter.page;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableMap;
|
4
|
+
import org.apache.calcite.schema.Table;
|
5
|
+
import org.apache.calcite.schema.impl.AbstractSchema;
|
6
|
+
import org.embulk.filter.calcite.PageConverter;
|
7
|
+
import org.embulk.spi.Schema;
|
8
|
+
|
9
|
+
import java.util.Map;
|
10
|
+
|
11
|
+
public class PageSchema
|
12
|
+
extends AbstractSchema
|
13
|
+
{
|
14
|
+
public static Schema schema;
|
15
|
+
|
16
|
+
public PageSchema()
|
17
|
+
{
|
18
|
+
super();
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
protected Map<String, Table> getTableMap()
|
23
|
+
{
|
24
|
+
return ImmutableMap.<String, Table>of("$PAGES", new PageTable(schema, null));
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
package org.embulk.filter.calcite.adapter.page;
|
2
|
+
|
3
|
+
import org.apache.calcite.schema.SchemaFactory;
|
4
|
+
import org.apache.calcite.schema.SchemaPlus;
|
5
|
+
|
6
|
+
import java.util.Map;
|
7
|
+
|
8
|
+
/**
|
9
|
+
* Factory that creates a {@link PageSchema}.
|
10
|
+
* @see https://github.com/apache/calcite/blob/master/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvSchemaFactory.java
|
11
|
+
*/
|
12
|
+
public class PageSchemaFactory
|
13
|
+
implements SchemaFactory
|
14
|
+
{
|
15
|
+
public static final PageSchemaFactory INSTANCE = new PageSchemaFactory();
|
16
|
+
|
17
|
+
private PageSchemaFactory()
|
18
|
+
{
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public org.apache.calcite.schema.Schema create(SchemaPlus parentSchema, String name, Map<String, Object> operand)
|
23
|
+
{
|
24
|
+
return new PageSchema();
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,69 @@
|
|
1
|
+
package org.embulk.filter.calcite.adapter.page;
|
2
|
+
|
3
|
+
import org.apache.calcite.DataContext;
|
4
|
+
import org.apache.calcite.adapter.java.JavaTypeFactory;
|
5
|
+
import org.apache.calcite.linq4j.AbstractEnumerable;
|
6
|
+
import org.apache.calcite.linq4j.Enumerable;
|
7
|
+
import org.apache.calcite.linq4j.Enumerator;
|
8
|
+
import org.apache.calcite.rel.type.RelDataType;
|
9
|
+
import org.apache.calcite.rel.type.RelDataTypeFactory;
|
10
|
+
import org.apache.calcite.rel.type.RelProtoDataType;
|
11
|
+
import org.apache.calcite.schema.ScannableTable;
|
12
|
+
import org.apache.calcite.schema.impl.AbstractTable;
|
13
|
+
import org.apache.calcite.util.Pair;
|
14
|
+
import org.embulk.filter.calcite.PageConverter;
|
15
|
+
import org.embulk.spi.Column;
|
16
|
+
import org.embulk.spi.Page;
|
17
|
+
import org.embulk.spi.Schema;
|
18
|
+
|
19
|
+
import java.util.ArrayList;
|
20
|
+
import java.util.List;
|
21
|
+
|
22
|
+
public class PageTable
|
23
|
+
extends AbstractTable
|
24
|
+
implements ScannableTable
|
25
|
+
{
|
26
|
+
public static ThreadLocal<PageConverter> pageConverter = new ThreadLocal<>();
|
27
|
+
public static ThreadLocal<Page> page = new ThreadLocal<>();
|
28
|
+
|
29
|
+
private final Schema schema;
|
30
|
+
private final RelProtoDataType protoRowType;
|
31
|
+
|
32
|
+
PageTable(Schema schema, RelProtoDataType protoRowType)
|
33
|
+
{
|
34
|
+
this.schema = schema;
|
35
|
+
this.protoRowType = protoRowType;
|
36
|
+
}
|
37
|
+
|
38
|
+
public RelDataType getRowType(RelDataTypeFactory typeFactory)
|
39
|
+
{
|
40
|
+
if (protoRowType != null) {
|
41
|
+
return protoRowType.apply(typeFactory);
|
42
|
+
}
|
43
|
+
|
44
|
+
final List<RelDataType> types = new ArrayList<>(schema.getColumnCount());
|
45
|
+
final List<String> names = new ArrayList<>(schema.getColumnCount());
|
46
|
+
|
47
|
+
for (Column column : schema.getColumns()) {
|
48
|
+
names.add(column.getName());
|
49
|
+
PageFieldType type = PageFieldType.of(column.getType().getName());
|
50
|
+
types.add(type.toType((JavaTypeFactory) typeFactory));
|
51
|
+
}
|
52
|
+
|
53
|
+
return typeFactory.createStructType(Pair.zip(names, types));
|
54
|
+
}
|
55
|
+
|
56
|
+
public Enumerable<Object[]> scan(DataContext root)
|
57
|
+
{
|
58
|
+
return new AbstractEnumerable<Object[]>() {
|
59
|
+
public Enumerator<Object[]> enumerator()
|
60
|
+
{
|
61
|
+
PageEnumerator enumerator = new PageEnumerator(schema, pageConverter.get());
|
62
|
+
if (page.get() != null) {
|
63
|
+
enumerator.setPage(page.get());
|
64
|
+
}
|
65
|
+
return enumerator;
|
66
|
+
}
|
67
|
+
};
|
68
|
+
}
|
69
|
+
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
package org.embulk.filter.calcite.getter;
|
2
|
+
|
3
|
+
import org.embulk.input.jdbc.AbstractJdbcInputPlugin;
|
4
|
+
import org.embulk.input.jdbc.JdbcColumn;
|
5
|
+
import org.embulk.input.jdbc.JdbcColumnOption;
|
6
|
+
import org.embulk.input.jdbc.JdbcInputConnection;
|
7
|
+
import org.embulk.input.jdbc.getter.ColumnGetter;
|
8
|
+
import org.embulk.input.jdbc.getter.ColumnGetterFactory;
|
9
|
+
import org.embulk.spi.PageBuilder;
|
10
|
+
import org.embulk.spi.time.TimestampFormatter;
|
11
|
+
import org.embulk.spi.type.Type;
|
12
|
+
import org.joda.time.DateTimeZone;
|
13
|
+
|
14
|
+
public class FilterColumnGetterFactory
|
15
|
+
extends ColumnGetterFactory
|
16
|
+
{
|
17
|
+
private final DateTimeZone defaultTimeZone;
|
18
|
+
|
19
|
+
public FilterColumnGetterFactory(PageBuilder to, DateTimeZone defaultTimeZone)
|
20
|
+
{
|
21
|
+
super(to, defaultTimeZone);
|
22
|
+
this.defaultTimeZone = defaultTimeZone; // TODO make change super.defaultTimeZone field protected
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public ColumnGetter newColumnGetter(JdbcInputConnection con, AbstractJdbcInputPlugin.PluginTask task, JdbcColumn column, JdbcColumnOption option)
|
27
|
+
{
|
28
|
+
String valueType = option.getValueType();
|
29
|
+
Type toType = getToType(option);
|
30
|
+
if (valueType.equals("coalesce") && sqlTypeToValueType(column, column.getSqlType()).equals("timestamp")) {
|
31
|
+
return new UTCTimestampColumnGetter(to, toType, newTimestampFormatter(option, "%Y-%m-%d"));
|
32
|
+
}
|
33
|
+
else {
|
34
|
+
return super.newColumnGetter(con, task, column, option);
|
35
|
+
}
|
36
|
+
}
|
37
|
+
|
38
|
+
private TimestampFormatter newTimestampFormatter(JdbcColumnOption option, String defaultTimestampFormat)
|
39
|
+
{
|
40
|
+
return new TimestampFormatter(
|
41
|
+
option.getJRuby(),
|
42
|
+
option.getTimestampFormat().isPresent() ? option.getTimestampFormat().get().getFormat() : defaultTimestampFormat,
|
43
|
+
option.getTimeZone().or(defaultTimeZone));
|
44
|
+
}
|
45
|
+
}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
package org.embulk.filter.calcite.getter;
|
2
|
+
|
3
|
+
import org.embulk.input.jdbc.getter.TimestampColumnGetter;
|
4
|
+
import org.embulk.spi.PageBuilder;
|
5
|
+
import org.embulk.spi.time.Timestamp;
|
6
|
+
import org.embulk.spi.time.TimestampFormatter;
|
7
|
+
import org.embulk.spi.type.Type;
|
8
|
+
|
9
|
+
import java.sql.ResultSet;
|
10
|
+
import java.sql.SQLException;
|
11
|
+
import java.util.Calendar;
|
12
|
+
|
13
|
+
import static java.util.Calendar.getInstance;
|
14
|
+
import static java.util.TimeZone.getTimeZone;
|
15
|
+
|
16
|
+
public class UTCTimestampColumnGetter
|
17
|
+
extends TimestampColumnGetter
|
18
|
+
{
|
19
|
+
private static ThreadLocal<Calendar> calendar = new ThreadLocal<Calendar>() {
|
20
|
+
@Override
|
21
|
+
protected Calendar initialValue()
|
22
|
+
{
|
23
|
+
return getInstance(getTimeZone("UTC"));
|
24
|
+
}
|
25
|
+
};
|
26
|
+
|
27
|
+
public UTCTimestampColumnGetter(PageBuilder to, Type toType, TimestampFormatter timestampFormatter)
|
28
|
+
{
|
29
|
+
super(to, toType, timestampFormatter);
|
30
|
+
}
|
31
|
+
|
32
|
+
@Override
|
33
|
+
protected void fetch(ResultSet from, int fromIndex)
|
34
|
+
throws SQLException
|
35
|
+
{
|
36
|
+
java.sql.Timestamp timestamp = from.getTimestamp(fromIndex, calendar.get());
|
37
|
+
if (timestamp != null) {
|
38
|
+
value = Timestamp.ofEpochSecond(timestamp.getTime() / 1000, timestamp.getNanos());
|
39
|
+
}
|
40
|
+
}
|
41
|
+
}
|
@@ -0,0 +1,96 @@
|
|
1
|
+
package org.embulk.filter.calcite;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableList;
|
4
|
+
import org.embulk.config.ConfigSource;
|
5
|
+
import org.embulk.spi.FilterPlugin;
|
6
|
+
import org.embulk.test.TestingEmbulk;
|
7
|
+
import org.junit.Before;
|
8
|
+
import org.junit.Rule;
|
9
|
+
import org.junit.Test;
|
10
|
+
|
11
|
+
import java.io.IOException;
|
12
|
+
import java.nio.file.Path;
|
13
|
+
|
14
|
+
import static org.embulk.test.EmbulkTests.copyResource;
|
15
|
+
import static org.embulk.test.EmbulkTests.readResource;
|
16
|
+
import static org.embulk.test.EmbulkTests.readSortedFile;
|
17
|
+
import static org.hamcrest.Matchers.is;
|
18
|
+
import static org.junit.Assert.assertThat;
|
19
|
+
|
20
|
+
public class TestCalciteFilterPlugin
|
21
|
+
{
|
22
|
+
private static final String RESOURCE_NAME_PREFIX = "org/embulk/filter/calcite/test/";
|
23
|
+
|
24
|
+
@Rule
|
25
|
+
public TestingEmbulk embulk = TestingEmbulk.builder()
|
26
|
+
.registerPlugin(FilterPlugin.class, "calcite", CalciteFilterPlugin.class)
|
27
|
+
.build();
|
28
|
+
|
29
|
+
private ConfigSource baseConfig;
|
30
|
+
|
31
|
+
@Before
|
32
|
+
public void setup()
|
33
|
+
{
|
34
|
+
baseConfig = embulk.newConfig();
|
35
|
+
}
|
36
|
+
|
37
|
+
@Test
|
38
|
+
public void testSimple() throws Exception
|
39
|
+
{
|
40
|
+
assertRecordsByResource(embulk, "test_simple_in.yml", "test_simple_filter.yml",
|
41
|
+
"test_simple_source.csv", "test_simple_expected.csv");
|
42
|
+
}
|
43
|
+
|
44
|
+
@Test
|
45
|
+
public void testIntOperators() throws Exception
|
46
|
+
{
|
47
|
+
assertRecordsByResource(embulk, "test_int_ops_in.yml", "test_int_ops_filter.yml",
|
48
|
+
"test_int_ops_source.csv", "test_int_ops_expected.csv");
|
49
|
+
}
|
50
|
+
|
51
|
+
@Test
|
52
|
+
public void testWhereIntCondition() throws Exception
|
53
|
+
{
|
54
|
+
assertRecordsByResource(embulk, "test_where_int_cond_in.yml", "test_where_int_cond_filter.yml",
|
55
|
+
"test_where_int_cond_source.csv", "test_where_int_cond_expected.csv");
|
56
|
+
}
|
57
|
+
|
58
|
+
@Test
|
59
|
+
public void testStringOperators() throws Exception
|
60
|
+
{
|
61
|
+
assertRecordsByResource(embulk, "test_string_ops_in.yml", "test_string_ops_filter.yml",
|
62
|
+
"test_string_ops_source.csv", "test_string_ops_expected.csv");
|
63
|
+
}
|
64
|
+
|
65
|
+
@Test
|
66
|
+
public void testWhereStringCondition() throws Exception
|
67
|
+
{
|
68
|
+
assertRecordsByResource(embulk, "test_where_string_cond_in.yml", "test_where_string_cond_filter.yml",
|
69
|
+
"test_where_string_cond_source.csv", "test_where_string_cond_expected.csv");
|
70
|
+
}
|
71
|
+
|
72
|
+
static void assertRecordsByResource(TestingEmbulk embulk,
|
73
|
+
String inConfigYamlResourceName, String filterConfigYamlResourceName,
|
74
|
+
String sourceCsvResourceName, String resultCsvResourceName)
|
75
|
+
throws IOException
|
76
|
+
{
|
77
|
+
Path inputPath = embulk.createTempFile("csv");
|
78
|
+
Path outputPath = embulk.createTempFile("csv");
|
79
|
+
|
80
|
+
// in: config
|
81
|
+
copyResource(RESOURCE_NAME_PREFIX + sourceCsvResourceName, inputPath);
|
82
|
+
ConfigSource inConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + inConfigYamlResourceName)
|
83
|
+
.set("path_prefix", inputPath.toAbsolutePath().toString());
|
84
|
+
|
85
|
+
// remove_columns filter config
|
86
|
+
ConfigSource filterConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + filterConfigYamlResourceName);
|
87
|
+
|
88
|
+
TestingEmbulk.RunResult result = embulk.inputBuilder()
|
89
|
+
.in(inConfig)
|
90
|
+
.filters(ImmutableList.of(filterConfig))
|
91
|
+
.outputPath(outputPath)
|
92
|
+
.run();
|
93
|
+
|
94
|
+
assertThat(readSortedFile(outputPath), is(readResource(RESOURCE_NAME_PREFIX + resultCsvResourceName)));
|
95
|
+
}
|
96
|
+
}
|