embulk-filter-calcite 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.travis.yml +4 -0
- data/CHANGELOG.md +3 -0
- data/README.md +61 -0
- data/build.gradle +112 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +160 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/filter/calcite.rb +3 -0
- data/src/main/java/org/embulk/filter/calcite/CalciteFilterPlugin.java +309 -0
- data/src/main/java/org/embulk/filter/calcite/PageConverter.java +119 -0
- data/src/main/java/org/embulk/filter/calcite/adapter/page/PageEnumerator.java +56 -0
- data/src/main/java/org/embulk/filter/calcite/adapter/page/PageFieldType.java +44 -0
- data/src/main/java/org/embulk/filter/calcite/adapter/page/PageSchema.java +26 -0
- data/src/main/java/org/embulk/filter/calcite/adapter/page/PageSchemaFactory.java +26 -0
- data/src/main/java/org/embulk/filter/calcite/adapter/page/PageTable.java +69 -0
- data/src/main/java/org/embulk/filter/calcite/getter/FilterColumnGetterFactory.java +45 -0
- data/src/main/java/org/embulk/filter/calcite/getter/UTCTimestampColumnGetter.java +41 -0
- data/src/test/java/org/embulk/filter/calcite/TestCalciteFilterPlugin.java +96 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_int_ops_expected.csv +4 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_int_ops_filter.yml +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_int_ops_in.yml +18 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_int_ops_source.csv +5 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_simple_expected.csv +4 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_simple_filter.yml +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_simple_in.yml +18 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_simple_source.csv +5 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_string_ops_expected.csv +4 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_string_ops_filter.yml +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_string_ops_in.yml +18 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_string_ops_source.csv +5 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_int_cond_expected.csv +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_int_cond_filter.yml +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_int_cond_in.yml +18 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_int_cond_source.csv +5 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_string_cond_expected.csv +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_string_cond_filter.yml +2 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_string_cond_in.yml +18 -0
- data/src/test/resources/org/embulk/filter/calcite/test/test_where_string_cond_source.csv +5 -0
- metadata +137 -0
@@ -0,0 +1,119 @@
|
|
1
|
+
package org.embulk.filter.calcite;
|
2
|
+
|
3
|
+
import org.embulk.spi.Column;
|
4
|
+
import org.embulk.spi.ColumnVisitor;
|
5
|
+
import org.embulk.spi.PageReader;
|
6
|
+
import org.embulk.spi.Schema;
|
7
|
+
|
8
|
+
import java.math.BigDecimal;
|
9
|
+
import java.util.TimeZone;
|
10
|
+
|
11
|
+
/**
|
12
|
+
* This class converts Embulk's Page values into Calcite's row types. It refers to
|
13
|
+
* org.apache.calcite.adapter.csv.CsvEnumerator.
|
14
|
+
*/
|
15
|
+
public class PageConverter
|
16
|
+
implements ColumnVisitor
|
17
|
+
{
|
18
|
+
private final TimeZone defaultTimeZone;
|
19
|
+
private final Object[] row;
|
20
|
+
private PageReader pageReader;
|
21
|
+
|
22
|
+
public PageConverter(Schema schema, TimeZone defaultTimeZone)
|
23
|
+
{
|
24
|
+
this.defaultTimeZone = defaultTimeZone;
|
25
|
+
this.row = new Object[schema.getColumnCount()];
|
26
|
+
}
|
27
|
+
|
28
|
+
public Object[] getRow()
|
29
|
+
{
|
30
|
+
return row;
|
31
|
+
}
|
32
|
+
|
33
|
+
public void setPageReader(PageReader pageReader)
|
34
|
+
{
|
35
|
+
this.pageReader = pageReader;
|
36
|
+
}
|
37
|
+
|
38
|
+
@Override
|
39
|
+
public void booleanColumn(Column column)
|
40
|
+
{
|
41
|
+
// Embulk's boolean is converted into Java's boolean
|
42
|
+
int i = column.getIndex();
|
43
|
+
if (pageReader.isNull(i)) {
|
44
|
+
row[i] = null;
|
45
|
+
}
|
46
|
+
else {
|
47
|
+
row[i] = pageReader.getBoolean(i);
|
48
|
+
}
|
49
|
+
}
|
50
|
+
|
51
|
+
@Override
|
52
|
+
public void longColumn(Column column)
|
53
|
+
{
|
54
|
+
// Embulk's long is converted into long type
|
55
|
+
int i = column.getIndex();
|
56
|
+
if (pageReader.isNull(i)) {
|
57
|
+
row[i] = null;
|
58
|
+
}
|
59
|
+
else {
|
60
|
+
row[i] = pageReader.getLong(i);
|
61
|
+
}
|
62
|
+
}
|
63
|
+
|
64
|
+
@Override
|
65
|
+
public void doubleColumn(Column column)
|
66
|
+
{
|
67
|
+
// Embulk's double is converted into java.math.BigDecimal
|
68
|
+
int i = column.getIndex();
|
69
|
+
if (pageReader.isNull(i)) {
|
70
|
+
row[i] = null;
|
71
|
+
}
|
72
|
+
else {
|
73
|
+
row[i] = new BigDecimal(pageReader.getDouble(i));
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
@Override
|
78
|
+
public void stringColumn(Column column)
|
79
|
+
{
|
80
|
+
// Embulk's string is converted into java.lang.String
|
81
|
+
int i = column.getIndex();
|
82
|
+
if (pageReader.isNull(i)) {
|
83
|
+
row[i] = null;
|
84
|
+
}
|
85
|
+
else {
|
86
|
+
row[i] = pageReader.getString(i);
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
@Override
|
91
|
+
public void timestampColumn(Column column)
|
92
|
+
{
|
93
|
+
int i = column.getIndex();
|
94
|
+
if (pageReader.isNull(i)) {
|
95
|
+
row[i] = null;
|
96
|
+
}
|
97
|
+
else {
|
98
|
+
// Embulk's timestamp is converted into java.sql.Timestmap
|
99
|
+
org.embulk.spi.time.Timestamp timestamp = pageReader.getTimestamp(i);
|
100
|
+
long milliseconds = timestamp.getEpochSecond() * 1000 + timestamp.getNano() / 1000000;
|
101
|
+
java.sql.Timestamp ts = new java.sql.Timestamp(milliseconds);
|
102
|
+
ts.setNanos(timestamp.getNano());
|
103
|
+
row[i] = ts;
|
104
|
+
}
|
105
|
+
}
|
106
|
+
|
107
|
+
@Override
|
108
|
+
public void jsonColumn(Column column)
|
109
|
+
{
|
110
|
+
// Embulk's json is converted into Java's string
|
111
|
+
int i = column.getIndex();
|
112
|
+
if (pageReader.isNull(i)) {
|
113
|
+
row[i] = null;
|
114
|
+
}
|
115
|
+
else {
|
116
|
+
row[i] = pageReader.getJson(i).toJson();
|
117
|
+
}
|
118
|
+
}
|
119
|
+
}
|
@@ -0,0 +1,56 @@
|
|
1
|
+
package org.embulk.filter.calcite.adapter.page;
|
2
|
+
|
3
|
+
import org.apache.calcite.linq4j.Enumerator;
|
4
|
+
import org.embulk.filter.calcite.PageConverter;
|
5
|
+
import org.embulk.spi.Page;
|
6
|
+
import org.embulk.spi.PageReader;
|
7
|
+
import org.embulk.spi.Schema;
|
8
|
+
|
9
|
+
public class PageEnumerator
|
10
|
+
implements Enumerator<Object[]>
|
11
|
+
{
|
12
|
+
private final Schema schema;
|
13
|
+
private final PageConverter pageConverter;
|
14
|
+
private final PageReader pageReader;
|
15
|
+
|
16
|
+
public PageEnumerator(Schema schema, PageConverter pageConverter)
|
17
|
+
{
|
18
|
+
this.schema = schema;
|
19
|
+
this.pageReader = new PageReader(schema);
|
20
|
+
this.pageConverter = pageConverter;
|
21
|
+
}
|
22
|
+
|
23
|
+
public void setPage(Page page)
|
24
|
+
{
|
25
|
+
this.pageReader.setPage(page);
|
26
|
+
this.pageConverter.setPageReader(pageReader);
|
27
|
+
}
|
28
|
+
|
29
|
+
@Override
|
30
|
+
public Object[] current()
|
31
|
+
{
|
32
|
+
// this is called from org.apache.calcite.linq4j.EnumerableDefaults
|
33
|
+
schema.visitColumns(pageConverter);
|
34
|
+
return pageConverter.getRow();
|
35
|
+
}
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public boolean moveNext()
|
39
|
+
{
|
40
|
+
return pageReader.nextRecord();
|
41
|
+
}
|
42
|
+
|
43
|
+
@Override
|
44
|
+
public void reset()
|
45
|
+
{
|
46
|
+
throw new UnsupportedOperationException();
|
47
|
+
}
|
48
|
+
|
49
|
+
@Override
|
50
|
+
public void close()
|
51
|
+
{
|
52
|
+
if (pageReader != null) {
|
53
|
+
pageReader.close();
|
54
|
+
}
|
55
|
+
}
|
56
|
+
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
package org.embulk.filter.calcite.adapter.page;
|
2
|
+
|
3
|
+
import org.apache.calcite.adapter.java.JavaTypeFactory;
|
4
|
+
import org.apache.calcite.rel.type.RelDataType;
|
5
|
+
|
6
|
+
import java.util.HashMap;
|
7
|
+
import java.util.Map;
|
8
|
+
|
9
|
+
enum PageFieldType
|
10
|
+
{
|
11
|
+
STRING(String.class, "string"),
|
12
|
+
BOOLEAN(Boolean.class, Boolean.TYPE.getSimpleName()),
|
13
|
+
LONG(Long.class, Long.TYPE.getSimpleName()),
|
14
|
+
DOUBLE(Double.class, Double.TYPE.getSimpleName()),
|
15
|
+
TIMESTAMP(java.sql.Timestamp.class, "timestamp");
|
16
|
+
|
17
|
+
private static final Map<String, PageFieldType> MAP = new HashMap<>();
|
18
|
+
|
19
|
+
static
|
20
|
+
{
|
21
|
+
for (PageFieldType value : values()) {
|
22
|
+
MAP.put(value.simpleName, value);
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
private final Class clazz;
|
27
|
+
private final String simpleName;
|
28
|
+
|
29
|
+
private PageFieldType(Class clazz, String simpleName)
|
30
|
+
{
|
31
|
+
this.clazz = clazz;
|
32
|
+
this.simpleName = simpleName;
|
33
|
+
}
|
34
|
+
|
35
|
+
public RelDataType toType(JavaTypeFactory typeFactory)
|
36
|
+
{
|
37
|
+
return typeFactory.createJavaType(clazz);
|
38
|
+
}
|
39
|
+
|
40
|
+
public static PageFieldType of(String typeString)
|
41
|
+
{
|
42
|
+
return MAP.get(typeString);
|
43
|
+
}
|
44
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
package org.embulk.filter.calcite.adapter.page;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableMap;
|
4
|
+
import org.apache.calcite.schema.Table;
|
5
|
+
import org.apache.calcite.schema.impl.AbstractSchema;
|
6
|
+
import org.embulk.filter.calcite.PageConverter;
|
7
|
+
import org.embulk.spi.Schema;
|
8
|
+
|
9
|
+
import java.util.Map;
|
10
|
+
|
11
|
+
public class PageSchema
|
12
|
+
extends AbstractSchema
|
13
|
+
{
|
14
|
+
public static Schema schema;
|
15
|
+
|
16
|
+
public PageSchema()
|
17
|
+
{
|
18
|
+
super();
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
protected Map<String, Table> getTableMap()
|
23
|
+
{
|
24
|
+
return ImmutableMap.<String, Table>of("$PAGES", new PageTable(schema, null));
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
package org.embulk.filter.calcite.adapter.page;
|
2
|
+
|
3
|
+
import org.apache.calcite.schema.SchemaFactory;
|
4
|
+
import org.apache.calcite.schema.SchemaPlus;
|
5
|
+
|
6
|
+
import java.util.Map;
|
7
|
+
|
8
|
+
/**
|
9
|
+
* Factory that creates a {@link PageSchema}.
|
10
|
+
* @see https://github.com/apache/calcite/blob/master/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvSchemaFactory.java
|
11
|
+
*/
|
12
|
+
public class PageSchemaFactory
|
13
|
+
implements SchemaFactory
|
14
|
+
{
|
15
|
+
public static final PageSchemaFactory INSTANCE = new PageSchemaFactory();
|
16
|
+
|
17
|
+
private PageSchemaFactory()
|
18
|
+
{
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public org.apache.calcite.schema.Schema create(SchemaPlus parentSchema, String name, Map<String, Object> operand)
|
23
|
+
{
|
24
|
+
return new PageSchema();
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,69 @@
|
|
1
|
+
package org.embulk.filter.calcite.adapter.page;
|
2
|
+
|
3
|
+
import org.apache.calcite.DataContext;
|
4
|
+
import org.apache.calcite.adapter.java.JavaTypeFactory;
|
5
|
+
import org.apache.calcite.linq4j.AbstractEnumerable;
|
6
|
+
import org.apache.calcite.linq4j.Enumerable;
|
7
|
+
import org.apache.calcite.linq4j.Enumerator;
|
8
|
+
import org.apache.calcite.rel.type.RelDataType;
|
9
|
+
import org.apache.calcite.rel.type.RelDataTypeFactory;
|
10
|
+
import org.apache.calcite.rel.type.RelProtoDataType;
|
11
|
+
import org.apache.calcite.schema.ScannableTable;
|
12
|
+
import org.apache.calcite.schema.impl.AbstractTable;
|
13
|
+
import org.apache.calcite.util.Pair;
|
14
|
+
import org.embulk.filter.calcite.PageConverter;
|
15
|
+
import org.embulk.spi.Column;
|
16
|
+
import org.embulk.spi.Page;
|
17
|
+
import org.embulk.spi.Schema;
|
18
|
+
|
19
|
+
import java.util.ArrayList;
|
20
|
+
import java.util.List;
|
21
|
+
|
22
|
+
public class PageTable
|
23
|
+
extends AbstractTable
|
24
|
+
implements ScannableTable
|
25
|
+
{
|
26
|
+
public static ThreadLocal<PageConverter> pageConverter = new ThreadLocal<>();
|
27
|
+
public static ThreadLocal<Page> page = new ThreadLocal<>();
|
28
|
+
|
29
|
+
private final Schema schema;
|
30
|
+
private final RelProtoDataType protoRowType;
|
31
|
+
|
32
|
+
PageTable(Schema schema, RelProtoDataType protoRowType)
|
33
|
+
{
|
34
|
+
this.schema = schema;
|
35
|
+
this.protoRowType = protoRowType;
|
36
|
+
}
|
37
|
+
|
38
|
+
public RelDataType getRowType(RelDataTypeFactory typeFactory)
|
39
|
+
{
|
40
|
+
if (protoRowType != null) {
|
41
|
+
return protoRowType.apply(typeFactory);
|
42
|
+
}
|
43
|
+
|
44
|
+
final List<RelDataType> types = new ArrayList<>(schema.getColumnCount());
|
45
|
+
final List<String> names = new ArrayList<>(schema.getColumnCount());
|
46
|
+
|
47
|
+
for (Column column : schema.getColumns()) {
|
48
|
+
names.add(column.getName());
|
49
|
+
PageFieldType type = PageFieldType.of(column.getType().getName());
|
50
|
+
types.add(type.toType((JavaTypeFactory) typeFactory));
|
51
|
+
}
|
52
|
+
|
53
|
+
return typeFactory.createStructType(Pair.zip(names, types));
|
54
|
+
}
|
55
|
+
|
56
|
+
public Enumerable<Object[]> scan(DataContext root)
|
57
|
+
{
|
58
|
+
return new AbstractEnumerable<Object[]>() {
|
59
|
+
public Enumerator<Object[]> enumerator()
|
60
|
+
{
|
61
|
+
PageEnumerator enumerator = new PageEnumerator(schema, pageConverter.get());
|
62
|
+
if (page.get() != null) {
|
63
|
+
enumerator.setPage(page.get());
|
64
|
+
}
|
65
|
+
return enumerator;
|
66
|
+
}
|
67
|
+
};
|
68
|
+
}
|
69
|
+
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
package org.embulk.filter.calcite.getter;
|
2
|
+
|
3
|
+
import org.embulk.input.jdbc.AbstractJdbcInputPlugin;
|
4
|
+
import org.embulk.input.jdbc.JdbcColumn;
|
5
|
+
import org.embulk.input.jdbc.JdbcColumnOption;
|
6
|
+
import org.embulk.input.jdbc.JdbcInputConnection;
|
7
|
+
import org.embulk.input.jdbc.getter.ColumnGetter;
|
8
|
+
import org.embulk.input.jdbc.getter.ColumnGetterFactory;
|
9
|
+
import org.embulk.spi.PageBuilder;
|
10
|
+
import org.embulk.spi.time.TimestampFormatter;
|
11
|
+
import org.embulk.spi.type.Type;
|
12
|
+
import org.joda.time.DateTimeZone;
|
13
|
+
|
14
|
+
public class FilterColumnGetterFactory
|
15
|
+
extends ColumnGetterFactory
|
16
|
+
{
|
17
|
+
private final DateTimeZone defaultTimeZone;
|
18
|
+
|
19
|
+
public FilterColumnGetterFactory(PageBuilder to, DateTimeZone defaultTimeZone)
|
20
|
+
{
|
21
|
+
super(to, defaultTimeZone);
|
22
|
+
this.defaultTimeZone = defaultTimeZone; // TODO make change super.defaultTimeZone field protected
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public ColumnGetter newColumnGetter(JdbcInputConnection con, AbstractJdbcInputPlugin.PluginTask task, JdbcColumn column, JdbcColumnOption option)
|
27
|
+
{
|
28
|
+
String valueType = option.getValueType();
|
29
|
+
Type toType = getToType(option);
|
30
|
+
if (valueType.equals("coalesce") && sqlTypeToValueType(column, column.getSqlType()).equals("timestamp")) {
|
31
|
+
return new UTCTimestampColumnGetter(to, toType, newTimestampFormatter(option, "%Y-%m-%d"));
|
32
|
+
}
|
33
|
+
else {
|
34
|
+
return super.newColumnGetter(con, task, column, option);
|
35
|
+
}
|
36
|
+
}
|
37
|
+
|
38
|
+
private TimestampFormatter newTimestampFormatter(JdbcColumnOption option, String defaultTimestampFormat)
|
39
|
+
{
|
40
|
+
return new TimestampFormatter(
|
41
|
+
option.getJRuby(),
|
42
|
+
option.getTimestampFormat().isPresent() ? option.getTimestampFormat().get().getFormat() : defaultTimestampFormat,
|
43
|
+
option.getTimeZone().or(defaultTimeZone));
|
44
|
+
}
|
45
|
+
}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
package org.embulk.filter.calcite.getter;
|
2
|
+
|
3
|
+
import org.embulk.input.jdbc.getter.TimestampColumnGetter;
|
4
|
+
import org.embulk.spi.PageBuilder;
|
5
|
+
import org.embulk.spi.time.Timestamp;
|
6
|
+
import org.embulk.spi.time.TimestampFormatter;
|
7
|
+
import org.embulk.spi.type.Type;
|
8
|
+
|
9
|
+
import java.sql.ResultSet;
|
10
|
+
import java.sql.SQLException;
|
11
|
+
import java.util.Calendar;
|
12
|
+
|
13
|
+
import static java.util.Calendar.getInstance;
|
14
|
+
import static java.util.TimeZone.getTimeZone;
|
15
|
+
|
16
|
+
public class UTCTimestampColumnGetter
|
17
|
+
extends TimestampColumnGetter
|
18
|
+
{
|
19
|
+
private static ThreadLocal<Calendar> calendar = new ThreadLocal<Calendar>() {
|
20
|
+
@Override
|
21
|
+
protected Calendar initialValue()
|
22
|
+
{
|
23
|
+
return getInstance(getTimeZone("UTC"));
|
24
|
+
}
|
25
|
+
};
|
26
|
+
|
27
|
+
public UTCTimestampColumnGetter(PageBuilder to, Type toType, TimestampFormatter timestampFormatter)
|
28
|
+
{
|
29
|
+
super(to, toType, timestampFormatter);
|
30
|
+
}
|
31
|
+
|
32
|
+
@Override
|
33
|
+
protected void fetch(ResultSet from, int fromIndex)
|
34
|
+
throws SQLException
|
35
|
+
{
|
36
|
+
java.sql.Timestamp timestamp = from.getTimestamp(fromIndex, calendar.get());
|
37
|
+
if (timestamp != null) {
|
38
|
+
value = Timestamp.ofEpochSecond(timestamp.getTime() / 1000, timestamp.getNanos());
|
39
|
+
}
|
40
|
+
}
|
41
|
+
}
|
@@ -0,0 +1,96 @@
|
|
1
|
+
package org.embulk.filter.calcite;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableList;
|
4
|
+
import org.embulk.config.ConfigSource;
|
5
|
+
import org.embulk.spi.FilterPlugin;
|
6
|
+
import org.embulk.test.TestingEmbulk;
|
7
|
+
import org.junit.Before;
|
8
|
+
import org.junit.Rule;
|
9
|
+
import org.junit.Test;
|
10
|
+
|
11
|
+
import java.io.IOException;
|
12
|
+
import java.nio.file.Path;
|
13
|
+
|
14
|
+
import static org.embulk.test.EmbulkTests.copyResource;
|
15
|
+
import static org.embulk.test.EmbulkTests.readResource;
|
16
|
+
import static org.embulk.test.EmbulkTests.readSortedFile;
|
17
|
+
import static org.hamcrest.Matchers.is;
|
18
|
+
import static org.junit.Assert.assertThat;
|
19
|
+
|
20
|
+
public class TestCalciteFilterPlugin
|
21
|
+
{
|
22
|
+
private static final String RESOURCE_NAME_PREFIX = "org/embulk/filter/calcite/test/";
|
23
|
+
|
24
|
+
@Rule
|
25
|
+
public TestingEmbulk embulk = TestingEmbulk.builder()
|
26
|
+
.registerPlugin(FilterPlugin.class, "calcite", CalciteFilterPlugin.class)
|
27
|
+
.build();
|
28
|
+
|
29
|
+
private ConfigSource baseConfig;
|
30
|
+
|
31
|
+
@Before
|
32
|
+
public void setup()
|
33
|
+
{
|
34
|
+
baseConfig = embulk.newConfig();
|
35
|
+
}
|
36
|
+
|
37
|
+
@Test
|
38
|
+
public void testSimple() throws Exception
|
39
|
+
{
|
40
|
+
assertRecordsByResource(embulk, "test_simple_in.yml", "test_simple_filter.yml",
|
41
|
+
"test_simple_source.csv", "test_simple_expected.csv");
|
42
|
+
}
|
43
|
+
|
44
|
+
@Test
|
45
|
+
public void testIntOperators() throws Exception
|
46
|
+
{
|
47
|
+
assertRecordsByResource(embulk, "test_int_ops_in.yml", "test_int_ops_filter.yml",
|
48
|
+
"test_int_ops_source.csv", "test_int_ops_expected.csv");
|
49
|
+
}
|
50
|
+
|
51
|
+
@Test
|
52
|
+
public void testWhereIntCondition() throws Exception
|
53
|
+
{
|
54
|
+
assertRecordsByResource(embulk, "test_where_int_cond_in.yml", "test_where_int_cond_filter.yml",
|
55
|
+
"test_where_int_cond_source.csv", "test_where_int_cond_expected.csv");
|
56
|
+
}
|
57
|
+
|
58
|
+
@Test
|
59
|
+
public void testStringOperators() throws Exception
|
60
|
+
{
|
61
|
+
assertRecordsByResource(embulk, "test_string_ops_in.yml", "test_string_ops_filter.yml",
|
62
|
+
"test_string_ops_source.csv", "test_string_ops_expected.csv");
|
63
|
+
}
|
64
|
+
|
65
|
+
@Test
|
66
|
+
public void testWhereStringCondition() throws Exception
|
67
|
+
{
|
68
|
+
assertRecordsByResource(embulk, "test_where_string_cond_in.yml", "test_where_string_cond_filter.yml",
|
69
|
+
"test_where_string_cond_source.csv", "test_where_string_cond_expected.csv");
|
70
|
+
}
|
71
|
+
|
72
|
+
static void assertRecordsByResource(TestingEmbulk embulk,
|
73
|
+
String inConfigYamlResourceName, String filterConfigYamlResourceName,
|
74
|
+
String sourceCsvResourceName, String resultCsvResourceName)
|
75
|
+
throws IOException
|
76
|
+
{
|
77
|
+
Path inputPath = embulk.createTempFile("csv");
|
78
|
+
Path outputPath = embulk.createTempFile("csv");
|
79
|
+
|
80
|
+
// in: config
|
81
|
+
copyResource(RESOURCE_NAME_PREFIX + sourceCsvResourceName, inputPath);
|
82
|
+
ConfigSource inConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + inConfigYamlResourceName)
|
83
|
+
.set("path_prefix", inputPath.toAbsolutePath().toString());
|
84
|
+
|
85
|
+
// remove_columns filter config
|
86
|
+
ConfigSource filterConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + filterConfigYamlResourceName);
|
87
|
+
|
88
|
+
TestingEmbulk.RunResult result = embulk.inputBuilder()
|
89
|
+
.in(inConfig)
|
90
|
+
.filters(ImmutableList.of(filterConfig))
|
91
|
+
.outputPath(outputPath)
|
92
|
+
.run();
|
93
|
+
|
94
|
+
assertThat(readSortedFile(outputPath), is(readResource(RESOURCE_NAME_PREFIX + resultCsvResourceName)));
|
95
|
+
}
|
96
|
+
}
|