embulk-filter-calcite 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (25) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -1
  3. data/CHANGELOG.md +12 -0
  4. data/README.md +7 -0
  5. data/build.gradle +7 -11
  6. data/config/checkstyle/README.md +6 -0
  7. data/config/checkstyle/checkstyle-suppressions.xml +8 -0
  8. data/config/checkstyle/checkstyle.xml +195 -104
  9. data/config/checkstyle/google_checks.xml +218 -0
  10. data/src/main/java/org/embulk/filter/calcite/CalciteFilterPlugin.java +111 -118
  11. data/src/main/java/org/embulk/filter/calcite/PageConverter.java +19 -36
  12. data/src/main/java/org/embulk/filter/calcite/adapter/page/PageEnumerator.java +14 -15
  13. data/src/main/java/org/embulk/filter/calcite/adapter/page/PageFieldType.java +9 -15
  14. data/src/main/java/org/embulk/filter/calcite/adapter/page/PageSchema.java +4 -9
  15. data/src/main/java/org/embulk/filter/calcite/adapter/page/PageSchemaFactory.java +7 -8
  16. data/src/main/java/org/embulk/filter/calcite/adapter/page/PageTable.java +23 -14
  17. data/src/main/java/org/embulk/filter/calcite/getter/FilterColumnGetterFactory.java +23 -11
  18. data/src/main/java/org/embulk/filter/calcite/getter/FilterTimestampColumnGetter.java +10 -14
  19. data/src/test/java/org/embulk/filter/calcite/TestCalciteFilterPlugin.java +81 -55
  20. data/src/test/resources/org/embulk/filter/calcite/test/test_timestamp_conv_expected.csv +4 -0
  21. data/src/test/resources/org/embulk/filter/calcite/test/test_timestamp_conv_filter.yml +3 -0
  22. data/src/test/resources/org/embulk/filter/calcite/test/test_timestamp_conv_in.yml +18 -0
  23. data/src/test/resources/org/embulk/filter/calcite/test/test_timestamp_conv_source.csv +5 -0
  24. metadata +14 -7
  25. data/config/checkstyle/default.xml +0 -108
@@ -6,49 +6,48 @@ import org.embulk.spi.Page;
6
6
  import org.embulk.spi.PageReader;
7
7
  import org.embulk.spi.Schema;
8
8
 
9
- public class PageEnumerator
10
- implements Enumerator<Object[]>
11
- {
9
+ public class PageEnumerator implements Enumerator<Object[]> {
10
+
12
11
  private final Schema schema;
13
12
  private final PageConverter pageConverter;
14
13
  private final PageReader pageReader;
15
14
 
16
- public PageEnumerator(Schema schema, PageConverter pageConverter)
17
- {
15
+ /**
16
+ * Creates an enumerator to read {@code Page} objects
17
+ *
18
+ * @param schema a {@code Schema} that is used for reading {@code Page} objects.
19
+ * @param pageConverter a converter to translate values from Embulk types to Calcite types.
20
+ */
21
+ public PageEnumerator(Schema schema, PageConverter pageConverter) {
18
22
  this.schema = schema;
19
23
  this.pageReader = new PageReader(schema);
20
24
  this.pageConverter = pageConverter;
21
25
  }
22
26
 
23
- public void setPage(Page page)
24
- {
27
+ public void setPage(Page page) {
25
28
  this.pageReader.setPage(page);
26
29
  this.pageConverter.setPageReader(pageReader);
27
30
  }
28
31
 
29
32
  @Override
30
- public Object[] current()
31
- {
33
+ public Object[] current() {
32
34
  // this is called from org.apache.calcite.linq4j.EnumerableDefaults
33
35
  schema.visitColumns(pageConverter);
34
36
  return pageConverter.getRow();
35
37
  }
36
38
 
37
39
  @Override
38
- public boolean moveNext()
39
- {
40
+ public boolean moveNext() {
40
41
  return pageReader.nextRecord();
41
42
  }
42
43
 
43
44
  @Override
44
- public void reset()
45
- {
45
+ public void reset() {
46
46
  throw new UnsupportedOperationException();
47
47
  }
48
48
 
49
49
  @Override
50
- public void close()
51
- {
50
+ public void close() {
52
51
  if (pageReader != null) {
53
52
  pageReader.close();
54
53
  }
@@ -1,13 +1,11 @@
1
1
  package org.embulk.filter.calcite.adapter.page;
2
2
 
3
- import org.apache.calcite.adapter.java.JavaTypeFactory;
4
- import org.apache.calcite.rel.type.RelDataType;
5
-
6
3
  import java.util.HashMap;
7
4
  import java.util.Map;
5
+ import org.apache.calcite.adapter.java.JavaTypeFactory;
6
+ import org.apache.calcite.rel.type.RelDataType;
8
7
 
9
- enum PageFieldType
10
- {
8
+ enum PageFieldType {
11
9
  STRING(String.class, "string"),
12
10
  BOOLEAN(Boolean.class, Boolean.TYPE.getSimpleName()),
13
11
  LONG(Long.class, Long.TYPE.getSimpleName()),
@@ -16,8 +14,7 @@ enum PageFieldType
16
14
 
17
15
  private static final Map<String, PageFieldType> MAP = new HashMap<>();
18
16
 
19
- static
20
- {
17
+ static {
21
18
  for (PageFieldType value : values()) {
22
19
  MAP.put(value.simpleName, value);
23
20
  }
@@ -26,19 +23,16 @@ enum PageFieldType
26
23
  private final Class clazz;
27
24
  private final String simpleName;
28
25
 
29
- private PageFieldType(Class clazz, String simpleName)
30
- {
26
+ private PageFieldType(Class clazz, String simpleName) {
31
27
  this.clazz = clazz;
32
28
  this.simpleName = simpleName;
33
29
  }
34
30
 
35
- public RelDataType toType(JavaTypeFactory typeFactory)
36
- {
37
- return typeFactory.createJavaType(clazz);
31
+ public static PageFieldType of(String typeString) {
32
+ return MAP.get(typeString);
38
33
  }
39
34
 
40
- public static PageFieldType of(String typeString)
41
- {
42
- return MAP.get(typeString);
35
+ public RelDataType toType(JavaTypeFactory typeFactory) {
36
+ return typeFactory.createJavaType(clazz);
43
37
  }
44
38
  }
@@ -1,26 +1,21 @@
1
1
  package org.embulk.filter.calcite.adapter.page;
2
2
 
3
3
  import com.google.common.collect.ImmutableMap;
4
+ import java.util.Map;
4
5
  import org.apache.calcite.schema.Table;
5
6
  import org.apache.calcite.schema.impl.AbstractSchema;
6
- import org.embulk.filter.calcite.PageConverter;
7
7
  import org.embulk.spi.Schema;
8
8
 
9
- import java.util.Map;
9
+ public class PageSchema extends AbstractSchema {
10
10
 
11
- public class PageSchema
12
- extends AbstractSchema
13
- {
14
11
  public static Schema schema;
15
12
 
16
- public PageSchema()
17
- {
13
+ public PageSchema() {
18
14
  super();
19
15
  }
20
16
 
21
17
  @Override
22
- protected Map<String, Table> getTableMap()
23
- {
18
+ protected Map<String, Table> getTableMap() {
24
19
  return ImmutableMap.<String, Table>of("$PAGES", new PageTable(schema, null));
25
20
  }
26
21
  }
@@ -1,26 +1,25 @@
1
1
  package org.embulk.filter.calcite.adapter.page;
2
2
 
3
+ import java.util.Map;
3
4
  import org.apache.calcite.schema.SchemaFactory;
4
5
  import org.apache.calcite.schema.SchemaPlus;
5
6
 
6
- import java.util.Map;
7
-
8
7
  /**
9
8
  * Factory that creates a {@link PageSchema}.
9
+ *
10
10
  * @see https://github.com/apache/calcite/blob/master/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvSchemaFactory.java
11
11
  */
12
12
  public class PageSchemaFactory
13
- implements SchemaFactory
14
- {
13
+ implements SchemaFactory {
14
+
15
15
  public static final PageSchemaFactory INSTANCE = new PageSchemaFactory();
16
16
 
17
- private PageSchemaFactory()
18
- {
17
+ private PageSchemaFactory() {
19
18
  }
20
19
 
21
20
  @Override
22
- public org.apache.calcite.schema.Schema create(SchemaPlus parentSchema, String name, Map<String, Object> operand)
23
- {
21
+ public org.apache.calcite.schema.Schema create(SchemaPlus parentSchema, String name,
22
+ Map<String, Object> operand) {
24
23
  return new PageSchema();
25
24
  }
26
25
  }
@@ -1,5 +1,7 @@
1
1
  package org.embulk.filter.calcite.adapter.page;
2
2
 
3
+ import java.util.ArrayList;
4
+ import java.util.List;
3
5
  import org.apache.calcite.DataContext;
4
6
  import org.apache.calcite.adapter.java.JavaTypeFactory;
5
7
  import org.apache.calcite.linq4j.AbstractEnumerable;
@@ -16,27 +18,29 @@ import org.embulk.spi.Column;
16
18
  import org.embulk.spi.Page;
17
19
  import org.embulk.spi.Schema;
18
20
 
19
- import java.util.ArrayList;
20
- import java.util.List;
21
+ /**
22
+ * Base class for table that reads Pages.
23
+ */
24
+ public class PageTable extends AbstractTable implements ScannableTable {
21
25
 
22
- public class PageTable
23
- extends AbstractTable
24
- implements ScannableTable
25
- {
26
26
  public static ThreadLocal<PageConverter> pageConverter = new ThreadLocal<>();
27
27
  public static ThreadLocal<Page> page = new ThreadLocal<>();
28
28
 
29
29
  private final Schema schema;
30
30
  private final RelProtoDataType protoRowType;
31
31
 
32
- PageTable(Schema schema, RelProtoDataType protoRowType)
33
- {
32
+ // Creates a {@code PageTable} object.
33
+ PageTable(Schema schema, RelProtoDataType protoRowType) {
34
34
  this.schema = schema;
35
35
  this.protoRowType = protoRowType;
36
36
  }
37
37
 
38
- public RelDataType getRowType(RelDataTypeFactory typeFactory)
39
- {
38
+ /**
39
+ * Returns a {@code RelDataType} by a given {@code RelDataTypeFactory}.
40
+ *
41
+ * @param typeFactory a factory object to create {@code RelDataType}s.
42
+ */
43
+ public RelDataType getRowType(RelDataTypeFactory typeFactory) {
40
44
  if (protoRowType != null) {
41
45
  return protoRowType.apply(typeFactory);
42
46
  }
@@ -53,11 +57,16 @@ public class PageTable
53
57
  return typeFactory.createStructType(Pair.zip(names, types));
54
58
  }
55
59
 
56
- public Enumerable<Object[]> scan(DataContext root)
57
- {
60
+ /**
61
+ * Creates and returns an {@code Enumerable} object to read a {@code Page} object.
62
+ *
63
+ * @param root a {@code DataContext} object that can be used during scanning a {@code Page}
64
+ * object.
65
+ * @return an {@code Enumerable} object
66
+ */
67
+ public Enumerable<Object[]> scan(DataContext root) {
58
68
  return new AbstractEnumerable<Object[]>() {
59
- public Enumerator<Object[]> enumerator()
60
- {
69
+ public Enumerator<Object[]> enumerator() {
61
70
  PageEnumerator enumerator = new PageEnumerator(schema, pageConverter.get());
62
71
  if (page.get() != null) {
63
72
  enumerator.setPage(page.get());
@@ -11,25 +11,37 @@ import org.embulk.spi.type.Type;
11
11
  import org.joda.time.DateTimeZone;
12
12
 
13
13
  public class FilterColumnGetterFactory
14
- extends ColumnGetterFactory
15
- {
14
+ extends ColumnGetterFactory {
15
+
16
16
  private final DateTimeZone defaultTimeZone;
17
17
 
18
- public FilterColumnGetterFactory(PageBuilder to, DateTimeZone defaultTimeZone)
19
- {
18
+ /**
19
+ * Creates a factory object to create {@code ColumnGetter}s for converting JdbcType to Embulk
20
+ * type.
21
+ *
22
+ * @param to a {@code PageBuilder} object that is passed to column getters.
23
+ * @param defaultTimeZone a {@code DateTimeZone} object passed to timestamp column getters as
24
+ * default.
25
+ */
26
+ public FilterColumnGetterFactory(PageBuilder to, DateTimeZone defaultTimeZone) {
20
27
  super(to, defaultTimeZone);
21
- this.defaultTimeZone = defaultTimeZone; // TODO make change super.defaultTimeZone field protected
28
+ // TODO make change super.defaultTimeZone field protected
29
+ this.defaultTimeZone = defaultTimeZone;
22
30
  }
23
31
 
24
32
  @Override
25
- public ColumnGetter newColumnGetter(JdbcInputConnection con, AbstractJdbcInputPlugin.PluginTask task, JdbcColumn column, JdbcColumnOption option)
26
- {
33
+ public ColumnGetter newColumnGetter(JdbcInputConnection con,
34
+ AbstractJdbcInputPlugin.PluginTask task,
35
+ JdbcColumn column,
36
+ JdbcColumnOption option) {
27
37
  String valueType = option.getValueType();
28
38
  Type toType = getToType(option);
29
- if (valueType.equals("coalesce") && sqlTypeToValueType(column, column.getSqlType()).equals("timestamp")) {
30
- return new FilterTimestampColumnGetter(to, toType, option.getTimeZone().or(defaultTimeZone));
31
- }
32
- else {
39
+ if (valueType.equals("coalesce") && sqlTypeToValueType(column, column.getSqlType())
40
+ .equals("timestamp")) {
41
+ return new FilterTimestampColumnGetter(to,
42
+ toType,
43
+ option.getTimeZone().or(defaultTimeZone));
44
+ } else {
33
45
  return super.newColumnGetter(con, task, column, option);
34
46
  }
35
47
  }
@@ -1,33 +1,29 @@
1
1
  package org.embulk.filter.calcite.getter;
2
2
 
3
+ import java.sql.ResultSet;
4
+ import java.sql.SQLException;
5
+ import java.util.Calendar;
6
+ import java.util.TimeZone;
7
+
3
8
  import org.embulk.input.jdbc.getter.TimestampColumnGetter;
4
9
  import org.embulk.spi.PageBuilder;
5
10
  import org.embulk.spi.time.Timestamp;
6
11
  import org.embulk.spi.type.Type;
7
12
  import org.joda.time.DateTimeZone;
8
13
 
9
- import java.sql.ResultSet;
10
- import java.sql.SQLException;
11
- import java.util.Calendar;
12
-
13
- import static java.util.Calendar.getInstance;
14
- import static java.util.TimeZone.getTimeZone;
15
-
16
14
  public class FilterTimestampColumnGetter
17
- extends TimestampColumnGetter
18
- {
15
+ extends TimestampColumnGetter {
16
+
19
17
  private static final ThreadLocal<Calendar> calendar = new ThreadLocal<>();
20
18
 
21
- public FilterTimestampColumnGetter(PageBuilder to, Type toType, DateTimeZone timeZone)
22
- {
19
+ public FilterTimestampColumnGetter(PageBuilder to, Type toType, DateTimeZone timeZone) {
23
20
  super(to, toType, null);
24
- calendar.set(getInstance(getTimeZone(timeZone.getID()))); // set TLS here
21
+ calendar.set(Calendar.getInstance(TimeZone.getTimeZone(timeZone.getID()))); // set TLS here
25
22
  }
26
23
 
27
24
  @Override
28
25
  protected void fetch(ResultSet from, int fromIndex)
29
- throws SQLException
30
- {
26
+ throws SQLException {
31
27
  java.sql.Timestamp timestamp = from.getTimestamp(fromIndex, calendar.get());
32
28
  if (timestamp != null) {
33
29
  value = Timestamp.ofEpochSecond(timestamp.getTime() / 1000, timestamp.getNanos());
@@ -1,24 +1,20 @@
1
1
  package org.embulk.filter.calcite;
2
2
 
3
3
  import com.google.common.collect.ImmutableList;
4
+ import java.io.IOException;
5
+ import java.nio.file.Path;
4
6
  import org.embulk.config.ConfigSource;
5
7
  import org.embulk.spi.FilterPlugin;
8
+ import org.embulk.test.EmbulkTests;
6
9
  import org.embulk.test.TestingEmbulk;
10
+ import org.hamcrest.Matchers;
11
+ import org.junit.Assert;
7
12
  import org.junit.Before;
8
13
  import org.junit.Rule;
9
14
  import org.junit.Test;
10
15
 
11
- import java.io.IOException;
12
- import java.nio.file.Path;
13
-
14
- import static org.embulk.test.EmbulkTests.copyResource;
15
- import static org.embulk.test.EmbulkTests.readResource;
16
- import static org.embulk.test.EmbulkTests.readSortedFile;
17
- import static org.hamcrest.Matchers.is;
18
- import static org.junit.Assert.assertThat;
16
+ public class TestCalciteFilterPlugin {
19
17
 
20
- public class TestCalciteFilterPlugin
21
- {
22
18
  private static final String RESOURCE_NAME_PREFIX = "org/embulk/filter/calcite/test/";
23
19
 
24
20
  @Rule
@@ -28,69 +24,99 @@ public class TestCalciteFilterPlugin
28
24
 
29
25
  private ConfigSource baseConfig;
30
26
 
27
+ static void assertRecordsByResource(TestingEmbulk embulk,
28
+ String inConfigYamlResourceName,
29
+ String filterConfigYamlResourceName,
30
+ String sourceCsvResourceName,
31
+ String resultCsvResourceName)
32
+ throws IOException {
33
+ Path inputPath = embulk.createTempFile("csv");
34
+ Path outputPath = embulk.createTempFile("csv");
35
+
36
+ // in: config
37
+ EmbulkTests.copyResource(RESOURCE_NAME_PREFIX + sourceCsvResourceName, inputPath);
38
+ ConfigSource inConfig = embulk.loadYamlResource(
39
+ RESOURCE_NAME_PREFIX + inConfigYamlResourceName)
40
+ .set("path_prefix", inputPath.toAbsolutePath().toString());
41
+
42
+ // remove_columns filter config
43
+ ConfigSource filterConfig = embulk
44
+ .loadYamlResource(RESOURCE_NAME_PREFIX + filterConfigYamlResourceName);
45
+
46
+ TestingEmbulk.RunResult result = embulk.inputBuilder()
47
+ .in(inConfig)
48
+ .filters(ImmutableList.of(filterConfig))
49
+ .outputPath(outputPath)
50
+ .run();
51
+
52
+ Assert.assertThat(EmbulkTests.readSortedFile(outputPath),
53
+ Matchers.is(EmbulkTests.readResource(
54
+ RESOURCE_NAME_PREFIX + resultCsvResourceName)));
55
+ }
56
+
31
57
  @Before
32
- public void setup()
33
- {
58
+ public void setup() {
34
59
  baseConfig = embulk.newConfig();
35
60
  }
36
61
 
37
62
  @Test
38
- public void testSimple() throws Exception
39
- {
40
- assertRecordsByResource(embulk, "test_simple_in.yml", "test_simple_filter.yml",
41
- "test_simple_source.csv", "test_simple_expected.csv");
63
+ public void testSimple() throws Exception {
64
+ assertRecordsByResource(embulk,
65
+ "test_simple_in.yml",
66
+ "test_simple_filter.yml",
67
+ "test_simple_source.csv",
68
+ "test_simple_expected.csv");
42
69
  }
43
70
 
71
+ /**
72
+ * This test was added to confirm whether #13 is fixed.
73
+ *
74
+ * @throws Exception
75
+ *
76
+ * @see https://github.com/muga/embulk-filter-calcite/issues/13
77
+ * @see https://issues.apache.org/jira/browse/CALCITE-1673
78
+ */
44
79
  @Test
45
- public void testIntOperators() throws Exception
46
- {
47
- assertRecordsByResource(embulk, "test_int_ops_in.yml", "test_int_ops_filter.yml",
48
- "test_int_ops_source.csv", "test_int_ops_expected.csv");
80
+ public void testTimestampConversion() throws Exception {
81
+ assertRecordsByResource(embulk, "test_timestamp_conv_in.yml",
82
+ "test_timestamp_conv_filter.yml",
83
+ "test_timestamp_conv_source.csv",
84
+ "test_timestamp_conv_expected.csv");
49
85
  }
50
86
 
51
87
  @Test
52
- public void testWhereIntCondition() throws Exception
53
- {
54
- assertRecordsByResource(embulk, "test_where_int_cond_in.yml", "test_where_int_cond_filter.yml",
55
- "test_where_int_cond_source.csv", "test_where_int_cond_expected.csv");
88
+ public void testIntOperators() throws Exception {
89
+ assertRecordsByResource(embulk,
90
+ "test_int_ops_in.yml",
91
+ "test_int_ops_filter.yml",
92
+ "test_int_ops_source.csv",
93
+ "test_int_ops_expected.csv");
56
94
  }
57
95
 
58
96
  @Test
59
- public void testStringOperators() throws Exception
60
- {
61
- assertRecordsByResource(embulk, "test_string_ops_in.yml", "test_string_ops_filter.yml",
62
- "test_string_ops_source.csv", "test_string_ops_expected.csv");
97
+ public void testWhereIntCondition() throws Exception {
98
+ assertRecordsByResource(embulk,
99
+ "test_where_int_cond_in.yml",
100
+ "test_where_int_cond_filter.yml",
101
+ "test_where_int_cond_source.csv",
102
+ "test_where_int_cond_expected.csv");
63
103
  }
64
104
 
65
105
  @Test
66
- public void testWhereStringCondition() throws Exception
67
- {
68
- assertRecordsByResource(embulk, "test_where_string_cond_in.yml", "test_where_string_cond_filter.yml",
69
- "test_where_string_cond_source.csv", "test_where_string_cond_expected.csv");
106
+ public void testStringOperators() throws Exception {
107
+ assertRecordsByResource(embulk,
108
+ "test_string_ops_in.yml",
109
+ "test_string_ops_filter.yml",
110
+ "test_string_ops_source.csv",
111
+ "test_string_ops_expected.csv");
70
112
  }
71
113
 
72
- static void assertRecordsByResource(TestingEmbulk embulk,
73
- String inConfigYamlResourceName, String filterConfigYamlResourceName,
74
- String sourceCsvResourceName, String resultCsvResourceName)
75
- throws IOException
76
- {
77
- Path inputPath = embulk.createTempFile("csv");
78
- Path outputPath = embulk.createTempFile("csv");
79
-
80
- // in: config
81
- copyResource(RESOURCE_NAME_PREFIX + sourceCsvResourceName, inputPath);
82
- ConfigSource inConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + inConfigYamlResourceName)
83
- .set("path_prefix", inputPath.toAbsolutePath().toString());
84
-
85
- // remove_columns filter config
86
- ConfigSource filterConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + filterConfigYamlResourceName);
87
-
88
- TestingEmbulk.RunResult result = embulk.inputBuilder()
89
- .in(inConfig)
90
- .filters(ImmutableList.of(filterConfig))
91
- .outputPath(outputPath)
92
- .run();
93
-
94
- assertThat(readSortedFile(outputPath), is(readResource(RESOURCE_NAME_PREFIX + resultCsvResourceName)));
114
+ @Test
115
+ public void testWhereStringCondition() throws Exception {
116
+ assertRecordsByResource(embulk,
117
+ "test_where_string_cond_in.yml",
118
+ "test_where_string_cond_filter.yml",
119
+ "test_where_string_cond_source.csv",
120
+ "test_where_string_cond_expected.csv");
95
121
  }
96
122
  }