embulk-filter-calcite 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -1
  3. data/CHANGELOG.md +12 -0
  4. data/README.md +7 -0
  5. data/build.gradle +7 -11
  6. data/config/checkstyle/README.md +6 -0
  7. data/config/checkstyle/checkstyle-suppressions.xml +8 -0
  8. data/config/checkstyle/checkstyle.xml +195 -104
  9. data/config/checkstyle/google_checks.xml +218 -0
  10. data/src/main/java/org/embulk/filter/calcite/CalciteFilterPlugin.java +111 -118
  11. data/src/main/java/org/embulk/filter/calcite/PageConverter.java +19 -36
  12. data/src/main/java/org/embulk/filter/calcite/adapter/page/PageEnumerator.java +14 -15
  13. data/src/main/java/org/embulk/filter/calcite/adapter/page/PageFieldType.java +9 -15
  14. data/src/main/java/org/embulk/filter/calcite/adapter/page/PageSchema.java +4 -9
  15. data/src/main/java/org/embulk/filter/calcite/adapter/page/PageSchemaFactory.java +7 -8
  16. data/src/main/java/org/embulk/filter/calcite/adapter/page/PageTable.java +23 -14
  17. data/src/main/java/org/embulk/filter/calcite/getter/FilterColumnGetterFactory.java +23 -11
  18. data/src/main/java/org/embulk/filter/calcite/getter/FilterTimestampColumnGetter.java +10 -14
  19. data/src/test/java/org/embulk/filter/calcite/TestCalciteFilterPlugin.java +81 -55
  20. data/src/test/resources/org/embulk/filter/calcite/test/test_timestamp_conv_expected.csv +4 -0
  21. data/src/test/resources/org/embulk/filter/calcite/test/test_timestamp_conv_filter.yml +3 -0
  22. data/src/test/resources/org/embulk/filter/calcite/test/test_timestamp_conv_in.yml +18 -0
  23. data/src/test/resources/org/embulk/filter/calcite/test/test_timestamp_conv_source.csv +5 -0
  24. metadata +14 -7
  25. data/config/checkstyle/default.xml +0 -108
@@ -6,49 +6,48 @@ import org.embulk.spi.Page;
6
6
  import org.embulk.spi.PageReader;
7
7
  import org.embulk.spi.Schema;
8
8
 
9
- public class PageEnumerator
10
- implements Enumerator<Object[]>
11
- {
9
+ public class PageEnumerator implements Enumerator<Object[]> {
10
+
12
11
  private final Schema schema;
13
12
  private final PageConverter pageConverter;
14
13
  private final PageReader pageReader;
15
14
 
16
- public PageEnumerator(Schema schema, PageConverter pageConverter)
17
- {
15
+ /**
16
+ * Creates an enumerator to read {@code Page} objects
17
+ *
18
+ * @param schema a {@code Schema} that is used for reading {@code Page} objects.
19
+ * @param pageConverter a converter to translate values from Embulk types to Calcite types.
20
+ */
21
+ public PageEnumerator(Schema schema, PageConverter pageConverter) {
18
22
  this.schema = schema;
19
23
  this.pageReader = new PageReader(schema);
20
24
  this.pageConverter = pageConverter;
21
25
  }
22
26
 
23
- public void setPage(Page page)
24
- {
27
+ public void setPage(Page page) {
25
28
  this.pageReader.setPage(page);
26
29
  this.pageConverter.setPageReader(pageReader);
27
30
  }
28
31
 
29
32
  @Override
30
- public Object[] current()
31
- {
33
+ public Object[] current() {
32
34
  // this is called from org.apache.calcite.linq4j.EnumerableDefaults
33
35
  schema.visitColumns(pageConverter);
34
36
  return pageConverter.getRow();
35
37
  }
36
38
 
37
39
  @Override
38
- public boolean moveNext()
39
- {
40
+ public boolean moveNext() {
40
41
  return pageReader.nextRecord();
41
42
  }
42
43
 
43
44
  @Override
44
- public void reset()
45
- {
45
+ public void reset() {
46
46
  throw new UnsupportedOperationException();
47
47
  }
48
48
 
49
49
  @Override
50
- public void close()
51
- {
50
+ public void close() {
52
51
  if (pageReader != null) {
53
52
  pageReader.close();
54
53
  }
@@ -1,13 +1,11 @@
1
1
  package org.embulk.filter.calcite.adapter.page;
2
2
 
3
- import org.apache.calcite.adapter.java.JavaTypeFactory;
4
- import org.apache.calcite.rel.type.RelDataType;
5
-
6
3
  import java.util.HashMap;
7
4
  import java.util.Map;
5
+ import org.apache.calcite.adapter.java.JavaTypeFactory;
6
+ import org.apache.calcite.rel.type.RelDataType;
8
7
 
9
- enum PageFieldType
10
- {
8
+ enum PageFieldType {
11
9
  STRING(String.class, "string"),
12
10
  BOOLEAN(Boolean.class, Boolean.TYPE.getSimpleName()),
13
11
  LONG(Long.class, Long.TYPE.getSimpleName()),
@@ -16,8 +14,7 @@ enum PageFieldType
16
14
 
17
15
  private static final Map<String, PageFieldType> MAP = new HashMap<>();
18
16
 
19
- static
20
- {
17
+ static {
21
18
  for (PageFieldType value : values()) {
22
19
  MAP.put(value.simpleName, value);
23
20
  }
@@ -26,19 +23,16 @@ enum PageFieldType
26
23
  private final Class clazz;
27
24
  private final String simpleName;
28
25
 
29
- private PageFieldType(Class clazz, String simpleName)
30
- {
26
+ private PageFieldType(Class clazz, String simpleName) {
31
27
  this.clazz = clazz;
32
28
  this.simpleName = simpleName;
33
29
  }
34
30
 
35
- public RelDataType toType(JavaTypeFactory typeFactory)
36
- {
37
- return typeFactory.createJavaType(clazz);
31
+ public static PageFieldType of(String typeString) {
32
+ return MAP.get(typeString);
38
33
  }
39
34
 
40
- public static PageFieldType of(String typeString)
41
- {
42
- return MAP.get(typeString);
35
+ public RelDataType toType(JavaTypeFactory typeFactory) {
36
+ return typeFactory.createJavaType(clazz);
43
37
  }
44
38
  }
@@ -1,26 +1,21 @@
1
1
  package org.embulk.filter.calcite.adapter.page;
2
2
 
3
3
  import com.google.common.collect.ImmutableMap;
4
+ import java.util.Map;
4
5
  import org.apache.calcite.schema.Table;
5
6
  import org.apache.calcite.schema.impl.AbstractSchema;
6
- import org.embulk.filter.calcite.PageConverter;
7
7
  import org.embulk.spi.Schema;
8
8
 
9
- import java.util.Map;
9
+ public class PageSchema extends AbstractSchema {
10
10
 
11
- public class PageSchema
12
- extends AbstractSchema
13
- {
14
11
  public static Schema schema;
15
12
 
16
- public PageSchema()
17
- {
13
+ public PageSchema() {
18
14
  super();
19
15
  }
20
16
 
21
17
  @Override
22
- protected Map<String, Table> getTableMap()
23
- {
18
+ protected Map<String, Table> getTableMap() {
24
19
  return ImmutableMap.<String, Table>of("$PAGES", new PageTable(schema, null));
25
20
  }
26
21
  }
@@ -1,26 +1,25 @@
1
1
  package org.embulk.filter.calcite.adapter.page;
2
2
 
3
+ import java.util.Map;
3
4
  import org.apache.calcite.schema.SchemaFactory;
4
5
  import org.apache.calcite.schema.SchemaPlus;
5
6
 
6
- import java.util.Map;
7
-
8
7
  /**
9
8
  * Factory that creates a {@link PageSchema}.
9
+ *
10
10
  * @see https://github.com/apache/calcite/blob/master/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvSchemaFactory.java
11
11
  */
12
12
  public class PageSchemaFactory
13
- implements SchemaFactory
14
- {
13
+ implements SchemaFactory {
14
+
15
15
  public static final PageSchemaFactory INSTANCE = new PageSchemaFactory();
16
16
 
17
- private PageSchemaFactory()
18
- {
17
+ private PageSchemaFactory() {
19
18
  }
20
19
 
21
20
  @Override
22
- public org.apache.calcite.schema.Schema create(SchemaPlus parentSchema, String name, Map<String, Object> operand)
23
- {
21
+ public org.apache.calcite.schema.Schema create(SchemaPlus parentSchema, String name,
22
+ Map<String, Object> operand) {
24
23
  return new PageSchema();
25
24
  }
26
25
  }
@@ -1,5 +1,7 @@
1
1
  package org.embulk.filter.calcite.adapter.page;
2
2
 
3
+ import java.util.ArrayList;
4
+ import java.util.List;
3
5
  import org.apache.calcite.DataContext;
4
6
  import org.apache.calcite.adapter.java.JavaTypeFactory;
5
7
  import org.apache.calcite.linq4j.AbstractEnumerable;
@@ -16,27 +18,29 @@ import org.embulk.spi.Column;
16
18
  import org.embulk.spi.Page;
17
19
  import org.embulk.spi.Schema;
18
20
 
19
- import java.util.ArrayList;
20
- import java.util.List;
21
+ /**
22
+ * Base class for a table that reads Pages.
23
+ */
24
+ public class PageTable extends AbstractTable implements ScannableTable {
21
25
 
22
- public class PageTable
23
- extends AbstractTable
24
- implements ScannableTable
25
- {
26
26
  public static ThreadLocal<PageConverter> pageConverter = new ThreadLocal<>();
27
27
  public static ThreadLocal<Page> page = new ThreadLocal<>();
28
28
 
29
29
  private final Schema schema;
30
30
  private final RelProtoDataType protoRowType;
31
31
 
32
- PageTable(Schema schema, RelProtoDataType protoRowType)
33
- {
32
+ // Creates a {@code PageTable} object.
33
+ PageTable(Schema schema, RelProtoDataType protoRowType) {
34
34
  this.schema = schema;
35
35
  this.protoRowType = protoRowType;
36
36
  }
37
37
 
38
- public RelDataType getRowType(RelDataTypeFactory typeFactory)
39
- {
38
+ /**
39
+ * Returns a {@code RelDataType} by a given {@code RelDataTypeFactory}.
40
+ *
41
+ * @param typeFactory a factory object to create {@code RelDataType}s.
42
+ */
43
+ public RelDataType getRowType(RelDataTypeFactory typeFactory) {
40
44
  if (protoRowType != null) {
41
45
  return protoRowType.apply(typeFactory);
42
46
  }
@@ -53,11 +57,16 @@ public class PageTable
53
57
  return typeFactory.createStructType(Pair.zip(names, types));
54
58
  }
55
59
 
56
- public Enumerable<Object[]> scan(DataContext root)
57
- {
60
+ /**
61
+ * Creates and returns an {@code Enumerable} object to read a {@code Page} object.
62
+ *
63
+ * @param root a {@code DataContext} object that can be used during scanning a {@code Page}
64
+ * object.
65
+ * @return an {@code Enumerable} object
66
+ */
67
+ public Enumerable<Object[]> scan(DataContext root) {
58
68
  return new AbstractEnumerable<Object[]>() {
59
- public Enumerator<Object[]> enumerator()
60
- {
69
+ public Enumerator<Object[]> enumerator() {
61
70
  PageEnumerator enumerator = new PageEnumerator(schema, pageConverter.get());
62
71
  if (page.get() != null) {
63
72
  enumerator.setPage(page.get());
@@ -11,25 +11,37 @@ import org.embulk.spi.type.Type;
11
11
  import org.joda.time.DateTimeZone;
12
12
 
13
13
  public class FilterColumnGetterFactory
14
- extends ColumnGetterFactory
15
- {
14
+ extends ColumnGetterFactory {
15
+
16
16
  private final DateTimeZone defaultTimeZone;
17
17
 
18
- public FilterColumnGetterFactory(PageBuilder to, DateTimeZone defaultTimeZone)
19
- {
18
+ /**
19
+ * Creates a factory object to create {@code ColumnGetter}s for converting JdbcType to Embulk
20
+ * type.
21
+ *
22
+ * @param to a {@code PageBuilder} object that is passed to column getters.
23
+ * @param defaultTimeZone a {@code DateTimeZone} object passed to timestamp column getters as
24
+ * default.
25
+ */
26
+ public FilterColumnGetterFactory(PageBuilder to, DateTimeZone defaultTimeZone) {
20
27
  super(to, defaultTimeZone);
21
- this.defaultTimeZone = defaultTimeZone; // TODO make change super.defaultTimeZone field protected
28
+ // TODO change the super.defaultTimeZone field to protected
29
+ this.defaultTimeZone = defaultTimeZone;
22
30
  }
23
31
 
24
32
  @Override
25
- public ColumnGetter newColumnGetter(JdbcInputConnection con, AbstractJdbcInputPlugin.PluginTask task, JdbcColumn column, JdbcColumnOption option)
26
- {
33
+ public ColumnGetter newColumnGetter(JdbcInputConnection con,
34
+ AbstractJdbcInputPlugin.PluginTask task,
35
+ JdbcColumn column,
36
+ JdbcColumnOption option) {
27
37
  String valueType = option.getValueType();
28
38
  Type toType = getToType(option);
29
- if (valueType.equals("coalesce") && sqlTypeToValueType(column, column.getSqlType()).equals("timestamp")) {
30
- return new FilterTimestampColumnGetter(to, toType, option.getTimeZone().or(defaultTimeZone));
31
- }
32
- else {
39
+ if (valueType.equals("coalesce") && sqlTypeToValueType(column, column.getSqlType())
40
+ .equals("timestamp")) {
41
+ return new FilterTimestampColumnGetter(to,
42
+ toType,
43
+ option.getTimeZone().or(defaultTimeZone));
44
+ } else {
33
45
  return super.newColumnGetter(con, task, column, option);
34
46
  }
35
47
  }
@@ -1,33 +1,29 @@
1
1
  package org.embulk.filter.calcite.getter;
2
2
 
3
+ import java.sql.ResultSet;
4
+ import java.sql.SQLException;
5
+ import java.util.Calendar;
6
+ import java.util.TimeZone;
7
+
3
8
  import org.embulk.input.jdbc.getter.TimestampColumnGetter;
4
9
  import org.embulk.spi.PageBuilder;
5
10
  import org.embulk.spi.time.Timestamp;
6
11
  import org.embulk.spi.type.Type;
7
12
  import org.joda.time.DateTimeZone;
8
13
 
9
- import java.sql.ResultSet;
10
- import java.sql.SQLException;
11
- import java.util.Calendar;
12
-
13
- import static java.util.Calendar.getInstance;
14
- import static java.util.TimeZone.getTimeZone;
15
-
16
14
  public class FilterTimestampColumnGetter
17
- extends TimestampColumnGetter
18
- {
15
+ extends TimestampColumnGetter {
16
+
19
17
  private static final ThreadLocal<Calendar> calendar = new ThreadLocal<>();
20
18
 
21
- public FilterTimestampColumnGetter(PageBuilder to, Type toType, DateTimeZone timeZone)
22
- {
19
+ public FilterTimestampColumnGetter(PageBuilder to, Type toType, DateTimeZone timeZone) {
23
20
  super(to, toType, null);
24
- calendar.set(getInstance(getTimeZone(timeZone.getID()))); // set TLS here
21
+ calendar.set(Calendar.getInstance(TimeZone.getTimeZone(timeZone.getID()))); // set TLS here
25
22
  }
26
23
 
27
24
  @Override
28
25
  protected void fetch(ResultSet from, int fromIndex)
29
- throws SQLException
30
- {
26
+ throws SQLException {
31
27
  java.sql.Timestamp timestamp = from.getTimestamp(fromIndex, calendar.get());
32
28
  if (timestamp != null) {
33
29
  value = Timestamp.ofEpochSecond(timestamp.getTime() / 1000, timestamp.getNanos());
@@ -1,24 +1,20 @@
1
1
  package org.embulk.filter.calcite;
2
2
 
3
3
  import com.google.common.collect.ImmutableList;
4
+ import java.io.IOException;
5
+ import java.nio.file.Path;
4
6
  import org.embulk.config.ConfigSource;
5
7
  import org.embulk.spi.FilterPlugin;
8
+ import org.embulk.test.EmbulkTests;
6
9
  import org.embulk.test.TestingEmbulk;
10
+ import org.hamcrest.Matchers;
11
+ import org.junit.Assert;
7
12
  import org.junit.Before;
8
13
  import org.junit.Rule;
9
14
  import org.junit.Test;
10
15
 
11
- import java.io.IOException;
12
- import java.nio.file.Path;
13
-
14
- import static org.embulk.test.EmbulkTests.copyResource;
15
- import static org.embulk.test.EmbulkTests.readResource;
16
- import static org.embulk.test.EmbulkTests.readSortedFile;
17
- import static org.hamcrest.Matchers.is;
18
- import static org.junit.Assert.assertThat;
16
+ public class TestCalciteFilterPlugin {
19
17
 
20
- public class TestCalciteFilterPlugin
21
- {
22
18
  private static final String RESOURCE_NAME_PREFIX = "org/embulk/filter/calcite/test/";
23
19
 
24
20
  @Rule
@@ -28,69 +24,99 @@ public class TestCalciteFilterPlugin
28
24
 
29
25
  private ConfigSource baseConfig;
30
26
 
27
+ static void assertRecordsByResource(TestingEmbulk embulk,
28
+ String inConfigYamlResourceName,
29
+ String filterConfigYamlResourceName,
30
+ String sourceCsvResourceName,
31
+ String resultCsvResourceName)
32
+ throws IOException {
33
+ Path inputPath = embulk.createTempFile("csv");
34
+ Path outputPath = embulk.createTempFile("csv");
35
+
36
+ // in: config
37
+ EmbulkTests.copyResource(RESOURCE_NAME_PREFIX + sourceCsvResourceName, inputPath);
38
+ ConfigSource inConfig = embulk.loadYamlResource(
39
+ RESOURCE_NAME_PREFIX + inConfigYamlResourceName)
40
+ .set("path_prefix", inputPath.toAbsolutePath().toString());
41
+
42
+ // remove_columns filter config
43
+ ConfigSource filterConfig = embulk
44
+ .loadYamlResource(RESOURCE_NAME_PREFIX + filterConfigYamlResourceName);
45
+
46
+ TestingEmbulk.RunResult result = embulk.inputBuilder()
47
+ .in(inConfig)
48
+ .filters(ImmutableList.of(filterConfig))
49
+ .outputPath(outputPath)
50
+ .run();
51
+
52
+ Assert.assertThat(EmbulkTests.readSortedFile(outputPath),
53
+ Matchers.is(EmbulkTests.readResource(
54
+ RESOURCE_NAME_PREFIX + resultCsvResourceName)));
55
+ }
56
+
31
57
  @Before
32
- public void setup()
33
- {
58
+ public void setup() {
34
59
  baseConfig = embulk.newConfig();
35
60
  }
36
61
 
37
62
  @Test
38
- public void testSimple() throws Exception
39
- {
40
- assertRecordsByResource(embulk, "test_simple_in.yml", "test_simple_filter.yml",
41
- "test_simple_source.csv", "test_simple_expected.csv");
63
+ public void testSimple() throws Exception {
64
+ assertRecordsByResource(embulk,
65
+ "test_simple_in.yml",
66
+ "test_simple_filter.yml",
67
+ "test_simple_source.csv",
68
+ "test_simple_expected.csv");
42
69
  }
43
70
 
71
+ /**
72
+ * This method was added to confirm whether #13 is fixed.
73
+ *
74
+ * @throws Exception
75
+ *
76
+ * @see https://github.com/muga/embulk-filter-calcite/issues/13
77
+ * @see https://issues.apache.org/jira/browse/CALCITE-1673
78
+ */
44
79
  @Test
45
- public void testIntOperators() throws Exception
46
- {
47
- assertRecordsByResource(embulk, "test_int_ops_in.yml", "test_int_ops_filter.yml",
48
- "test_int_ops_source.csv", "test_int_ops_expected.csv");
80
+ public void testTimestampConversion() throws Exception {
81
+ assertRecordsByResource(embulk, "test_timestamp_conv_in.yml",
82
+ "test_timestamp_conv_filter.yml",
83
+ "test_timestamp_conv_source.csv",
84
+ "test_timestamp_conv_expected.csv");
49
85
  }
50
86
 
51
87
  @Test
52
- public void testWhereIntCondition() throws Exception
53
- {
54
- assertRecordsByResource(embulk, "test_where_int_cond_in.yml", "test_where_int_cond_filter.yml",
55
- "test_where_int_cond_source.csv", "test_where_int_cond_expected.csv");
88
+ public void testIntOperators() throws Exception {
89
+ assertRecordsByResource(embulk,
90
+ "test_int_ops_in.yml",
91
+ "test_int_ops_filter.yml",
92
+ "test_int_ops_source.csv",
93
+ "test_int_ops_expected.csv");
56
94
  }
57
95
 
58
96
  @Test
59
- public void testStringOperators() throws Exception
60
- {
61
- assertRecordsByResource(embulk, "test_string_ops_in.yml", "test_string_ops_filter.yml",
62
- "test_string_ops_source.csv", "test_string_ops_expected.csv");
97
+ public void testWhereIntCondition() throws Exception {
98
+ assertRecordsByResource(embulk,
99
+ "test_where_int_cond_in.yml",
100
+ "test_where_int_cond_filter.yml",
101
+ "test_where_int_cond_source.csv",
102
+ "test_where_int_cond_expected.csv");
63
103
  }
64
104
 
65
105
  @Test
66
- public void testWhereStringCondition() throws Exception
67
- {
68
- assertRecordsByResource(embulk, "test_where_string_cond_in.yml", "test_where_string_cond_filter.yml",
69
- "test_where_string_cond_source.csv", "test_where_string_cond_expected.csv");
106
+ public void testStringOperators() throws Exception {
107
+ assertRecordsByResource(embulk,
108
+ "test_string_ops_in.yml",
109
+ "test_string_ops_filter.yml",
110
+ "test_string_ops_source.csv",
111
+ "test_string_ops_expected.csv");
70
112
  }
71
113
 
72
- static void assertRecordsByResource(TestingEmbulk embulk,
73
- String inConfigYamlResourceName, String filterConfigYamlResourceName,
74
- String sourceCsvResourceName, String resultCsvResourceName)
75
- throws IOException
76
- {
77
- Path inputPath = embulk.createTempFile("csv");
78
- Path outputPath = embulk.createTempFile("csv");
79
-
80
- // in: config
81
- copyResource(RESOURCE_NAME_PREFIX + sourceCsvResourceName, inputPath);
82
- ConfigSource inConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + inConfigYamlResourceName)
83
- .set("path_prefix", inputPath.toAbsolutePath().toString());
84
-
85
- // remove_columns filter config
86
- ConfigSource filterConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + filterConfigYamlResourceName);
87
-
88
- TestingEmbulk.RunResult result = embulk.inputBuilder()
89
- .in(inConfig)
90
- .filters(ImmutableList.of(filterConfig))
91
- .outputPath(outputPath)
92
- .run();
93
-
94
- assertThat(readSortedFile(outputPath), is(readResource(RESOURCE_NAME_PREFIX + resultCsvResourceName)));
114
+ @Test
115
+ public void testWhereStringCondition() throws Exception {
116
+ assertRecordsByResource(embulk,
117
+ "test_where_string_cond_in.yml",
118
+ "test_where_string_cond_filter.yml",
119
+ "test_where_string_cond_source.csv",
120
+ "test_where_string_cond_expected.csv");
95
121
  }
96
122
  }