embulk-filter-add_time 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +7 -0
  4. data/CHANGELOG.md +3 -0
  5. data/COPYING +14 -0
  6. data/README.md +212 -0
  7. data/build.gradle +82 -0
  8. data/gradle/check.gradle +34 -0
  9. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  10. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  11. data/gradlew +164 -0
  12. data/gradlew.bat +90 -0
  13. data/lib/embulk/filter/add_time.rb +3 -0
  14. data/src/main/java/org/embulk/filter/add_time/AddTimeFilterPlugin.java +208 -0
  15. data/src/main/java/org/embulk/filter/add_time/converter/ColumnConverter.java +14 -0
  16. data/src/main/java/org/embulk/filter/add_time/converter/ColumnDuplicator.java +72 -0
  17. data/src/main/java/org/embulk/filter/add_time/converter/LongValueCastConverter.java +33 -0
  18. data/src/main/java/org/embulk/filter/add_time/converter/SchemaConverter.java +257 -0
  19. data/src/main/java/org/embulk/filter/add_time/converter/SimpleColumnConverter.java +62 -0
  20. data/src/main/java/org/embulk/filter/add_time/converter/StringValueCastConverter.java +33 -0
  21. data/src/main/java/org/embulk/filter/add_time/converter/TimestampValueCastConverter.java +23 -0
  22. data/src/main/java/org/embulk/filter/add_time/converter/ValueCastConverter.java +108 -0
  23. data/src/main/java/org/embulk/filter/add_time/converter/ValueConverter.java +22 -0
  24. data/src/main/java/org/embulk/filter/add_time/converter/ValueNoConverter.java +46 -0
  25. data/src/main/java/org/embulk/filter/add_time/reader/AbstractColumnReader.java +55 -0
  26. data/src/main/java/org/embulk/filter/add_time/reader/BooleanColumnReader.java +35 -0
  27. data/src/main/java/org/embulk/filter/add_time/reader/ColumnReader.java +14 -0
  28. data/src/main/java/org/embulk/filter/add_time/reader/DoubleColumnReader.java +35 -0
  29. data/src/main/java/org/embulk/filter/add_time/reader/LongColumnReader.java +35 -0
  30. data/src/main/java/org/embulk/filter/add_time/reader/StringColumnReader.java +35 -0
  31. data/src/main/java/org/embulk/filter/add_time/reader/TimeValueGenerator.java +177 -0
  32. data/src/main/java/org/embulk/filter/add_time/reader/TimestampColumnReader.java +36 -0
  33. data/src/test/java/org/embulk/filter/add_time/TestAddTimeFilterPlugin.java +416 -0
  34. data/src/test/java/org/embulk/filter/add_time/converter/TestSchemaConverter.java +338 -0
  35. metadata +107 -0
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windows variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega
@@ -0,0 +1,3 @@
1
+ Embulk::JavaPlugin.register_filter(
2
+ "add_time", "org.embulk.filter.add_time.AddTimeFilterPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))
@@ -0,0 +1,208 @@
1
+ package org.embulk.filter.add_time;
2
+
3
+ import com.fasterxml.jackson.annotation.JsonCreator;
4
+ import com.google.common.base.Optional;
5
+ import org.embulk.config.Config;
6
+ import org.embulk.config.ConfigDefault;
7
+ import org.embulk.config.ConfigException;
8
+ import org.embulk.config.ConfigSource;
9
+ import org.embulk.config.Task;
10
+ import org.embulk.config.TaskSource;
11
+ import org.embulk.filter.add_time.converter.SchemaConverter;
12
+ import org.embulk.spi.Exec;
13
+ import org.embulk.spi.FilterPlugin;
14
+ import org.embulk.spi.Page;
15
+ import org.embulk.spi.PageBuilder;
16
+ import org.embulk.spi.PageOutput;
17
+ import org.embulk.spi.PageReader;
18
+ import org.embulk.spi.Schema;
19
+ import org.embulk.spi.time.Timestamp;
20
+ import org.slf4j.Logger;
21
+
22
+ public class AddTimeFilterPlugin
23
+ implements FilterPlugin
24
+ {
25
+ public interface PluginTask
26
+ extends Task
27
+ {
28
+ @Config("to_column")
29
+ ToColumnConfig getToColumn();
30
+
31
+ @Config("from_column")
32
+ @ConfigDefault("null")
33
+ Optional<FromColumnConfig> getFromColumn();
34
+
35
+ @Config("from_value")
36
+ @ConfigDefault("null")
37
+ Optional<FromValueConfig> getFromValue();
38
+ }
39
+
40
+ public interface ToColumnConfig
41
+ extends Task
42
+ {
43
+ @Config("name")
44
+ String getName();
45
+
46
+ @Config("type")
47
+ @ConfigDefault("\"timestamp\"")
48
+ String getType();
49
+
50
+ @Config("unix_timestamp_unit")
51
+ @ConfigDefault("\"sec\"")
52
+ String getUnixTimestampUnit();
53
+ }
54
+
55
+ public interface FromColumnConfig
56
+ extends Task, org.embulk.spi.time.TimestampParser.Task, org.embulk.spi.time.TimestampParser.TimestampColumnOption
57
+ {
58
+ @Config("name")
59
+ String getName();
60
+
61
+ @Config("unix_timestamp_unit")
62
+ @ConfigDefault("\"sec\"")
63
+ String getUnixTimestampUnit();
64
+
65
+ @Config("timestamp_format")
66
+ @ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
67
+ Optional<String> getFormat();
68
+
69
+ @Config("default_timestamp_format")
70
+ @ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
71
+ String getDefaultTimestampFormat();
72
+ }
73
+
74
+ public interface FromValueConfig
75
+ extends Task, org.embulk.spi.time.TimestampParser.Task, org.embulk.spi.time.TimestampParser.TimestampColumnOption
76
+ {
77
+ @Config("mode")
78
+ @ConfigDefault("\"fixed_time\"")
79
+ String getMode();
80
+
81
+ @Config("value")
82
+ @ConfigDefault("null")
83
+ Optional<Object> getValue();
84
+
85
+ @Config("from")
86
+ @ConfigDefault("null")
87
+ Optional<Object> getFrom();
88
+
89
+ @Config("to")
90
+ @ConfigDefault("null")
91
+ Optional<Object> getTo();
92
+
93
+ @Config("unix_timestamp_unit")
94
+ @ConfigDefault("\"sec\"")
95
+ String getUnixTimestampUnit();
96
+
97
+ @Config("timestamp_format")
98
+ @ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
99
+ Optional<String> getFormat();
100
+
101
+ @Config("default_timestamp_format")
102
+ @ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
103
+ String getDefaultTimestampFormat();
104
+ }
105
+
106
+ public enum UnixTimestampUnit // unit of an integer unix timestamp; each constant is (secondUnit, nanoUnit)
107
+ {
108
+ SEC(1, 1000000000),
109
+ MILLI(1000, 1000000),
110
+ MICRO(1000000, 1000),
111
+ NANO(1000000000, 1);
112
+
113
+ private final int secondUnit; // how many of this unit make up one second
114
+ private final int nanoUnit; // how many nanoseconds make up one of this unit
115
+
116
+ UnixTimestampUnit(int secondUnit, int nanoUnit)
117
+ {
118
+ this.secondUnit = secondUnit;
119
+ this.nanoUnit = nanoUnit;
120
+ }
121
+
122
+ public long toLong(Timestamp t) // expresses t as an integer count of this unit
123
+ {
124
+ return t.getEpochSecond() * secondUnit + t.getNano() / nanoUnit; // seconds scaled up to this unit, plus the nano part scaled down (integer division truncates)
125
+ }
126
+
127
+ public Timestamp toTimestamp(long t) // builds a Timestamp from an integer count of this unit
128
+ {
129
+ return Timestamp.ofEpochSecond(t / secondUnit, (int) (t % secondUnit * nanoUnit)); // quotient = epoch seconds; remainder, scaled to nanoseconds, is passed as the second argument
130
+ }
131
+
132
+ public static UnixTimestampUnit of(String s) // parses the unix_timestamp_unit config string
133
+ {
134
+ switch (s) { // accepts exactly "sec", "milli", "micro" or "nano"
135
+ case "sec": return SEC;
136
+ case "milli": return MILLI;
137
+ case "micro": return MICRO;
138
+ case "nano": return NANO;
139
+ default:
140
+ throw new ConfigException(
141
+ String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano", s));
142
+ }
143
+ }
144
+ }
145
+
146
+ private final Logger log;
147
+
148
+ public AddTimeFilterPlugin()
149
+ {
150
+ this.log = Exec.getLogger(getClass());
151
+ }
152
+
153
+ @Override
154
+ public void transaction(ConfigSource config, Schema inputSchema,
155
+ FilterPlugin.Control control)
156
+ {
157
+ PluginTask task = config.loadConfig(PluginTask.class);
158
+ control.run(task.dump(), new SchemaConverter(log, task, inputSchema).toOutputSchema());
159
+ }
160
+
161
+ @Override
162
+ public PageOutput open(TaskSource taskSource, Schema inputSchema,
163
+ Schema outputSchema, PageOutput output)
164
+ {
165
+ PluginTask task = taskSource.loadTask(PluginTask.class);
166
+ return new PageConverter(log, inputSchema, outputSchema, output, new SchemaConverter(log, task, inputSchema));
167
+ }
168
+
169
+ static class PageConverter
170
+ implements PageOutput
171
+ {
172
+ private final Logger log;
173
+ private SchemaConverter schemaConverter;
174
+ private final PageReader pageReader;
175
+ private final PageBuilder pageBuilder;
176
+
177
+ public PageConverter(Logger log, Schema inputSchema, Schema outputSchema, PageOutput output, SchemaConverter schemaConverter)
178
+ {
179
+ this.log = log;
180
+ this.schemaConverter = schemaConverter;
181
+ this.pageReader = new PageReader(inputSchema);
182
+ this.pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
183
+ }
184
+
185
+ @Override
186
+ public void add(Page page)
187
+ {
188
+ pageReader.setPage(page);
189
+
190
+ while (pageReader.nextRecord()) {
191
+ schemaConverter.convertRecord(pageReader, pageBuilder);
192
+ }
193
+ }
194
+
195
+ @Override
196
+ public void finish()
197
+ {
198
+ pageBuilder.finish();
199
+ }
200
+
201
+ @Override
202
+ public void close()
203
+ {
204
+ pageBuilder.close();
205
+ }
206
+ }
207
+
208
+ }
@@ -0,0 +1,14 @@
1
+ package org.embulk.filter.add_time.converter;
2
+
3
+ import org.embulk.spi.PageBuilder;
4
+ import org.embulk.spi.PageReader;
5
+ import org.embulk.spi.Schema;
6
+
7
+ public interface ColumnConverter
8
+ {
9
+ void update(PageReader pageReader);
10
+
11
+ void convert(PageBuilder pageBuilder);
12
+
13
+ void addColumn(Schema.Builder schemaBuilder);
14
+ }
@@ -0,0 +1,72 @@
1
+ package org.embulk.filter.add_time.converter;
2
+
3
+ import org.embulk.filter.add_time.reader.ColumnReader;
4
+ import org.embulk.spi.Column;
5
+ import org.embulk.spi.PageBuilder;
6
+ import org.embulk.spi.PageReader;
7
+ import org.embulk.spi.Schema;
8
+
9
+ public class ColumnDuplicator
10
+ implements ColumnConverter
11
+ {
12
+ public static class Builder
13
+ {
14
+ private Column column;
15
+ private ColumnReader duplicator;
16
+ private ColumnReader duplicatee;
17
+
18
+ public Builder()
19
+ {
20
+ }
21
+
22
+ public Builder setColumn(Column column)
23
+ {
24
+ this.column = column;
25
+ return this;
26
+ }
27
+
28
+ public Builder setDuplicator(ColumnReader duplicator)
29
+ {
30
+ this.duplicator = duplicator;
31
+ return this;
32
+ }
33
+
34
+ public Builder setDuplicatee(ColumnReader duplicatee)
35
+ {
36
+ this.duplicatee = duplicatee;
37
+ return this;
38
+ }
39
+
40
+ public ColumnDuplicator build()
41
+ {
42
+ return new ColumnDuplicator(column, duplicator, duplicatee);
43
+ }
44
+ }
45
+
46
+ private final Column column;
47
+ private final ColumnReader duplicator;
48
+ private final ColumnReader duplicatee;
49
+
50
+ private ColumnDuplicator(Column column, ColumnReader duplicator, ColumnReader duplicatee)
51
+ {
52
+ this.column = column;
53
+ this.duplicator = duplicator;
54
+ this.duplicatee = duplicatee;
55
+ }
56
+
57
+ public void update(PageReader pageReader)
58
+ {
59
+ duplicator.readValue(column, pageReader);
60
+ duplicator.copyTo(duplicatee);
61
+ }
62
+
63
+ public void convert(PageBuilder pageBuilder)
64
+ {
65
+ duplicator.convertValue(column, pageBuilder);
66
+ }
67
+
68
+ public void addColumn(Schema.Builder schemaBuilder)
69
+ {
70
+ schemaBuilder.add(column.getName(), column.getType());
71
+ }
72
+ }
@@ -0,0 +1,33 @@
1
+ package org.embulk.filter.add_time.converter;
2
+
3
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.FromColumnConfig;
4
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.ToColumnConfig;
5
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.UnixTimestampUnit;
6
+ import org.embulk.spi.Column;
7
+ import org.embulk.spi.PageBuilder;
8
+ import org.embulk.spi.time.Timestamp;
9
+
10
+ public class LongValueCastConverter
11
+ extends ValueCastConverter
12
+ {
13
+ private final UnixTimestampUnit fromUnixTimestampUnit;
14
+
15
+ public LongValueCastConverter(FromColumnConfig fromColumnConfig, ToColumnConfig toColumnConfig)
16
+ {
17
+ super(toColumnConfig);
18
+ this.fromUnixTimestampUnit = UnixTimestampUnit.of(fromColumnConfig.getUnixTimestampUnit());
19
+ }
20
+
21
+ @Override
22
+ public void convertValue(final Column column, long value, final PageBuilder pageBuilder)
23
+ {
24
+ columnVisitor.setValue(longToTimestamp(value));
25
+ columnVisitor.setPageBuilder(pageBuilder);
26
+ column.visit(columnVisitor);
27
+ }
28
+
29
+ private Timestamp longToTimestamp(long value)
30
+ {
31
+ return fromUnixTimestampUnit.toTimestamp(value);
32
+ }
33
+ }
@@ -0,0 +1,257 @@
1
+ package org.embulk.filter.add_time.converter;
2
+
3
+ import com.google.common.base.Optional;
4
+ import org.embulk.config.ConfigException;
5
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.PluginTask;
6
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.FromColumnConfig;
7
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.FromValueConfig;
8
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.ToColumnConfig;
9
+ import org.embulk.filter.add_time.reader.BooleanColumnReader;
10
+ import org.embulk.filter.add_time.reader.ColumnReader;
11
+ import org.embulk.filter.add_time.reader.DoubleColumnReader;
12
+ import org.embulk.filter.add_time.reader.LongColumnReader;
13
+ import org.embulk.filter.add_time.reader.StringColumnReader;
14
+ import org.embulk.filter.add_time.reader.TimeValueGenerator;
15
+ import org.embulk.filter.add_time.reader.TimestampColumnReader;
16
+ import org.embulk.spi.Column;
17
+ import org.embulk.spi.ColumnVisitor;
18
+ import org.embulk.spi.DataException;
19
+ import org.embulk.spi.PageBuilder;
20
+ import org.embulk.spi.PageReader;
21
+ import org.embulk.spi.Schema;
22
+ import org.embulk.spi.type.BooleanType;
23
+ import org.embulk.spi.type.DoubleType;
24
+ import org.embulk.spi.type.LongType;
25
+ import org.embulk.spi.type.StringType;
26
+ import org.embulk.spi.type.TimestampType;
27
+ import org.embulk.spi.type.Type;
28
+ import org.embulk.spi.type.Types;
29
+ import org.slf4j.Logger;
30
+
31
+ public class SchemaConverter
32
+ {
33
+ private final Logger log;
34
+ private final ColumnConverter[] converters;
35
+
36
+ public SchemaConverter(Logger log, PluginTask task, Schema inputSchema)
37
+ {
38
+ this.log = log;
39
+
40
+ ToColumnConfig toColumnConfig = task.getToColumn();
41
+ final String toColumnName = toColumnConfig.getName();
42
+ final Type toColumnType = toToColumnType(toColumnName, toColumnConfig.getType()); // TODO getType should return Type object
43
+
44
+ Optional<FromColumnConfig> fromColumnConfig = task.getFromColumn();
45
+ Optional<FromValueConfig> fromValueConfig = task.getFromValue();
46
+
47
+ if (fromColumnConfig.isPresent() && fromValueConfig.isPresent()) {
48
+ throw new ConfigException("Setting both from_column and from_value is invalid.");
49
+ }
50
+ if (!fromColumnConfig.isPresent() && !fromValueConfig.isPresent()) {
51
+ throw new ConfigException("Setting from_column or from_value is required.");
52
+ }
53
+
54
+ converters = new ColumnConverter[inputSchema.size() + 1];
55
+
56
+ for (int i = 0; i < inputSchema.size(); i++) {
57
+ Column column = inputSchema.getColumn(i);
58
+ String columnName = column.getName();
59
+ Type columnType = column.getType();
60
+
61
+ final String newColumnName;
62
+ if (columnName.equals(toColumnName)) {
63
+ newColumnName = newColumnUniqueName(columnName, inputSchema);
64
+ log.warn("to_column '{}' is set but '{}' column also exists. The existent '{}' column is renamed to '{}'.",
65
+ toColumnName, toColumnName, toColumnName, newColumnName);
66
+ }
67
+ else {
68
+ newColumnName = columnName;
69
+ }
70
+
71
+ if (fromColumnConfig.isPresent() && columnName.equals(fromColumnConfig.get().getName())) {
72
+ if (!columnType.equals(Types.LONG) && !columnType.equals(Types.STRING) && !columnType.equals(Types.TIMESTAMP)) {
73
+ throw new ConfigException(String.format(
74
+ "The type of the '%s' column specified as from_column must be long, string or timestamp. But it's %s.", columnName, columnType));
75
+ }
76
+
77
+ ColumnReader duplicatee = newColumnReader(columnType, newValueCastConverter(columnType, fromColumnConfig, toColumnConfig));
78
+ converters[inputSchema.size()] = new SimpleColumnConverter.Builder()
79
+ .setColumn(new Column(inputSchema.size(), toColumnName, toColumnType))
80
+ .setColumnReader(duplicatee)
81
+ .build();
82
+ converters[i] = new ColumnDuplicator.Builder()
83
+ .setColumn(new Column(i, newColumnName, columnType))
84
+ .setDuplicator(newColumnReader(columnType, ValueConverter.NO_CONV))
85
+ .setDuplicatee(duplicatee)
86
+ .build();
87
+ }
88
+ else {
89
+ converters[i] = new SimpleColumnConverter.Builder()
90
+ .setColumn(new Column(i, newColumnName, columnType))
91
+ .setColumnReader(newColumnReader(columnType, ValueConverter.NO_CONV))
92
+ .build();
93
+ }
94
+ }
95
+
96
+ if (fromValueConfig.isPresent()) {
97
+ // create column converter for from_value
98
+ converters[inputSchema.size()] = new SimpleColumnConverter.Builder()
99
+ .setColumn(new Column(inputSchema.size(), toColumnName, toColumnType))
100
+ .setColumnReader(TimeValueGenerator.newGenerator(fromValueConfig.get(), newValueCastConverter(Types.TIMESTAMP, fromColumnConfig, toColumnConfig)))
101
+ .build();
102
+ }
103
+ }
104
+
105
+ private static String newColumnUniqueName(String originalName, Schema schema)
106
+ {
107
+ String name = originalName;
108
+ do {
109
+ name += "_";
110
+ }
111
+ while (containsColumnName(schema, name));
112
+ return name;
113
+ }
114
+
115
+ private static boolean containsColumnName(Schema schema, String name)
116
+ {
117
+ for (Column c : schema.getColumns()) {
118
+ if (c.getName().equals(name)) {
119
+ return true;
120
+ }
121
+ }
122
+ return false;
123
+ }
124
+
125
+ private static ColumnReader newColumnReader(Type columnType, ValueConverter valueConverter)
126
+ {
127
+ if (columnType instanceof BooleanType) {
128
+ return new BooleanColumnReader(valueConverter);
129
+ }
130
+ else if (columnType instanceof LongType) {
131
+ return new LongColumnReader(valueConverter);
132
+ }
133
+ else if (columnType instanceof DoubleType) {
134
+ return new DoubleColumnReader(valueConverter);
135
+ }
136
+ else if (columnType instanceof StringType) {
137
+ return new StringColumnReader(valueConverter);
138
+ }
139
+ else if (columnType instanceof TimestampType) {
140
+ return new TimestampColumnReader(valueConverter);
141
+ }
142
+ // TODO support Json type
143
+ else {
144
+ throw new ConfigException("Unsupported type: " + columnType); // TODO after json type support, it should be changed to AssertionError.
145
+ }
146
+ }
147
+
148
+ private static ValueCastConverter newValueCastConverter(Type columnType, Optional<FromColumnConfig> fromColumnConfig, ToColumnConfig toColumnConfig)
149
+ {
150
+ if (columnType instanceof LongType) {
151
+ return new LongValueCastConverter(fromColumnConfig.get(), toColumnConfig);
152
+ }
153
+ else if (columnType instanceof StringType) {
154
+ return new StringValueCastConverter(fromColumnConfig.get(), toColumnConfig);
155
+ }
156
+ else if (columnType instanceof TimestampType) {
157
+ return new TimestampValueCastConverter(toColumnConfig);
158
+ }
159
+ else {
160
+ throw new AssertionError("Unsupported type: " + columnType);
161
+ }
162
+ }
163
+
164
+ private static Type toToColumnType(String name, String type) // maps the to_column type name to an Embulk Type; only "long" and "timestamp" are accepted
165
+ {
166
+ switch (type) {
167
+ case "long":
168
+ return Types.LONG;
169
+ case "timestamp":
170
+ return Types.TIMESTAMP;
171
+ default:
172
+ throw new ConfigException(String.format( // TODO should return AssertionError
173
+ "The type of the '%s' column specified as to_column must be long or timestamp. But it's %s.", name, type)); // %s, not SLF4J-style {}: String.format leaves {} as literal text
174
+ }
175
+ }
176
+
177
+ public void convertRecord(final PageReader pageReader, final PageBuilder pageBuilder)
178
+ {
179
+ try {
180
+ beginRecordConversion();
181
+
182
+ pageReader.getSchema().visitColumns(new ColumnVisitor()
183
+ {
184
+ @Override
185
+ public void booleanColumn(Column column)
186
+ {
187
+ updateColumn(column, pageReader);
188
+ }
189
+
190
+ @Override
191
+ public void longColumn(Column column)
192
+ {
193
+ updateColumn(column, pageReader);
194
+ }
195
+
196
+ @Override
197
+ public void doubleColumn(Column column)
198
+ {
199
+ updateColumn(column, pageReader);
200
+ }
201
+
202
+ @Override
203
+ public void stringColumn(Column column)
204
+ {
205
+ updateColumn(column, pageReader);
206
+ }
207
+
208
+ @Override
209
+ public void timestampColumn(Column column)
210
+ {
211
+ updateColumn(column, pageReader);
212
+ }
213
+ });
214
+
215
+ endRecordConversion(pageBuilder);
216
+ }
217
+ catch (RuntimeException e) { // TODO should use AddTimeRecordValidateException or the subclasses
218
+ log.warn(String.format("Skipped a record (%s).", e.getMessage()), e);
219
+ }
220
+ }
221
+
222
+ private void beginRecordConversion()
223
+ {
224
+ }
225
+
226
+ private void updateColumn(Column column, PageReader pageReader)
227
+ {
228
+ converters[column.getIndex()].update(pageReader);
229
+ }
230
+
231
+ private void endRecordConversion(PageBuilder pageBuilder)
232
+ {
233
+ for (ColumnConverter converter : converters) {
234
+ converter.convert(pageBuilder);
235
+ }
236
+
237
+ pageBuilder.addRecord();
238
+ }
239
+
240
+ public Schema toOutputSchema()
241
+ {
242
+ Schema.Builder schemaBuilder = new Schema.Builder();
243
+ for (ColumnConverter converter : converters) {
244
+ converter.addColumn(schemaBuilder);
245
+ }
246
+ return schemaBuilder.build();
247
+ }
248
+
249
+ static class AddTimeRecordValidateException
250
+ extends DataException
251
+ {
252
+ AddTimeRecordValidateException(Throwable cause)
253
+ {
254
+ super(cause);
255
+ }
256
+ }
257
+ }