embulk-filter-add_time 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +7 -0
  4. data/CHANGELOG.md +3 -0
  5. data/COPYING +14 -0
  6. data/README.md +212 -0
  7. data/build.gradle +82 -0
  8. data/gradle/check.gradle +34 -0
  9. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  10. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  11. data/gradlew +164 -0
  12. data/gradlew.bat +90 -0
  13. data/lib/embulk/filter/add_time.rb +3 -0
  14. data/src/main/java/org/embulk/filter/add_time/AddTimeFilterPlugin.java +208 -0
  15. data/src/main/java/org/embulk/filter/add_time/converter/ColumnConverter.java +14 -0
  16. data/src/main/java/org/embulk/filter/add_time/converter/ColumnDuplicator.java +72 -0
  17. data/src/main/java/org/embulk/filter/add_time/converter/LongValueCastConverter.java +33 -0
  18. data/src/main/java/org/embulk/filter/add_time/converter/SchemaConverter.java +257 -0
  19. data/src/main/java/org/embulk/filter/add_time/converter/SimpleColumnConverter.java +62 -0
  20. data/src/main/java/org/embulk/filter/add_time/converter/StringValueCastConverter.java +33 -0
  21. data/src/main/java/org/embulk/filter/add_time/converter/TimestampValueCastConverter.java +23 -0
  22. data/src/main/java/org/embulk/filter/add_time/converter/ValueCastConverter.java +108 -0
  23. data/src/main/java/org/embulk/filter/add_time/converter/ValueConverter.java +22 -0
  24. data/src/main/java/org/embulk/filter/add_time/converter/ValueNoConverter.java +46 -0
  25. data/src/main/java/org/embulk/filter/add_time/reader/AbstractColumnReader.java +55 -0
  26. data/src/main/java/org/embulk/filter/add_time/reader/BooleanColumnReader.java +35 -0
  27. data/src/main/java/org/embulk/filter/add_time/reader/ColumnReader.java +14 -0
  28. data/src/main/java/org/embulk/filter/add_time/reader/DoubleColumnReader.java +35 -0
  29. data/src/main/java/org/embulk/filter/add_time/reader/LongColumnReader.java +35 -0
  30. data/src/main/java/org/embulk/filter/add_time/reader/StringColumnReader.java +35 -0
  31. data/src/main/java/org/embulk/filter/add_time/reader/TimeValueGenerator.java +177 -0
  32. data/src/main/java/org/embulk/filter/add_time/reader/TimestampColumnReader.java +36 -0
  33. data/src/test/java/org/embulk/filter/add_time/TestAddTimeFilterPlugin.java +416 -0
  34. data/src/test/java/org/embulk/filter/add_time/converter/TestSchemaConverter.java +338 -0
  35. metadata +107 -0
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windowz variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega
@@ -0,0 +1,3 @@
1
+ Embulk::JavaPlugin.register_filter(
2
+ "add_time", "org.embulk.filter.add_time.AddTimeFilterPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))
@@ -0,0 +1,208 @@
1
+ package org.embulk.filter.add_time;
2
+
3
+ import com.fasterxml.jackson.annotation.JsonCreator;
4
+ import com.google.common.base.Optional;
5
+ import org.embulk.config.Config;
6
+ import org.embulk.config.ConfigDefault;
7
+ import org.embulk.config.ConfigException;
8
+ import org.embulk.config.ConfigSource;
9
+ import org.embulk.config.Task;
10
+ import org.embulk.config.TaskSource;
11
+ import org.embulk.filter.add_time.converter.SchemaConverter;
12
+ import org.embulk.spi.Exec;
13
+ import org.embulk.spi.FilterPlugin;
14
+ import org.embulk.spi.Page;
15
+ import org.embulk.spi.PageBuilder;
16
+ import org.embulk.spi.PageOutput;
17
+ import org.embulk.spi.PageReader;
18
+ import org.embulk.spi.Schema;
19
+ import org.embulk.spi.time.Timestamp;
20
+ import org.slf4j.Logger;
21
+
22
+ public class AddTimeFilterPlugin
23
+ implements FilterPlugin
24
+ {
25
+ public interface PluginTask
26
+ extends Task
27
+ {
28
+ @Config("to_column")
29
+ ToColumnConfig getToColumn();
30
+
31
+ @Config("from_column")
32
+ @ConfigDefault("null")
33
+ Optional<FromColumnConfig> getFromColumn();
34
+
35
+ @Config("from_value")
36
+ @ConfigDefault("null")
37
+ Optional<FromValueConfig> getFromValue();
38
+ }
39
+
40
+ public interface ToColumnConfig
41
+ extends Task
42
+ {
43
+ @Config("name")
44
+ String getName();
45
+
46
+ @Config("type")
47
+ @ConfigDefault("\"timestamp\"")
48
+ String getType();
49
+
50
+ @Config("unix_timestamp_unit")
51
+ @ConfigDefault("\"sec\"")
52
+ String getUnixTimestampUnit();
53
+ }
54
+
55
+ public interface FromColumnConfig
56
+ extends Task, org.embulk.spi.time.TimestampParser.Task, org.embulk.spi.time.TimestampParser.TimestampColumnOption
57
+ {
58
+ @Config("name")
59
+ String getName();
60
+
61
+ @Config("unix_timestamp_unit")
62
+ @ConfigDefault("\"sec\"")
63
+ String getUnixTimestampUnit();
64
+
65
+ @Config("timestamp_format")
66
+ @ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
67
+ Optional<String> getFormat();
68
+
69
+ @Config("default_timestamp_format")
70
+ @ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
71
+ String getDefaultTimestampFormat();
72
+ }
73
+
74
+ public interface FromValueConfig
75
+ extends Task, org.embulk.spi.time.TimestampParser.Task, org.embulk.spi.time.TimestampParser.TimestampColumnOption
76
+ {
77
+ @Config("mode")
78
+ @ConfigDefault("\"fixed_time\"")
79
+ String getMode();
80
+
81
+ @Config("value")
82
+ @ConfigDefault("null")
83
+ Optional<Object> getValue();
84
+
85
+ @Config("from")
86
+ @ConfigDefault("null")
87
+ Optional<Object> getFrom();
88
+
89
+ @Config("to")
90
+ @ConfigDefault("null")
91
+ Optional<Object> getTo();
92
+
93
+ @Config("unix_timestamp_unit")
94
+ @ConfigDefault("\"sec\"")
95
+ String getUnixTimestampUnit();
96
+
97
+ @Config("timestamp_format")
98
+ @ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
99
+ Optional<String> getFormat();
100
+
101
+ @Config("default_timestamp_format")
102
+ @ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
103
+ String getDefaultTimestampFormat();
104
+ }
105
+
106
+ public enum UnixTimestampUnit
107
+ {
108
+ SEC(1, 1000000000),
109
+ MILLI(1000, 1000000),
110
+ MICRO(1000000, 1000),
111
+ NANO(1000000000, 1);
112
+
113
+ private final int secondUnit;
114
+ private final int nanoUnit;
115
+
116
+ UnixTimestampUnit(int secondUnit, int nanoUnit)
117
+ {
118
+ this.secondUnit = secondUnit;
119
+ this.nanoUnit = nanoUnit;
120
+ }
121
+
122
+ public long toLong(Timestamp t)
123
+ {
124
+ return t.getEpochSecond() * secondUnit + t.getNano() / nanoUnit;
125
+ }
126
+
127
+ public Timestamp toTimestamp(long t)
128
+ {
129
+ return Timestamp.ofEpochSecond(t / secondUnit, (int) (t % secondUnit * nanoUnit));
130
+ }
131
+
132
+ public static UnixTimestampUnit of(String s)
133
+ {
134
+ switch (s) {
135
+ case "sec": return SEC;
136
+ case "milli": return MILLI;
137
+ case "micro": return MICRO;
138
+ case "nano": return NANO;
139
+ default:
140
+ throw new ConfigException(
141
+ String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano", s));
142
+ }
143
+ }
144
+ }
145
+
146
+ private final Logger log;
147
+
148
+ public AddTimeFilterPlugin()
149
+ {
150
+ this.log = Exec.getLogger(getClass());
151
+ }
152
+
153
+ @Override
154
+ public void transaction(ConfigSource config, Schema inputSchema,
155
+ FilterPlugin.Control control)
156
+ {
157
+ PluginTask task = config.loadConfig(PluginTask.class);
158
+ control.run(task.dump(), new SchemaConverter(log, task, inputSchema).toOutputSchema());
159
+ }
160
+
161
+ @Override
162
+ public PageOutput open(TaskSource taskSource, Schema inputSchema,
163
+ Schema outputSchema, PageOutput output)
164
+ {
165
+ PluginTask task = taskSource.loadTask(PluginTask.class);
166
+ return new PageConverter(log, inputSchema, outputSchema, output, new SchemaConverter(log, task, inputSchema));
167
+ }
168
+
169
+ static class PageConverter
170
+ implements PageOutput
171
+ {
172
+ private final Logger log;
173
+ private SchemaConverter schemaConverter;
174
+ private final PageReader pageReader;
175
+ private final PageBuilder pageBuilder;
176
+
177
+ public PageConverter(Logger log, Schema inputSchema, Schema outputSchema, PageOutput output, SchemaConverter schemaConverter)
178
+ {
179
+ this.log = log;
180
+ this.schemaConverter = schemaConverter;
181
+ this.pageReader = new PageReader(inputSchema);
182
+ this.pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
183
+ }
184
+
185
+ @Override
186
+ public void add(Page page)
187
+ {
188
+ pageReader.setPage(page);
189
+
190
+ while (pageReader.nextRecord()) {
191
+ schemaConverter.convertRecord(pageReader, pageBuilder);
192
+ }
193
+ }
194
+
195
+ @Override
196
+ public void finish()
197
+ {
198
+ pageBuilder.finish();
199
+ }
200
+
201
+ @Override
202
+ public void close()
203
+ {
204
+ pageBuilder.close();
205
+ }
206
+ }
207
+
208
+ }
@@ -0,0 +1,14 @@
1
+ package org.embulk.filter.add_time.converter;
2
+
3
+ import org.embulk.spi.PageBuilder;
4
+ import org.embulk.spi.PageReader;
5
+ import org.embulk.spi.Schema;
6
+
7
+ public interface ColumnConverter
8
+ {
9
+ void update(PageReader pageReader);
10
+
11
+ void convert(PageBuilder pageBuilder);
12
+
13
+ void addColumn(Schema.Builder schemaBuilder);
14
+ }
@@ -0,0 +1,72 @@
1
+ package org.embulk.filter.add_time.converter;
2
+
3
+ import org.embulk.filter.add_time.reader.ColumnReader;
4
+ import org.embulk.spi.Column;
5
+ import org.embulk.spi.PageBuilder;
6
+ import org.embulk.spi.PageReader;
7
+ import org.embulk.spi.Schema;
8
+
9
+ public class ColumnDuplicator
10
+ implements ColumnConverter
11
+ {
12
+ public static class Builder
13
+ {
14
+ private Column column;
15
+ private ColumnReader duplicator;
16
+ private ColumnReader duplicatee;
17
+
18
+ public Builder()
19
+ {
20
+ }
21
+
22
+ public Builder setColumn(Column column)
23
+ {
24
+ this.column = column;
25
+ return this;
26
+ }
27
+
28
+ public Builder setDuplicator(ColumnReader duplicator)
29
+ {
30
+ this.duplicator = duplicator;
31
+ return this;
32
+ }
33
+
34
+ public Builder setDuplicatee(ColumnReader duplicatee)
35
+ {
36
+ this.duplicatee = duplicatee;
37
+ return this;
38
+ }
39
+
40
+ public ColumnDuplicator build()
41
+ {
42
+ return new ColumnDuplicator(column, duplicator, duplicatee);
43
+ }
44
+ }
45
+
46
+ private final Column column;
47
+ private final ColumnReader duplicator;
48
+ private final ColumnReader duplicatee;
49
+
50
+ private ColumnDuplicator(Column column, ColumnReader duplicator, ColumnReader duplicatee)
51
+ {
52
+ this.column = column;
53
+ this.duplicator = duplicator;
54
+ this.duplicatee = duplicatee;
55
+ }
56
+
57
+ public void update(PageReader pageReader)
58
+ {
59
+ duplicator.readValue(column, pageReader);
60
+ duplicator.copyTo(duplicatee);
61
+ }
62
+
63
+ public void convert(PageBuilder pageBuilder)
64
+ {
65
+ duplicator.convertValue(column, pageBuilder);
66
+ }
67
+
68
+ public void addColumn(Schema.Builder schemaBuilder)
69
+ {
70
+ schemaBuilder.add(column.getName(), column.getType());
71
+ }
72
+ }
@@ -0,0 +1,33 @@
1
+ package org.embulk.filter.add_time.converter;
2
+
3
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.FromColumnConfig;
4
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.ToColumnConfig;
5
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.UnixTimestampUnit;
6
+ import org.embulk.spi.Column;
7
+ import org.embulk.spi.PageBuilder;
8
+ import org.embulk.spi.time.Timestamp;
9
+
10
+ public class LongValueCastConverter
11
+ extends ValueCastConverter
12
+ {
13
+ private final UnixTimestampUnit fromUnixTimestampUnit;
14
+
15
+ public LongValueCastConverter(FromColumnConfig fromColumnConfig, ToColumnConfig toColumnConfig)
16
+ {
17
+ super(toColumnConfig);
18
+ this.fromUnixTimestampUnit = UnixTimestampUnit.of(fromColumnConfig.getUnixTimestampUnit());
19
+ }
20
+
21
+ @Override
22
+ public void convertValue(final Column column, long value, final PageBuilder pageBuilder)
23
+ {
24
+ columnVisitor.setValue(longToTimestamp(value));
25
+ columnVisitor.setPageBuilder(pageBuilder);
26
+ column.visit(columnVisitor);
27
+ }
28
+
29
+ private Timestamp longToTimestamp(long value)
30
+ {
31
+ return fromUnixTimestampUnit.toTimestamp(value);
32
+ }
33
+ }
@@ -0,0 +1,257 @@
1
+ package org.embulk.filter.add_time.converter;
2
+
3
+ import com.google.common.base.Optional;
4
+ import org.embulk.config.ConfigException;
5
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.PluginTask;
6
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.FromColumnConfig;
7
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.FromValueConfig;
8
+ import org.embulk.filter.add_time.AddTimeFilterPlugin.ToColumnConfig;
9
+ import org.embulk.filter.add_time.reader.BooleanColumnReader;
10
+ import org.embulk.filter.add_time.reader.ColumnReader;
11
+ import org.embulk.filter.add_time.reader.DoubleColumnReader;
12
+ import org.embulk.filter.add_time.reader.LongColumnReader;
13
+ import org.embulk.filter.add_time.reader.StringColumnReader;
14
+ import org.embulk.filter.add_time.reader.TimeValueGenerator;
15
+ import org.embulk.filter.add_time.reader.TimestampColumnReader;
16
+ import org.embulk.spi.Column;
17
+ import org.embulk.spi.ColumnVisitor;
18
+ import org.embulk.spi.DataException;
19
+ import org.embulk.spi.PageBuilder;
20
+ import org.embulk.spi.PageReader;
21
+ import org.embulk.spi.Schema;
22
+ import org.embulk.spi.type.BooleanType;
23
+ import org.embulk.spi.type.DoubleType;
24
+ import org.embulk.spi.type.LongType;
25
+ import org.embulk.spi.type.StringType;
26
+ import org.embulk.spi.type.TimestampType;
27
+ import org.embulk.spi.type.Type;
28
+ import org.embulk.spi.type.Types;
29
+ import org.slf4j.Logger;
30
+
31
+ public class SchemaConverter
32
+ {
33
+ private final Logger log;
34
+ private final ColumnConverter[] converters;
35
+
36
+ public SchemaConverter(Logger log, PluginTask task, Schema inputSchema) // Builds one converter per input column plus a final one that emits to_column; throws ConfigException on invalid configuration.
37
+ {
38
+ this.log = log;
39
+
40
+ ToColumnConfig toColumnConfig = task.getToColumn();
41
+ final String toColumnName = toColumnConfig.getName();
42
+ final Type toColumnType = toToColumnType(toColumnName, toColumnConfig.getType()); // TODO getType should return Type object
43
+
44
+ Optional<FromColumnConfig> fromColumnConfig = task.getFromColumn();
45
+ Optional<FromValueConfig> fromValueConfig = task.getFromValue();
46
+
47
+ if (fromColumnConfig.isPresent() && fromValueConfig.isPresent()) {
48
+ throw new ConfigException("Setting both from_column and from_value is invalid.");
49
+ }
50
+ if (!fromColumnConfig.isPresent() && !fromValueConfig.isPresent()) {
51
+ throw new ConfigException("Setting from_column or from_value is required.");
52
+ }
53
+ if (fromColumnConfig.isPresent() && !containsColumnName(inputSchema, fromColumnConfig.get().getName())) { throw new ConfigException(String.format("The '%s' column specified as from_column doesn't exist in the input schema.", fromColumnConfig.get().getName())); } // otherwise the last converter slot stays null and fails later with a NullPointerException
54
+ converters = new ColumnConverter[inputSchema.size() + 1]; // last slot is reserved for the added to_column
55
+
56
+ for (int i = 0; i < inputSchema.size(); i++) {
57
+ Column column = inputSchema.getColumn(i);
58
+ String columnName = column.getName();
59
+ Type columnType = column.getType();
60
+
61
+ final String newColumnName;
62
+ if (columnName.equals(toColumnName)) {
63
+ newColumnName = newColumnUniqueName(columnName, inputSchema);
64
+ log.warn("to_column '{}' is set but '{}' column also exists. The existent '{}' column is renamed to '{}'.",
65
+ toColumnName, toColumnName, toColumnName, newColumnName);
66
+ }
67
+ else {
68
+ newColumnName = columnName;
69
+ }
70
+
71
+ if (fromColumnConfig.isPresent() && columnName.equals(fromColumnConfig.get().getName())) {
72
+ if (!columnType.equals(Types.LONG) && !columnType.equals(Types.STRING) && !columnType.equals(Types.TIMESTAMP)) {
73
+ throw new ConfigException(String.format(
74
+ "The type of the '%s' column specified as from_column must be long, string or timestamp. But it's %s.", columnName, columnType));
75
+ }
76
+
77
+ ColumnReader duplicatee = newColumnReader(columnType, newValueCastConverter(columnType, fromColumnConfig, toColumnConfig)); // shared reader: filled by the duplicator below, written out as to_column
78
+ converters[inputSchema.size()] = new SimpleColumnConverter.Builder()
79
+ .setColumn(new Column(inputSchema.size(), toColumnName, toColumnType))
80
+ .setColumnReader(duplicatee)
81
+ .build();
82
+ converters[i] = new ColumnDuplicator.Builder()
83
+ .setColumn(new Column(i, newColumnName, columnType))
84
+ .setDuplicator(newColumnReader(columnType, ValueConverter.NO_CONV))
85
+ .setDuplicatee(duplicatee)
86
+ .build();
87
+ }
88
+ else {
89
+ converters[i] = new SimpleColumnConverter.Builder()
90
+ .setColumn(new Column(i, newColumnName, columnType))
91
+ .setColumnReader(newColumnReader(columnType, ValueConverter.NO_CONV))
92
+ .build();
93
+ }
94
+ }
95
+
96
+ if (fromValueConfig.isPresent()) {
97
+ // create column converter for from_value
98
+ converters[inputSchema.size()] = new SimpleColumnConverter.Builder()
99
+ .setColumn(new Column(inputSchema.size(), toColumnName, toColumnType))
100
+ .setColumnReader(TimeValueGenerator.newGenerator(fromValueConfig.get(), newValueCastConverter(Types.TIMESTAMP, fromColumnConfig, toColumnConfig)))
101
+ .build();
102
+ }
103
+ }
104
+
105
+ private static String newColumnUniqueName(String originalName, Schema schema)
106
+ {
107
+ String name = originalName;
108
+ do {
109
+ name += "_";
110
+ }
111
+ while (containsColumnName(schema, name));
112
+ return name;
113
+ }
114
+
115
+ private static boolean containsColumnName(Schema schema, String name)
116
+ {
117
+ for (Column c : schema.getColumns()) {
118
+ if (c.getName().equals(name)) {
119
+ return true;
120
+ }
121
+ }
122
+ return false;
123
+ }
124
+
125
+ private static ColumnReader newColumnReader(Type columnType, ValueConverter valueConverter)
126
+ {
127
+ if (columnType instanceof BooleanType) {
128
+ return new BooleanColumnReader(valueConverter);
129
+ }
130
+ else if (columnType instanceof LongType) {
131
+ return new LongColumnReader(valueConverter);
132
+ }
133
+ else if (columnType instanceof DoubleType) {
134
+ return new DoubleColumnReader(valueConverter);
135
+ }
136
+ else if (columnType instanceof StringType) {
137
+ return new StringColumnReader(valueConverter);
138
+ }
139
+ else if (columnType instanceof TimestampType) {
140
+ return new TimestampColumnReader(valueConverter);
141
+ }
142
+ // TODO support Json type
143
+ else {
144
+ throw new ConfigException("Unsupported type: " + columnType); // TODO after json type support, it should be changed to AssertionError.
145
+ }
146
+ }
147
+
148
+ private static ValueCastConverter newValueCastConverter(Type columnType, Optional<FromColumnConfig> fromColumnConfig, ToColumnConfig toColumnConfig)
149
+ {
150
+ if (columnType instanceof LongType) {
151
+ return new LongValueCastConverter(fromColumnConfig.get(), toColumnConfig);
152
+ }
153
+ else if (columnType instanceof StringType) {
154
+ return new StringValueCastConverter(fromColumnConfig.get(), toColumnConfig);
155
+ }
156
+ else if (columnType instanceof TimestampType) {
157
+ return new TimestampValueCastConverter(toColumnConfig);
158
+ }
159
+ else {
160
+ throw new AssertionError("Unsupported type: " + columnType);
161
+ }
162
+ }
163
+
164
+ private static Type toToColumnType(String name, String type) // Resolves the configured to_column type name ("long" or "timestamp") to its Type; any other name raises ConfigException.
165
+ {
166
+ switch (type) {
167
+ case "long":
168
+ return Types.LONG;
169
+ case "timestamp":
170
+ return Types.TIMESTAMP;
171
+ default:
172
+ throw new ConfigException(String.format( // TODO should return AssertionError
173
+ "The type of the '%s' column specified as to_column must be long or timestamp. But it's %s.", name, type)); // String.format needs %s; SLF4J-style {} placeholders would be emitted literally
174
+ }
175
+ }
176
+
177
+ public void convertRecord(final PageReader pageReader, final PageBuilder pageBuilder)
178
+ {
179
+ try {
180
+ beginRecordConversion();
181
+
182
+ pageReader.getSchema().visitColumns(new ColumnVisitor()
183
+ {
184
+ @Override
185
+ public void booleanColumn(Column column)
186
+ {
187
+ updateColumn(column, pageReader);
188
+ }
189
+
190
+ @Override
191
+ public void longColumn(Column column)
192
+ {
193
+ updateColumn(column, pageReader);
194
+ }
195
+
196
+ @Override
197
+ public void doubleColumn(Column column)
198
+ {
199
+ updateColumn(column, pageReader);
200
+ }
201
+
202
+ @Override
203
+ public void stringColumn(Column column)
204
+ {
205
+ updateColumn(column, pageReader);
206
+ }
207
+
208
+ @Override
209
+ public void timestampColumn(Column column)
210
+ {
211
+ updateColumn(column, pageReader);
212
+ }
213
+ });
214
+
215
+ endRecordConversion(pageBuilder);
216
+ }
217
+ catch (RuntimeException e) { // TODO should use AddTimeRecordValidateException or the subclasses
218
+ log.warn(String.format("Skipped a record (%s).", e.getMessage()), e);
219
+ }
220
+ }
221
+
222
+ private void beginRecordConversion()
223
+ {
224
+ }
225
+
226
+ private void updateColumn(Column column, PageReader pageReader)
227
+ {
228
+ converters[column.getIndex()].update(pageReader);
229
+ }
230
+
231
+ private void endRecordConversion(PageBuilder pageBuilder)
232
+ {
233
+ for (ColumnConverter converter : converters) {
234
+ converter.convert(pageBuilder);
235
+ }
236
+
237
+ pageBuilder.addRecord();
238
+ }
239
+
240
+ public Schema toOutputSchema()
241
+ {
242
+ Schema.Builder schemaBuilder = new Schema.Builder();
243
+ for (ColumnConverter converter : converters) {
244
+ converter.addColumn(schemaBuilder);
245
+ }
246
+ return schemaBuilder.build();
247
+ }
248
+
249
+ static class AddTimeRecordValidateException
250
+ extends DataException
251
+ {
252
+ AddTimeRecordValidateException(Throwable cause)
253
+ {
254
+ super(cause);
255
+ }
256
+ }
257
+ }