embulk-filter-add_time 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +7 -0
- data/CHANGELOG.md +3 -0
- data/COPYING +14 -0
- data/README.md +212 -0
- data/build.gradle +82 -0
- data/gradle/check.gradle +34 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/filter/add_time.rb +3 -0
- data/src/main/java/org/embulk/filter/add_time/AddTimeFilterPlugin.java +208 -0
- data/src/main/java/org/embulk/filter/add_time/converter/ColumnConverter.java +14 -0
- data/src/main/java/org/embulk/filter/add_time/converter/ColumnDuplicator.java +72 -0
- data/src/main/java/org/embulk/filter/add_time/converter/LongValueCastConverter.java +33 -0
- data/src/main/java/org/embulk/filter/add_time/converter/SchemaConverter.java +257 -0
- data/src/main/java/org/embulk/filter/add_time/converter/SimpleColumnConverter.java +62 -0
- data/src/main/java/org/embulk/filter/add_time/converter/StringValueCastConverter.java +33 -0
- data/src/main/java/org/embulk/filter/add_time/converter/TimestampValueCastConverter.java +23 -0
- data/src/main/java/org/embulk/filter/add_time/converter/ValueCastConverter.java +108 -0
- data/src/main/java/org/embulk/filter/add_time/converter/ValueConverter.java +22 -0
- data/src/main/java/org/embulk/filter/add_time/converter/ValueNoConverter.java +46 -0
- data/src/main/java/org/embulk/filter/add_time/reader/AbstractColumnReader.java +55 -0
- data/src/main/java/org/embulk/filter/add_time/reader/BooleanColumnReader.java +35 -0
- data/src/main/java/org/embulk/filter/add_time/reader/ColumnReader.java +14 -0
- data/src/main/java/org/embulk/filter/add_time/reader/DoubleColumnReader.java +35 -0
- data/src/main/java/org/embulk/filter/add_time/reader/LongColumnReader.java +35 -0
- data/src/main/java/org/embulk/filter/add_time/reader/StringColumnReader.java +35 -0
- data/src/main/java/org/embulk/filter/add_time/reader/TimeValueGenerator.java +177 -0
- data/src/main/java/org/embulk/filter/add_time/reader/TimestampColumnReader.java +36 -0
- data/src/test/java/org/embulk/filter/add_time/TestAddTimeFilterPlugin.java +416 -0
- data/src/test/java/org/embulk/filter/add_time/converter/TestSchemaConverter.java +338 -0
- metadata +107 -0
data/gradlew.bat
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle wrapper launcher for Windows.
@rem
@rem  Locates a java.exe, collects the command line arguments and starts
@rem  org.gradle.wrapper.GradleWrapperMain from gradle\wrapper\gradle-wrapper.jar.
@rem
@rem ##########################################################################

@rem Keep every variable set below local to this script on Windows NT shells
if "%OS%"=="Windows_NT" setlocal

@rem Default JVM options. JAVA_OPTS and GRADLE_OPTS are appended on the java command line below.
set DEFAULT_JVM_OPTS=

@rem Directory and base name of this script
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Locate java.exe: JAVA_HOME wins when it is defined, otherwise try the PATH
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Drop any double quotes from JAVA_HOME before building the executable path
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:init
@rem Collect the command line arguments; the method depends on the shell in use

if not "%OS%" == "Windows_NT" goto win9xME_args
if "%@eval[2+2]" == "4" goto 4NT_args

:win9xME_args
@rem Default path: take the arguments as they were passed to this script
set CMD_LINE_ARGS=
set _SKIP=2

:win9xME_args_slurp
@rem Nothing to collect when the first argument is empty
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*
goto execute

:4NT_args
@rem The 4NT shell from JP Software exposes the arguments through a different variable
set CMD_LINE_ARGS=%$

:execute
@rem The wrapper jar is the only classpath entry

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar

@rem Start the Gradle wrapper
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%

:end
@rem A zero exit code from java skips the failure handling below
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
@rem Leave the local variable scope opened at the top of the script
if "%OS%"=="Windows_NT" endlocal

:omega
|
@@ -0,0 +1,208 @@
|
|
1
|
+
package org.embulk.filter.add_time;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.google.common.base.Optional;
|
5
|
+
import org.embulk.config.Config;
|
6
|
+
import org.embulk.config.ConfigDefault;
|
7
|
+
import org.embulk.config.ConfigException;
|
8
|
+
import org.embulk.config.ConfigSource;
|
9
|
+
import org.embulk.config.Task;
|
10
|
+
import org.embulk.config.TaskSource;
|
11
|
+
import org.embulk.filter.add_time.converter.SchemaConverter;
|
12
|
+
import org.embulk.spi.Exec;
|
13
|
+
import org.embulk.spi.FilterPlugin;
|
14
|
+
import org.embulk.spi.Page;
|
15
|
+
import org.embulk.spi.PageBuilder;
|
16
|
+
import org.embulk.spi.PageOutput;
|
17
|
+
import org.embulk.spi.PageReader;
|
18
|
+
import org.embulk.spi.Schema;
|
19
|
+
import org.embulk.spi.time.Timestamp;
|
20
|
+
import org.slf4j.Logger;
|
21
|
+
|
22
|
+
public class AddTimeFilterPlugin
|
23
|
+
implements FilterPlugin
|
24
|
+
{
|
25
|
+
public interface PluginTask
|
26
|
+
extends Task
|
27
|
+
{
|
28
|
+
@Config("to_column")
|
29
|
+
ToColumnConfig getToColumn();
|
30
|
+
|
31
|
+
@Config("from_column")
|
32
|
+
@ConfigDefault("null")
|
33
|
+
Optional<FromColumnConfig> getFromColumn();
|
34
|
+
|
35
|
+
@Config("from_value")
|
36
|
+
@ConfigDefault("null")
|
37
|
+
Optional<FromValueConfig> getFromValue();
|
38
|
+
}
|
39
|
+
|
40
|
+
public interface ToColumnConfig
|
41
|
+
extends Task
|
42
|
+
{
|
43
|
+
@Config("name")
|
44
|
+
String getName();
|
45
|
+
|
46
|
+
@Config("type")
|
47
|
+
@ConfigDefault("\"timestamp\"")
|
48
|
+
String getType();
|
49
|
+
|
50
|
+
@Config("unix_timestamp_unit")
|
51
|
+
@ConfigDefault("\"sec\"")
|
52
|
+
String getUnixTimestampUnit();
|
53
|
+
}
|
54
|
+
|
55
|
+
public interface FromColumnConfig
|
56
|
+
extends Task, org.embulk.spi.time.TimestampParser.Task, org.embulk.spi.time.TimestampParser.TimestampColumnOption
|
57
|
+
{
|
58
|
+
@Config("name")
|
59
|
+
String getName();
|
60
|
+
|
61
|
+
@Config("unix_timestamp_unit")
|
62
|
+
@ConfigDefault("\"sec\"")
|
63
|
+
String getUnixTimestampUnit();
|
64
|
+
|
65
|
+
@Config("timestamp_format")
|
66
|
+
@ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
|
67
|
+
Optional<String> getFormat();
|
68
|
+
|
69
|
+
@Config("default_timestamp_format")
|
70
|
+
@ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
|
71
|
+
String getDefaultTimestampFormat();
|
72
|
+
}
|
73
|
+
|
74
|
+
public interface FromValueConfig
|
75
|
+
extends Task, org.embulk.spi.time.TimestampParser.Task, org.embulk.spi.time.TimestampParser.TimestampColumnOption
|
76
|
+
{
|
77
|
+
@Config("mode")
|
78
|
+
@ConfigDefault("\"fixed_time\"")
|
79
|
+
String getMode();
|
80
|
+
|
81
|
+
@Config("value")
|
82
|
+
@ConfigDefault("null")
|
83
|
+
Optional<Object> getValue();
|
84
|
+
|
85
|
+
@Config("from")
|
86
|
+
@ConfigDefault("null")
|
87
|
+
Optional<Object> getFrom();
|
88
|
+
|
89
|
+
@Config("to")
|
90
|
+
@ConfigDefault("null")
|
91
|
+
Optional<Object> getTo();
|
92
|
+
|
93
|
+
@Config("unix_timestamp_unit")
|
94
|
+
@ConfigDefault("\"sec\"")
|
95
|
+
String getUnixTimestampUnit();
|
96
|
+
|
97
|
+
@Config("timestamp_format")
|
98
|
+
@ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
|
99
|
+
Optional<String> getFormat();
|
100
|
+
|
101
|
+
@Config("default_timestamp_format")
|
102
|
+
@ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
|
103
|
+
String getDefaultTimestampFormat();
|
104
|
+
}
|
105
|
+
|
106
|
+
public enum UnixTimestampUnit
|
107
|
+
{
|
108
|
+
SEC(1, 1000000000),
|
109
|
+
MILLI(1000, 1000000),
|
110
|
+
MICRO(1000000, 1000),
|
111
|
+
NANO(1000000000, 1);
|
112
|
+
|
113
|
+
private final int secondUnit;
|
114
|
+
private final int nanoUnit;
|
115
|
+
|
116
|
+
UnixTimestampUnit(int secondUnit, int nanoUnit)
|
117
|
+
{
|
118
|
+
this.secondUnit = secondUnit;
|
119
|
+
this.nanoUnit = nanoUnit;
|
120
|
+
}
|
121
|
+
|
122
|
+
public long toLong(Timestamp t)
|
123
|
+
{
|
124
|
+
return t.getEpochSecond() * secondUnit + t.getNano() / nanoUnit;
|
125
|
+
}
|
126
|
+
|
127
|
+
public Timestamp toTimestamp(long t)
|
128
|
+
{
|
129
|
+
return Timestamp.ofEpochSecond(t / secondUnit, (int) (t % secondUnit * nanoUnit));
|
130
|
+
}
|
131
|
+
|
132
|
+
public static UnixTimestampUnit of(String s)
|
133
|
+
{
|
134
|
+
switch (s) {
|
135
|
+
case "sec": return SEC;
|
136
|
+
case "milli": return MILLI;
|
137
|
+
case "micro": return MICRO;
|
138
|
+
case "nano": return NANO;
|
139
|
+
default:
|
140
|
+
throw new ConfigException(
|
141
|
+
String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano", s));
|
142
|
+
}
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
private final Logger log;
|
147
|
+
|
148
|
+
public AddTimeFilterPlugin()
|
149
|
+
{
|
150
|
+
this.log = Exec.getLogger(getClass());
|
151
|
+
}
|
152
|
+
|
153
|
+
@Override
|
154
|
+
public void transaction(ConfigSource config, Schema inputSchema,
|
155
|
+
FilterPlugin.Control control)
|
156
|
+
{
|
157
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
158
|
+
control.run(task.dump(), new SchemaConverter(log, task, inputSchema).toOutputSchema());
|
159
|
+
}
|
160
|
+
|
161
|
+
@Override
|
162
|
+
public PageOutput open(TaskSource taskSource, Schema inputSchema,
|
163
|
+
Schema outputSchema, PageOutput output)
|
164
|
+
{
|
165
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
166
|
+
return new PageConverter(log, inputSchema, outputSchema, output, new SchemaConverter(log, task, inputSchema));
|
167
|
+
}
|
168
|
+
|
169
|
+
static class PageConverter
|
170
|
+
implements PageOutput
|
171
|
+
{
|
172
|
+
private final Logger log;
|
173
|
+
private SchemaConverter schemaConverter;
|
174
|
+
private final PageReader pageReader;
|
175
|
+
private final PageBuilder pageBuilder;
|
176
|
+
|
177
|
+
public PageConverter(Logger log, Schema inputSchema, Schema outputSchema, PageOutput output, SchemaConverter schemaConverter)
|
178
|
+
{
|
179
|
+
this.log = log;
|
180
|
+
this.schemaConverter = schemaConverter;
|
181
|
+
this.pageReader = new PageReader(inputSchema);
|
182
|
+
this.pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
183
|
+
}
|
184
|
+
|
185
|
+
@Override
|
186
|
+
public void add(Page page)
|
187
|
+
{
|
188
|
+
pageReader.setPage(page);
|
189
|
+
|
190
|
+
while (pageReader.nextRecord()) {
|
191
|
+
schemaConverter.convertRecord(pageReader, pageBuilder);
|
192
|
+
}
|
193
|
+
}
|
194
|
+
|
195
|
+
@Override
|
196
|
+
public void finish()
|
197
|
+
{
|
198
|
+
pageBuilder.finish();
|
199
|
+
}
|
200
|
+
|
201
|
+
@Override
|
202
|
+
public void close()
|
203
|
+
{
|
204
|
+
pageBuilder.close();
|
205
|
+
}
|
206
|
+
}
|
207
|
+
|
208
|
+
}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
package org.embulk.filter.add_time.converter;
|
2
|
+
|
3
|
+
import org.embulk.spi.PageBuilder;
|
4
|
+
import org.embulk.spi.PageReader;
|
5
|
+
import org.embulk.spi.Schema;
|
6
|
+
|
7
|
+
public interface ColumnConverter
|
8
|
+
{
|
9
|
+
void update(PageReader pageReader);
|
10
|
+
|
11
|
+
void convert(PageBuilder pageBuilder);
|
12
|
+
|
13
|
+
void addColumn(Schema.Builder schemaBuilder);
|
14
|
+
}
|
@@ -0,0 +1,72 @@
|
|
1
|
+
package org.embulk.filter.add_time.converter;
|
2
|
+
|
3
|
+
import org.embulk.filter.add_time.reader.ColumnReader;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.PageBuilder;
|
6
|
+
import org.embulk.spi.PageReader;
|
7
|
+
import org.embulk.spi.Schema;
|
8
|
+
|
9
|
+
public class ColumnDuplicator
|
10
|
+
implements ColumnConverter
|
11
|
+
{
|
12
|
+
public static class Builder
|
13
|
+
{
|
14
|
+
private Column column;
|
15
|
+
private ColumnReader duplicator;
|
16
|
+
private ColumnReader duplicatee;
|
17
|
+
|
18
|
+
public Builder()
|
19
|
+
{
|
20
|
+
}
|
21
|
+
|
22
|
+
public Builder setColumn(Column column)
|
23
|
+
{
|
24
|
+
this.column = column;
|
25
|
+
return this;
|
26
|
+
}
|
27
|
+
|
28
|
+
public Builder setDuplicator(ColumnReader duplicator)
|
29
|
+
{
|
30
|
+
this.duplicator = duplicator;
|
31
|
+
return this;
|
32
|
+
}
|
33
|
+
|
34
|
+
public Builder setDuplicatee(ColumnReader duplicatee)
|
35
|
+
{
|
36
|
+
this.duplicatee = duplicatee;
|
37
|
+
return this;
|
38
|
+
}
|
39
|
+
|
40
|
+
public ColumnDuplicator build()
|
41
|
+
{
|
42
|
+
return new ColumnDuplicator(column, duplicator, duplicatee);
|
43
|
+
}
|
44
|
+
}
|
45
|
+
|
46
|
+
private final Column column;
|
47
|
+
private final ColumnReader duplicator;
|
48
|
+
private final ColumnReader duplicatee;
|
49
|
+
|
50
|
+
private ColumnDuplicator(Column column, ColumnReader duplicator, ColumnReader duplicatee)
|
51
|
+
{
|
52
|
+
this.column = column;
|
53
|
+
this.duplicator = duplicator;
|
54
|
+
this.duplicatee = duplicatee;
|
55
|
+
}
|
56
|
+
|
57
|
+
public void update(PageReader pageReader)
|
58
|
+
{
|
59
|
+
duplicator.readValue(column, pageReader);
|
60
|
+
duplicator.copyTo(duplicatee);
|
61
|
+
}
|
62
|
+
|
63
|
+
public void convert(PageBuilder pageBuilder)
|
64
|
+
{
|
65
|
+
duplicator.convertValue(column, pageBuilder);
|
66
|
+
}
|
67
|
+
|
68
|
+
public void addColumn(Schema.Builder schemaBuilder)
|
69
|
+
{
|
70
|
+
schemaBuilder.add(column.getName(), column.getType());
|
71
|
+
}
|
72
|
+
}
|
@@ -0,0 +1,33 @@
|
|
1
|
+
package org.embulk.filter.add_time.converter;
|
2
|
+
|
3
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.FromColumnConfig;
|
4
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.ToColumnConfig;
|
5
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.UnixTimestampUnit;
|
6
|
+
import org.embulk.spi.Column;
|
7
|
+
import org.embulk.spi.PageBuilder;
|
8
|
+
import org.embulk.spi.time.Timestamp;
|
9
|
+
|
10
|
+
public class LongValueCastConverter
|
11
|
+
extends ValueCastConverter
|
12
|
+
{
|
13
|
+
private final UnixTimestampUnit fromUnixTimestampUnit;
|
14
|
+
|
15
|
+
public LongValueCastConverter(FromColumnConfig fromColumnConfig, ToColumnConfig toColumnConfig)
|
16
|
+
{
|
17
|
+
super(toColumnConfig);
|
18
|
+
this.fromUnixTimestampUnit = UnixTimestampUnit.of(fromColumnConfig.getUnixTimestampUnit());
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public void convertValue(final Column column, long value, final PageBuilder pageBuilder)
|
23
|
+
{
|
24
|
+
columnVisitor.setValue(longToTimestamp(value));
|
25
|
+
columnVisitor.setPageBuilder(pageBuilder);
|
26
|
+
column.visit(columnVisitor);
|
27
|
+
}
|
28
|
+
|
29
|
+
private Timestamp longToTimestamp(long value)
|
30
|
+
{
|
31
|
+
return fromUnixTimestampUnit.toTimestamp(value);
|
32
|
+
}
|
33
|
+
}
|
@@ -0,0 +1,257 @@
|
|
1
|
+
package org.embulk.filter.add_time.converter;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import org.embulk.config.ConfigException;
|
5
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.PluginTask;
|
6
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.FromColumnConfig;
|
7
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.FromValueConfig;
|
8
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.ToColumnConfig;
|
9
|
+
import org.embulk.filter.add_time.reader.BooleanColumnReader;
|
10
|
+
import org.embulk.filter.add_time.reader.ColumnReader;
|
11
|
+
import org.embulk.filter.add_time.reader.DoubleColumnReader;
|
12
|
+
import org.embulk.filter.add_time.reader.LongColumnReader;
|
13
|
+
import org.embulk.filter.add_time.reader.StringColumnReader;
|
14
|
+
import org.embulk.filter.add_time.reader.TimeValueGenerator;
|
15
|
+
import org.embulk.filter.add_time.reader.TimestampColumnReader;
|
16
|
+
import org.embulk.spi.Column;
|
17
|
+
import org.embulk.spi.ColumnVisitor;
|
18
|
+
import org.embulk.spi.DataException;
|
19
|
+
import org.embulk.spi.PageBuilder;
|
20
|
+
import org.embulk.spi.PageReader;
|
21
|
+
import org.embulk.spi.Schema;
|
22
|
+
import org.embulk.spi.type.BooleanType;
|
23
|
+
import org.embulk.spi.type.DoubleType;
|
24
|
+
import org.embulk.spi.type.LongType;
|
25
|
+
import org.embulk.spi.type.StringType;
|
26
|
+
import org.embulk.spi.type.TimestampType;
|
27
|
+
import org.embulk.spi.type.Type;
|
28
|
+
import org.embulk.spi.type.Types;
|
29
|
+
import org.slf4j.Logger;
|
30
|
+
|
31
|
+
public class SchemaConverter
|
32
|
+
{
|
33
|
+
private final Logger log;
|
34
|
+
private final ColumnConverter[] converters;
|
35
|
+
|
36
|
+
public SchemaConverter(Logger log, PluginTask task, Schema inputSchema)
|
37
|
+
{
|
38
|
+
this.log = log;
|
39
|
+
|
40
|
+
ToColumnConfig toColumnConfig = task.getToColumn();
|
41
|
+
final String toColumnName = toColumnConfig.getName();
|
42
|
+
final Type toColumnType = toToColumnType(toColumnName, toColumnConfig.getType()); // TODO getType should return Type object
|
43
|
+
|
44
|
+
Optional<FromColumnConfig> fromColumnConfig = task.getFromColumn();
|
45
|
+
Optional<FromValueConfig> fromValueConfig = task.getFromValue();
|
46
|
+
|
47
|
+
if (fromColumnConfig.isPresent() && fromValueConfig.isPresent()) {
|
48
|
+
throw new ConfigException("Setting both from_column and from_value is invalid.");
|
49
|
+
}
|
50
|
+
if (!fromColumnConfig.isPresent() && !fromValueConfig.isPresent()) {
|
51
|
+
throw new ConfigException("Setting from_column or from_value is required.");
|
52
|
+
}
|
53
|
+
|
54
|
+
converters = new ColumnConverter[inputSchema.size() + 1];
|
55
|
+
|
56
|
+
for (int i = 0; i < inputSchema.size(); i++) {
|
57
|
+
Column column = inputSchema.getColumn(i);
|
58
|
+
String columnName = column.getName();
|
59
|
+
Type columnType = column.getType();
|
60
|
+
|
61
|
+
final String newColumnName;
|
62
|
+
if (columnName.equals(toColumnName)) {
|
63
|
+
newColumnName = newColumnUniqueName(columnName, inputSchema);
|
64
|
+
log.warn("to_column '{}' is set but '{}' column also exists. The existent '{}' column is renamed to '{}'.",
|
65
|
+
toColumnName, toColumnName, toColumnName, newColumnName);
|
66
|
+
}
|
67
|
+
else {
|
68
|
+
newColumnName = columnName;
|
69
|
+
}
|
70
|
+
|
71
|
+
if (fromColumnConfig.isPresent() && columnName.equals(fromColumnConfig.get().getName())) {
|
72
|
+
if (!columnType.equals(Types.LONG) && !columnType.equals(Types.STRING) && !columnType.equals(Types.TIMESTAMP)) {
|
73
|
+
throw new ConfigException(String.format(
|
74
|
+
"The type of the '%s' column specified as from_column must be long, string or timestamp. But it's %s.", columnName, columnType));
|
75
|
+
}
|
76
|
+
|
77
|
+
ColumnReader duplicatee = newColumnReader(columnType, newValueCastConverter(columnType, fromColumnConfig, toColumnConfig));
|
78
|
+
converters[inputSchema.size()] = new SimpleColumnConverter.Builder()
|
79
|
+
.setColumn(new Column(inputSchema.size(), toColumnName, toColumnType))
|
80
|
+
.setColumnReader(duplicatee)
|
81
|
+
.build();
|
82
|
+
converters[i] = new ColumnDuplicator.Builder()
|
83
|
+
.setColumn(new Column(i, newColumnName, columnType))
|
84
|
+
.setDuplicator(newColumnReader(columnType, ValueConverter.NO_CONV))
|
85
|
+
.setDuplicatee(duplicatee)
|
86
|
+
.build();
|
87
|
+
}
|
88
|
+
else {
|
89
|
+
converters[i] = new SimpleColumnConverter.Builder()
|
90
|
+
.setColumn(new Column(i, newColumnName, columnType))
|
91
|
+
.setColumnReader(newColumnReader(columnType, ValueConverter.NO_CONV))
|
92
|
+
.build();
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
if (fromValueConfig.isPresent()) {
|
97
|
+
// create column converter for from_value
|
98
|
+
converters[inputSchema.size()] = new SimpleColumnConverter.Builder()
|
99
|
+
.setColumn(new Column(inputSchema.size(), toColumnName, toColumnType))
|
100
|
+
.setColumnReader(TimeValueGenerator.newGenerator(fromValueConfig.get(), newValueCastConverter(Types.TIMESTAMP, fromColumnConfig, toColumnConfig)))
|
101
|
+
.build();
|
102
|
+
}
|
103
|
+
}
|
104
|
+
|
105
|
+
private static String newColumnUniqueName(String originalName, Schema schema)
|
106
|
+
{
|
107
|
+
String name = originalName;
|
108
|
+
do {
|
109
|
+
name += "_";
|
110
|
+
}
|
111
|
+
while (containsColumnName(schema, name));
|
112
|
+
return name;
|
113
|
+
}
|
114
|
+
|
115
|
+
private static boolean containsColumnName(Schema schema, String name)
|
116
|
+
{
|
117
|
+
for (Column c : schema.getColumns()) {
|
118
|
+
if (c.getName().equals(name)) {
|
119
|
+
return true;
|
120
|
+
}
|
121
|
+
}
|
122
|
+
return false;
|
123
|
+
}
|
124
|
+
|
125
|
+
private static ColumnReader newColumnReader(Type columnType, ValueConverter valueConverter)
|
126
|
+
{
|
127
|
+
if (columnType instanceof BooleanType) {
|
128
|
+
return new BooleanColumnReader(valueConverter);
|
129
|
+
}
|
130
|
+
else if (columnType instanceof LongType) {
|
131
|
+
return new LongColumnReader(valueConverter);
|
132
|
+
}
|
133
|
+
else if (columnType instanceof DoubleType) {
|
134
|
+
return new DoubleColumnReader(valueConverter);
|
135
|
+
}
|
136
|
+
else if (columnType instanceof StringType) {
|
137
|
+
return new StringColumnReader(valueConverter);
|
138
|
+
}
|
139
|
+
else if (columnType instanceof TimestampType) {
|
140
|
+
return new TimestampColumnReader(valueConverter);
|
141
|
+
}
|
142
|
+
// TODO support Json type
|
143
|
+
else {
|
144
|
+
throw new ConfigException("Unsupported type: " + columnType); // TODO after json type support, it should be changed to AssertionError.
|
145
|
+
}
|
146
|
+
}
|
147
|
+
|
148
|
+
private static ValueCastConverter newValueCastConverter(Type columnType, Optional<FromColumnConfig> fromColumnConfig, ToColumnConfig toColumnConfig)
|
149
|
+
{
|
150
|
+
if (columnType instanceof LongType) {
|
151
|
+
return new LongValueCastConverter(fromColumnConfig.get(), toColumnConfig);
|
152
|
+
}
|
153
|
+
else if (columnType instanceof StringType) {
|
154
|
+
return new StringValueCastConverter(fromColumnConfig.get(), toColumnConfig);
|
155
|
+
}
|
156
|
+
else if (columnType instanceof TimestampType) {
|
157
|
+
return new TimestampValueCastConverter(toColumnConfig);
|
158
|
+
}
|
159
|
+
else {
|
160
|
+
throw new AssertionError("Unsupported type: " + columnType);
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
164
|
+
private static Type toToColumnType(String name, String type)
|
165
|
+
{
|
166
|
+
switch (type) {
|
167
|
+
case "long":
|
168
|
+
return Types.LONG;
|
169
|
+
case "timestamp":
|
170
|
+
return Types.TIMESTAMP;
|
171
|
+
default:
|
172
|
+
throw new ConfigException(String.format( // TODO should return AssertionError
|
173
|
+
"The type of the '{}' column specified as to_column must be long or timestamp. But it's {}.", name, type));
|
174
|
+
}
|
175
|
+
}
|
176
|
+
|
177
|
+
public void convertRecord(final PageReader pageReader, final PageBuilder pageBuilder)
|
178
|
+
{
|
179
|
+
try {
|
180
|
+
beginRecordConversion();
|
181
|
+
|
182
|
+
pageReader.getSchema().visitColumns(new ColumnVisitor()
|
183
|
+
{
|
184
|
+
@Override
|
185
|
+
public void booleanColumn(Column column)
|
186
|
+
{
|
187
|
+
updateColumn(column, pageReader);
|
188
|
+
}
|
189
|
+
|
190
|
+
@Override
|
191
|
+
public void longColumn(Column column)
|
192
|
+
{
|
193
|
+
updateColumn(column, pageReader);
|
194
|
+
}
|
195
|
+
|
196
|
+
@Override
|
197
|
+
public void doubleColumn(Column column)
|
198
|
+
{
|
199
|
+
updateColumn(column, pageReader);
|
200
|
+
}
|
201
|
+
|
202
|
+
@Override
|
203
|
+
public void stringColumn(Column column)
|
204
|
+
{
|
205
|
+
updateColumn(column, pageReader);
|
206
|
+
}
|
207
|
+
|
208
|
+
@Override
|
209
|
+
public void timestampColumn(Column column)
|
210
|
+
{
|
211
|
+
updateColumn(column, pageReader);
|
212
|
+
}
|
213
|
+
});
|
214
|
+
|
215
|
+
endRecordConversion(pageBuilder);
|
216
|
+
}
|
217
|
+
catch (RuntimeException e) { // TODO should use AddTimeRecordValidateException or the subclasses
|
218
|
+
log.warn(String.format("Skipped a record (%s).", e.getMessage()), e);
|
219
|
+
}
|
220
|
+
}
|
221
|
+
|
222
|
+
private void beginRecordConversion()
|
223
|
+
{
|
224
|
+
}
|
225
|
+
|
226
|
+
private void updateColumn(Column column, PageReader pageReader)
|
227
|
+
{
|
228
|
+
converters[column.getIndex()].update(pageReader);
|
229
|
+
}
|
230
|
+
|
231
|
+
private void endRecordConversion(PageBuilder pageBuilder)
|
232
|
+
{
|
233
|
+
for (ColumnConverter converter : converters) {
|
234
|
+
converter.convert(pageBuilder);
|
235
|
+
}
|
236
|
+
|
237
|
+
pageBuilder.addRecord();
|
238
|
+
}
|
239
|
+
|
240
|
+
public Schema toOutputSchema()
|
241
|
+
{
|
242
|
+
Schema.Builder schemaBuilder = new Schema.Builder();
|
243
|
+
for (ColumnConverter converter : converters) {
|
244
|
+
converter.addColumn(schemaBuilder);
|
245
|
+
}
|
246
|
+
return schemaBuilder.build();
|
247
|
+
}
|
248
|
+
|
249
|
+
static class AddTimeRecordValidateException
|
250
|
+
extends DataException
|
251
|
+
{
|
252
|
+
AddTimeRecordValidateException(Throwable cause)
|
253
|
+
{
|
254
|
+
super(cause);
|
255
|
+
}
|
256
|
+
}
|
257
|
+
}
|