embulk-filter-add_time 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +7 -0
- data/CHANGELOG.md +3 -0
- data/COPYING +14 -0
- data/README.md +212 -0
- data/build.gradle +82 -0
- data/gradle/check.gradle +34 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/filter/add_time.rb +3 -0
- data/src/main/java/org/embulk/filter/add_time/AddTimeFilterPlugin.java +208 -0
- data/src/main/java/org/embulk/filter/add_time/converter/ColumnConverter.java +14 -0
- data/src/main/java/org/embulk/filter/add_time/converter/ColumnDuplicator.java +72 -0
- data/src/main/java/org/embulk/filter/add_time/converter/LongValueCastConverter.java +33 -0
- data/src/main/java/org/embulk/filter/add_time/converter/SchemaConverter.java +257 -0
- data/src/main/java/org/embulk/filter/add_time/converter/SimpleColumnConverter.java +62 -0
- data/src/main/java/org/embulk/filter/add_time/converter/StringValueCastConverter.java +33 -0
- data/src/main/java/org/embulk/filter/add_time/converter/TimestampValueCastConverter.java +23 -0
- data/src/main/java/org/embulk/filter/add_time/converter/ValueCastConverter.java +108 -0
- data/src/main/java/org/embulk/filter/add_time/converter/ValueConverter.java +22 -0
- data/src/main/java/org/embulk/filter/add_time/converter/ValueNoConverter.java +46 -0
- data/src/main/java/org/embulk/filter/add_time/reader/AbstractColumnReader.java +55 -0
- data/src/main/java/org/embulk/filter/add_time/reader/BooleanColumnReader.java +35 -0
- data/src/main/java/org/embulk/filter/add_time/reader/ColumnReader.java +14 -0
- data/src/main/java/org/embulk/filter/add_time/reader/DoubleColumnReader.java +35 -0
- data/src/main/java/org/embulk/filter/add_time/reader/LongColumnReader.java +35 -0
- data/src/main/java/org/embulk/filter/add_time/reader/StringColumnReader.java +35 -0
- data/src/main/java/org/embulk/filter/add_time/reader/TimeValueGenerator.java +177 -0
- data/src/main/java/org/embulk/filter/add_time/reader/TimestampColumnReader.java +36 -0
- data/src/test/java/org/embulk/filter/add_time/TestAddTimeFilterPlugin.java +416 -0
- data/src/test/java/org/embulk/filter/add_time/converter/TestSchemaConverter.java +338 -0
- metadata +107 -0
data/gradlew.bat
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
@if "%DEBUG%" == "" @echo off
@rem Echo is switched off unless the DEBUG variable is set.
|
2
|
+
@rem ##########################################################################
|
3
|
+
@rem
|
4
|
+
@rem Gradle startup script for Windows
|
5
|
+
@rem
|
6
|
+
@rem ##########################################################################
|
7
|
+
|
8
|
+
@rem Set local scope for the variables with windows NT shell
|
9
|
+
if "%OS%"=="Windows_NT" setlocal
|
10
|
+
|
11
|
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
12
|
+
set DEFAULT_JVM_OPTS=
|
13
|
+
|
14
|
+
@rem DIRNAME is the drive and directory this script lives in and becomes APP_HOME.
set DIRNAME=%~dp0
|
15
|
+
if "%DIRNAME%" == "" set DIRNAME=.
|
16
|
+
@rem APP_BASE_NAME is the file name of this script without its extension.
set APP_BASE_NAME=%~n0
|
17
|
+
set APP_HOME=%DIRNAME%
|
18
|
+
|
19
|
+
@rem Find java.exe
|
20
|
+
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
|
+
|
22
|
+
@rem JAVA_HOME is not defined, so check that a java.exe can be run from the PATH.
set JAVA_EXE=java.exe
|
23
|
+
%JAVA_EXE% -version >NUL 2>&1
|
24
|
+
if "%ERRORLEVEL%" == "0" goto init
|
25
|
+
|
26
|
+
echo.
|
27
|
+
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
28
|
+
echo.
|
29
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
30
|
+
echo location of your Java installation.
|
31
|
+
|
32
|
+
goto fail
|
33
|
+
|
34
|
+
:findJavaFromJavaHome
|
35
|
+
@rem Remove any double quotes from JAVA_HOME before the java.exe path is built from it.
set JAVA_HOME=%JAVA_HOME:"=%
|
36
|
+
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
37
|
+
|
38
|
+
if exist "%JAVA_EXE%" goto init
|
39
|
+
|
40
|
+
echo.
|
41
|
+
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
42
|
+
echo.
|
43
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
44
|
+
echo location of your Java installation.
|
45
|
+
|
46
|
+
goto fail
|
47
|
+
|
48
|
+
:init
|
49
|
+
@rem Get command-line arguments, handling Windowz variants
|
50
|
+
|
51
|
+
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
+
@rem Review note: presumably only the 4NT shell evaluates the expression below to 4.
if "%@eval[2+2]" == "4" goto 4NT_args
|
53
|
+
|
54
|
+
:win9xME_args
|
55
|
+
@rem Slurp the command line arguments.
|
56
|
+
set CMD_LINE_ARGS=
|
57
|
+
set _SKIP=2
|
58
|
+
|
59
|
+
:win9xME_args_slurp
|
60
|
+
@rem Without a first argument CMD_LINE_ARGS stays empty, otherwise it receives every argument.
if "x%~1" == "x" goto execute
|
61
|
+
|
62
|
+
set CMD_LINE_ARGS=%*
|
63
|
+
goto execute
|
64
|
+
|
65
|
+
:4NT_args
|
66
|
+
@rem Get arguments from the 4NT Shell from JP Software
|
67
|
+
set CMD_LINE_ARGS=%$
|
68
|
+
|
69
|
+
:execute
|
70
|
+
@rem Setup the command line
|
71
|
+
|
72
|
+
@rem The wrapper jar under APP_HOME is the only classpath entry.
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
73
|
+
|
74
|
+
@rem Execute Gradle
|
75
|
+
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
76
|
+
|
77
|
+
:end
|
78
|
+
@rem End local scope for the variables with windows NT shell
|
79
|
+
@rem A zero exit status from the Java process skips the failure exit below.
if "%ERRORLEVEL%"=="0" goto mainEnd
|
80
|
+
|
81
|
+
:fail
|
82
|
+
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
83
|
+
rem the _cmd.exe /c_ return code!
|
84
|
+
rem With GRADLE_EXIT_CONSOLE set the console itself exits with status 1, otherwise only this script does.
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
85
|
+
exit /b 1
|
86
|
+
|
87
|
+
:mainEnd
|
88
|
+
if "%OS%"=="Windows_NT" endlocal
|
89
|
+
|
90
|
+
:omega
|
data/src/main/java/org/embulk/filter/add_time/AddTimeFilterPlugin.java
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
package org.embulk.filter.add_time;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.google.common.base.Optional;
|
5
|
+
import org.embulk.config.Config;
|
6
|
+
import org.embulk.config.ConfigDefault;
|
7
|
+
import org.embulk.config.ConfigException;
|
8
|
+
import org.embulk.config.ConfigSource;
|
9
|
+
import org.embulk.config.Task;
|
10
|
+
import org.embulk.config.TaskSource;
|
11
|
+
import org.embulk.filter.add_time.converter.SchemaConverter;
|
12
|
+
import org.embulk.spi.Exec;
|
13
|
+
import org.embulk.spi.FilterPlugin;
|
14
|
+
import org.embulk.spi.Page;
|
15
|
+
import org.embulk.spi.PageBuilder;
|
16
|
+
import org.embulk.spi.PageOutput;
|
17
|
+
import org.embulk.spi.PageReader;
|
18
|
+
import org.embulk.spi.Schema;
|
19
|
+
import org.embulk.spi.time.Timestamp;
|
20
|
+
import org.slf4j.Logger;
|
21
|
+
|
22
|
+
public class AddTimeFilterPlugin
|
23
|
+
implements FilterPlugin
|
24
|
+
{
|
25
|
+
public interface PluginTask
|
26
|
+
extends Task
|
27
|
+
{
|
28
|
+
@Config("to_column")
|
29
|
+
ToColumnConfig getToColumn();
|
30
|
+
|
31
|
+
@Config("from_column")
|
32
|
+
@ConfigDefault("null")
|
33
|
+
Optional<FromColumnConfig> getFromColumn();
|
34
|
+
|
35
|
+
@Config("from_value")
|
36
|
+
@ConfigDefault("null")
|
37
|
+
Optional<FromValueConfig> getFromValue();
|
38
|
+
}
|
39
|
+
|
40
|
+
public interface ToColumnConfig
|
41
|
+
extends Task
|
42
|
+
{
|
43
|
+
@Config("name")
|
44
|
+
String getName();
|
45
|
+
|
46
|
+
@Config("type")
|
47
|
+
@ConfigDefault("\"timestamp\"")
|
48
|
+
String getType();
|
49
|
+
|
50
|
+
@Config("unix_timestamp_unit")
|
51
|
+
@ConfigDefault("\"sec\"")
|
52
|
+
String getUnixTimestampUnit();
|
53
|
+
}
|
54
|
+
|
55
|
+
public interface FromColumnConfig
|
56
|
+
extends Task, org.embulk.spi.time.TimestampParser.Task, org.embulk.spi.time.TimestampParser.TimestampColumnOption
|
57
|
+
{
|
58
|
+
@Config("name")
|
59
|
+
String getName();
|
60
|
+
|
61
|
+
@Config("unix_timestamp_unit")
|
62
|
+
@ConfigDefault("\"sec\"")
|
63
|
+
String getUnixTimestampUnit();
|
64
|
+
|
65
|
+
@Config("timestamp_format")
|
66
|
+
@ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
|
67
|
+
Optional<String> getFormat();
|
68
|
+
|
69
|
+
@Config("default_timestamp_format")
|
70
|
+
@ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
|
71
|
+
String getDefaultTimestampFormat();
|
72
|
+
}
|
73
|
+
|
74
|
+
public interface FromValueConfig
|
75
|
+
extends Task, org.embulk.spi.time.TimestampParser.Task, org.embulk.spi.time.TimestampParser.TimestampColumnOption
|
76
|
+
{
|
77
|
+
@Config("mode")
|
78
|
+
@ConfigDefault("\"fixed_time\"")
|
79
|
+
String getMode();
|
80
|
+
|
81
|
+
@Config("value")
|
82
|
+
@ConfigDefault("null")
|
83
|
+
Optional<Object> getValue();
|
84
|
+
|
85
|
+
@Config("from")
|
86
|
+
@ConfigDefault("null")
|
87
|
+
Optional<Object> getFrom();
|
88
|
+
|
89
|
+
@Config("to")
|
90
|
+
@ConfigDefault("null")
|
91
|
+
Optional<Object> getTo();
|
92
|
+
|
93
|
+
@Config("unix_timestamp_unit")
|
94
|
+
@ConfigDefault("\"sec\"")
|
95
|
+
String getUnixTimestampUnit();
|
96
|
+
|
97
|
+
@Config("timestamp_format")
|
98
|
+
@ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
|
99
|
+
Optional<String> getFormat();
|
100
|
+
|
101
|
+
@Config("default_timestamp_format")
|
102
|
+
@ConfigDefault("\"%Y-%m-%d %H:%M:%S %z\"") // override default value
|
103
|
+
String getDefaultTimestampFormat();
|
104
|
+
}
|
105
|
+
|
106
|
+
public enum UnixTimestampUnit
|
107
|
+
{
|
108
|
+
SEC(1, 1000000000),
|
109
|
+
MILLI(1000, 1000000),
|
110
|
+
MICRO(1000000, 1000),
|
111
|
+
NANO(1000000000, 1);
|
112
|
+
|
113
|
+
private final int secondUnit;
|
114
|
+
private final int nanoUnit;
|
115
|
+
|
116
|
+
UnixTimestampUnit(int secondUnit, int nanoUnit)
|
117
|
+
{
|
118
|
+
this.secondUnit = secondUnit;
|
119
|
+
this.nanoUnit = nanoUnit;
|
120
|
+
}
|
121
|
+
|
122
|
+
public long toLong(Timestamp t)
|
123
|
+
{
|
124
|
+
return t.getEpochSecond() * secondUnit + t.getNano() / nanoUnit;
|
125
|
+
}
|
126
|
+
|
127
|
+
public Timestamp toTimestamp(long t)
|
128
|
+
{
|
129
|
+
return Timestamp.ofEpochSecond(t / secondUnit, (int) (t % secondUnit * nanoUnit));
|
130
|
+
}
|
131
|
+
|
132
|
+
public static UnixTimestampUnit of(String s)
|
133
|
+
{
|
134
|
+
switch (s) {
|
135
|
+
case "sec": return SEC;
|
136
|
+
case "milli": return MILLI;
|
137
|
+
case "micro": return MICRO;
|
138
|
+
case "nano": return NANO;
|
139
|
+
default:
|
140
|
+
throw new ConfigException(
|
141
|
+
String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano", s));
|
142
|
+
}
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
private final Logger log;
|
147
|
+
|
148
|
+
public AddTimeFilterPlugin()
|
149
|
+
{
|
150
|
+
this.log = Exec.getLogger(getClass());
|
151
|
+
}
|
152
|
+
|
153
|
+
@Override
|
154
|
+
public void transaction(ConfigSource config, Schema inputSchema,
|
155
|
+
FilterPlugin.Control control)
|
156
|
+
{
|
157
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
158
|
+
control.run(task.dump(), new SchemaConverter(log, task, inputSchema).toOutputSchema());
|
159
|
+
}
|
160
|
+
|
161
|
+
@Override
|
162
|
+
public PageOutput open(TaskSource taskSource, Schema inputSchema,
|
163
|
+
Schema outputSchema, PageOutput output)
|
164
|
+
{
|
165
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
166
|
+
return new PageConverter(log, inputSchema, outputSchema, output, new SchemaConverter(log, task, inputSchema));
|
167
|
+
}
|
168
|
+
|
169
|
+
static class PageConverter
|
170
|
+
implements PageOutput
|
171
|
+
{
|
172
|
+
private final Logger log;
|
173
|
+
private SchemaConverter schemaConverter;
|
174
|
+
private final PageReader pageReader;
|
175
|
+
private final PageBuilder pageBuilder;
|
176
|
+
|
177
|
+
public PageConverter(Logger log, Schema inputSchema, Schema outputSchema, PageOutput output, SchemaConverter schemaConverter)
|
178
|
+
{
|
179
|
+
this.log = log;
|
180
|
+
this.schemaConverter = schemaConverter;
|
181
|
+
this.pageReader = new PageReader(inputSchema);
|
182
|
+
this.pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
183
|
+
}
|
184
|
+
|
185
|
+
@Override
|
186
|
+
public void add(Page page)
|
187
|
+
{
|
188
|
+
pageReader.setPage(page);
|
189
|
+
|
190
|
+
while (pageReader.nextRecord()) {
|
191
|
+
schemaConverter.convertRecord(pageReader, pageBuilder);
|
192
|
+
}
|
193
|
+
}
|
194
|
+
|
195
|
+
@Override
|
196
|
+
public void finish()
|
197
|
+
{
|
198
|
+
pageBuilder.finish();
|
199
|
+
}
|
200
|
+
|
201
|
+
@Override
|
202
|
+
public void close()
|
203
|
+
{
|
204
|
+
pageBuilder.close();
|
205
|
+
}
|
206
|
+
}
|
207
|
+
|
208
|
+
}
|
data/src/main/java/org/embulk/filter/add_time/converter/ColumnConverter.java
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
package org.embulk.filter.add_time.converter;
|
2
|
+
|
3
|
+
import org.embulk.spi.PageBuilder;
|
4
|
+
import org.embulk.spi.PageReader;
|
5
|
+
import org.embulk.spi.Schema;
|
6
|
+
|
7
|
+
public interface ColumnConverter
|
8
|
+
{
|
9
|
+
void update(PageReader pageReader);
|
10
|
+
|
11
|
+
void convert(PageBuilder pageBuilder);
|
12
|
+
|
13
|
+
void addColumn(Schema.Builder schemaBuilder);
|
14
|
+
}
|
data/src/main/java/org/embulk/filter/add_time/converter/ColumnDuplicator.java
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
package org.embulk.filter.add_time.converter;
|
2
|
+
|
3
|
+
import org.embulk.filter.add_time.reader.ColumnReader;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.PageBuilder;
|
6
|
+
import org.embulk.spi.PageReader;
|
7
|
+
import org.embulk.spi.Schema;
|
8
|
+
|
9
|
+
public class ColumnDuplicator
|
10
|
+
implements ColumnConverter
|
11
|
+
{
|
12
|
+
public static class Builder
|
13
|
+
{
|
14
|
+
private Column column;
|
15
|
+
private ColumnReader duplicator;
|
16
|
+
private ColumnReader duplicatee;
|
17
|
+
|
18
|
+
public Builder()
|
19
|
+
{
|
20
|
+
}
|
21
|
+
|
22
|
+
public Builder setColumn(Column column)
|
23
|
+
{
|
24
|
+
this.column = column;
|
25
|
+
return this;
|
26
|
+
}
|
27
|
+
|
28
|
+
public Builder setDuplicator(ColumnReader duplicator)
|
29
|
+
{
|
30
|
+
this.duplicator = duplicator;
|
31
|
+
return this;
|
32
|
+
}
|
33
|
+
|
34
|
+
public Builder setDuplicatee(ColumnReader duplicatee)
|
35
|
+
{
|
36
|
+
this.duplicatee = duplicatee;
|
37
|
+
return this;
|
38
|
+
}
|
39
|
+
|
40
|
+
public ColumnDuplicator build()
|
41
|
+
{
|
42
|
+
return new ColumnDuplicator(column, duplicator, duplicatee);
|
43
|
+
}
|
44
|
+
}
|
45
|
+
|
46
|
+
private final Column column;
|
47
|
+
private final ColumnReader duplicator;
|
48
|
+
private final ColumnReader duplicatee;
|
49
|
+
|
50
|
+
private ColumnDuplicator(Column column, ColumnReader duplicator, ColumnReader duplicatee)
|
51
|
+
{
|
52
|
+
this.column = column;
|
53
|
+
this.duplicator = duplicator;
|
54
|
+
this.duplicatee = duplicatee;
|
55
|
+
}
|
56
|
+
|
57
|
+
public void update(PageReader pageReader)
|
58
|
+
{
|
59
|
+
duplicator.readValue(column, pageReader);
|
60
|
+
duplicator.copyTo(duplicatee);
|
61
|
+
}
|
62
|
+
|
63
|
+
public void convert(PageBuilder pageBuilder)
|
64
|
+
{
|
65
|
+
duplicator.convertValue(column, pageBuilder);
|
66
|
+
}
|
67
|
+
|
68
|
+
public void addColumn(Schema.Builder schemaBuilder)
|
69
|
+
{
|
70
|
+
schemaBuilder.add(column.getName(), column.getType());
|
71
|
+
}
|
72
|
+
}
|
data/src/main/java/org/embulk/filter/add_time/converter/LongValueCastConverter.java
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
package org.embulk.filter.add_time.converter;
|
2
|
+
|
3
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.FromColumnConfig;
|
4
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.ToColumnConfig;
|
5
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.UnixTimestampUnit;
|
6
|
+
import org.embulk.spi.Column;
|
7
|
+
import org.embulk.spi.PageBuilder;
|
8
|
+
import org.embulk.spi.time.Timestamp;
|
9
|
+
|
10
|
+
public class LongValueCastConverter
|
11
|
+
extends ValueCastConverter
|
12
|
+
{
|
13
|
+
private final UnixTimestampUnit fromUnixTimestampUnit;
|
14
|
+
|
15
|
+
public LongValueCastConverter(FromColumnConfig fromColumnConfig, ToColumnConfig toColumnConfig)
|
16
|
+
{
|
17
|
+
super(toColumnConfig);
|
18
|
+
this.fromUnixTimestampUnit = UnixTimestampUnit.of(fromColumnConfig.getUnixTimestampUnit());
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public void convertValue(final Column column, long value, final PageBuilder pageBuilder)
|
23
|
+
{
|
24
|
+
columnVisitor.setValue(longToTimestamp(value));
|
25
|
+
columnVisitor.setPageBuilder(pageBuilder);
|
26
|
+
column.visit(columnVisitor);
|
27
|
+
}
|
28
|
+
|
29
|
+
private Timestamp longToTimestamp(long value)
|
30
|
+
{
|
31
|
+
return fromUnixTimestampUnit.toTimestamp(value);
|
32
|
+
}
|
33
|
+
}
|
data/src/main/java/org/embulk/filter/add_time/converter/SchemaConverter.java
ADDED
@@ -0,0 +1,257 @@
|
|
1
|
+
package org.embulk.filter.add_time.converter;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import org.embulk.config.ConfigException;
|
5
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.PluginTask;
|
6
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.FromColumnConfig;
|
7
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.FromValueConfig;
|
8
|
+
import org.embulk.filter.add_time.AddTimeFilterPlugin.ToColumnConfig;
|
9
|
+
import org.embulk.filter.add_time.reader.BooleanColumnReader;
|
10
|
+
import org.embulk.filter.add_time.reader.ColumnReader;
|
11
|
+
import org.embulk.filter.add_time.reader.DoubleColumnReader;
|
12
|
+
import org.embulk.filter.add_time.reader.LongColumnReader;
|
13
|
+
import org.embulk.filter.add_time.reader.StringColumnReader;
|
14
|
+
import org.embulk.filter.add_time.reader.TimeValueGenerator;
|
15
|
+
import org.embulk.filter.add_time.reader.TimestampColumnReader;
|
16
|
+
import org.embulk.spi.Column;
|
17
|
+
import org.embulk.spi.ColumnVisitor;
|
18
|
+
import org.embulk.spi.DataException;
|
19
|
+
import org.embulk.spi.PageBuilder;
|
20
|
+
import org.embulk.spi.PageReader;
|
21
|
+
import org.embulk.spi.Schema;
|
22
|
+
import org.embulk.spi.type.BooleanType;
|
23
|
+
import org.embulk.spi.type.DoubleType;
|
24
|
+
import org.embulk.spi.type.LongType;
|
25
|
+
import org.embulk.spi.type.StringType;
|
26
|
+
import org.embulk.spi.type.TimestampType;
|
27
|
+
import org.embulk.spi.type.Type;
|
28
|
+
import org.embulk.spi.type.Types;
|
29
|
+
import org.slf4j.Logger;
|
30
|
+
|
31
|
+
public class SchemaConverter
|
32
|
+
{
|
33
|
+
private final Logger log;
|
34
|
+
private final ColumnConverter[] converters;
|
35
|
+
|
36
|
+
public SchemaConverter(Logger log, PluginTask task, Schema inputSchema)
|
37
|
+
{
|
38
|
+
this.log = log;
|
39
|
+
|
40
|
+
ToColumnConfig toColumnConfig = task.getToColumn();
|
41
|
+
final String toColumnName = toColumnConfig.getName();
|
42
|
+
final Type toColumnType = toToColumnType(toColumnName, toColumnConfig.getType()); // TODO getType should return Type object
|
43
|
+
|
44
|
+
Optional<FromColumnConfig> fromColumnConfig = task.getFromColumn();
|
45
|
+
Optional<FromValueConfig> fromValueConfig = task.getFromValue();
|
46
|
+
|
47
|
+
if (fromColumnConfig.isPresent() && fromValueConfig.isPresent()) {
|
48
|
+
throw new ConfigException("Setting both from_column and from_value is invalid.");
|
49
|
+
}
|
50
|
+
if (!fromColumnConfig.isPresent() && !fromValueConfig.isPresent()) {
|
51
|
+
throw new ConfigException("Setting from_column or from_value is required.");
|
52
|
+
}
|
53
|
+
|
54
|
+
converters = new ColumnConverter[inputSchema.size() + 1];
|
55
|
+
|
56
|
+
for (int i = 0; i < inputSchema.size(); i++) {
|
57
|
+
Column column = inputSchema.getColumn(i);
|
58
|
+
String columnName = column.getName();
|
59
|
+
Type columnType = column.getType();
|
60
|
+
|
61
|
+
final String newColumnName;
|
62
|
+
if (columnName.equals(toColumnName)) {
|
63
|
+
newColumnName = newColumnUniqueName(columnName, inputSchema);
|
64
|
+
log.warn("to_column '{}' is set but '{}' column also exists. The existent '{}' column is renamed to '{}'.",
|
65
|
+
toColumnName, toColumnName, toColumnName, newColumnName);
|
66
|
+
}
|
67
|
+
else {
|
68
|
+
newColumnName = columnName;
|
69
|
+
}
|
70
|
+
|
71
|
+
if (fromColumnConfig.isPresent() && columnName.equals(fromColumnConfig.get().getName())) {
|
72
|
+
if (!columnType.equals(Types.LONG) && !columnType.equals(Types.STRING) && !columnType.equals(Types.TIMESTAMP)) {
|
73
|
+
throw new ConfigException(String.format(
|
74
|
+
"The type of the '%s' column specified as from_column must be long, string or timestamp. But it's %s.", columnName, columnType));
|
75
|
+
}
|
76
|
+
|
77
|
+
ColumnReader duplicatee = newColumnReader(columnType, newValueCastConverter(columnType, fromColumnConfig, toColumnConfig));
|
78
|
+
converters[inputSchema.size()] = new SimpleColumnConverter.Builder()
|
79
|
+
.setColumn(new Column(inputSchema.size(), toColumnName, toColumnType))
|
80
|
+
.setColumnReader(duplicatee)
|
81
|
+
.build();
|
82
|
+
converters[i] = new ColumnDuplicator.Builder()
|
83
|
+
.setColumn(new Column(i, newColumnName, columnType))
|
84
|
+
.setDuplicator(newColumnReader(columnType, ValueConverter.NO_CONV))
|
85
|
+
.setDuplicatee(duplicatee)
|
86
|
+
.build();
|
87
|
+
}
|
88
|
+
else {
|
89
|
+
converters[i] = new SimpleColumnConverter.Builder()
|
90
|
+
.setColumn(new Column(i, newColumnName, columnType))
|
91
|
+
.setColumnReader(newColumnReader(columnType, ValueConverter.NO_CONV))
|
92
|
+
.build();
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
if (fromValueConfig.isPresent()) {
|
97
|
+
// create column converter for from_value
|
98
|
+
converters[inputSchema.size()] = new SimpleColumnConverter.Builder()
|
99
|
+
.setColumn(new Column(inputSchema.size(), toColumnName, toColumnType))
|
100
|
+
.setColumnReader(TimeValueGenerator.newGenerator(fromValueConfig.get(), newValueCastConverter(Types.TIMESTAMP, fromColumnConfig, toColumnConfig)))
|
101
|
+
.build();
|
102
|
+
}
|
103
|
+
}
|
104
|
+
|
105
|
+
private static String newColumnUniqueName(String originalName, Schema schema)
|
106
|
+
{
|
107
|
+
String name = originalName;
|
108
|
+
do {
|
109
|
+
name += "_";
|
110
|
+
}
|
111
|
+
while (containsColumnName(schema, name));
|
112
|
+
return name;
|
113
|
+
}
|
114
|
+
|
115
|
+
private static boolean containsColumnName(Schema schema, String name)
|
116
|
+
{
|
117
|
+
for (Column c : schema.getColumns()) {
|
118
|
+
if (c.getName().equals(name)) {
|
119
|
+
return true;
|
120
|
+
}
|
121
|
+
}
|
122
|
+
return false;
|
123
|
+
}
|
124
|
+
|
125
|
+
private static ColumnReader newColumnReader(Type columnType, ValueConverter valueConverter)
|
126
|
+
{
|
127
|
+
if (columnType instanceof BooleanType) {
|
128
|
+
return new BooleanColumnReader(valueConverter);
|
129
|
+
}
|
130
|
+
else if (columnType instanceof LongType) {
|
131
|
+
return new LongColumnReader(valueConverter);
|
132
|
+
}
|
133
|
+
else if (columnType instanceof DoubleType) {
|
134
|
+
return new DoubleColumnReader(valueConverter);
|
135
|
+
}
|
136
|
+
else if (columnType instanceof StringType) {
|
137
|
+
return new StringColumnReader(valueConverter);
|
138
|
+
}
|
139
|
+
else if (columnType instanceof TimestampType) {
|
140
|
+
return new TimestampColumnReader(valueConverter);
|
141
|
+
}
|
142
|
+
// TODO support Json type
|
143
|
+
else {
|
144
|
+
throw new ConfigException("Unsupported type: " + columnType); // TODO after json type support, it should be changed to AssertionError.
|
145
|
+
}
|
146
|
+
}
|
147
|
+
|
148
|
+
private static ValueCastConverter newValueCastConverter(Type columnType, Optional<FromColumnConfig> fromColumnConfig, ToColumnConfig toColumnConfig)
|
149
|
+
{
|
150
|
+
if (columnType instanceof LongType) {
|
151
|
+
return new LongValueCastConverter(fromColumnConfig.get(), toColumnConfig);
|
152
|
+
}
|
153
|
+
else if (columnType instanceof StringType) {
|
154
|
+
return new StringValueCastConverter(fromColumnConfig.get(), toColumnConfig);
|
155
|
+
}
|
156
|
+
else if (columnType instanceof TimestampType) {
|
157
|
+
return new TimestampValueCastConverter(toColumnConfig);
|
158
|
+
}
|
159
|
+
else {
|
160
|
+
throw new AssertionError("Unsupported type: " + columnType);
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
164
|
+
private static Type toToColumnType(String name, String type)
|
165
|
+
{
|
166
|
+
switch (type) {
|
167
|
+
case "long":
|
168
|
+
return Types.LONG;
|
169
|
+
case "timestamp":
|
170
|
+
return Types.TIMESTAMP;
|
171
|
+
default:
|
172
|
+
throw new ConfigException(String.format( // TODO should return AssertionError
|
173
|
+
"The type of the '{}' column specified as to_column must be long or timestamp. But it's {}.", name, type));
|
174
|
+
}
|
175
|
+
}
|
176
|
+
|
177
|
+
public void convertRecord(final PageReader pageReader, final PageBuilder pageBuilder)
|
178
|
+
{
|
179
|
+
try {
|
180
|
+
beginRecordConversion();
|
181
|
+
|
182
|
+
pageReader.getSchema().visitColumns(new ColumnVisitor()
|
183
|
+
{
|
184
|
+
@Override
|
185
|
+
public void booleanColumn(Column column)
|
186
|
+
{
|
187
|
+
updateColumn(column, pageReader);
|
188
|
+
}
|
189
|
+
|
190
|
+
@Override
|
191
|
+
public void longColumn(Column column)
|
192
|
+
{
|
193
|
+
updateColumn(column, pageReader);
|
194
|
+
}
|
195
|
+
|
196
|
+
@Override
|
197
|
+
public void doubleColumn(Column column)
|
198
|
+
{
|
199
|
+
updateColumn(column, pageReader);
|
200
|
+
}
|
201
|
+
|
202
|
+
@Override
|
203
|
+
public void stringColumn(Column column)
|
204
|
+
{
|
205
|
+
updateColumn(column, pageReader);
|
206
|
+
}
|
207
|
+
|
208
|
+
@Override
|
209
|
+
public void timestampColumn(Column column)
|
210
|
+
{
|
211
|
+
updateColumn(column, pageReader);
|
212
|
+
}
|
213
|
+
});
|
214
|
+
|
215
|
+
endRecordConversion(pageBuilder);
|
216
|
+
}
|
217
|
+
catch (RuntimeException e) { // TODO should use AddTimeRecordValidateException or the subclasses
|
218
|
+
log.warn(String.format("Skipped a record (%s).", e.getMessage()), e);
|
219
|
+
}
|
220
|
+
}
|
221
|
+
|
222
|
+
private void beginRecordConversion()
|
223
|
+
{
|
224
|
+
}
|
225
|
+
|
226
|
+
private void updateColumn(Column column, PageReader pageReader)
|
227
|
+
{
|
228
|
+
converters[column.getIndex()].update(pageReader);
|
229
|
+
}
|
230
|
+
|
231
|
+
private void endRecordConversion(PageBuilder pageBuilder)
|
232
|
+
{
|
233
|
+
for (ColumnConverter converter : converters) {
|
234
|
+
converter.convert(pageBuilder);
|
235
|
+
}
|
236
|
+
|
237
|
+
pageBuilder.addRecord();
|
238
|
+
}
|
239
|
+
|
240
|
+
public Schema toOutputSchema()
|
241
|
+
{
|
242
|
+
Schema.Builder schemaBuilder = new Schema.Builder();
|
243
|
+
for (ColumnConverter converter : converters) {
|
244
|
+
converter.addColumn(schemaBuilder);
|
245
|
+
}
|
246
|
+
return schemaBuilder.build();
|
247
|
+
}
|
248
|
+
|
249
|
+
static class AddTimeRecordValidateException
|
250
|
+
extends DataException
|
251
|
+
{
|
252
|
+
AddTimeRecordValidateException(Throwable cause)
|
253
|
+
{
|
254
|
+
super(cause);
|
255
|
+
}
|
256
|
+
}
|
257
|
+
}
|