embulk-filter-timestamp_hs 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
# Gradle wrapper settings; distributionUrl below selects the Gradle 2.10 "all" distribution.
#Wed Mar 02 22:27:43 JST 2016
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-all.zip
data/gradlew ADDED
@@ -0,0 +1,160 @@
1
#!/usr/bin/env bash

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""

# Human-readable application name.
APP_NAME="Gradle"
# Name this script was invoked as, with any leading directory components removed.
APP_BASE_NAME=${0##*/}

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
17
+
18
#######################################
# Print a warning message.
# Arguments: all arguments are joined with spaces into one message line.
# Outputs:   the message on stderr, so it never mixes with regular stdout output.
#######################################
warn() {
  # printf instead of echo so that a message such as "-n" is printed literally.
  printf '%s\n' "$*" >&2
}
21
+
22
#######################################
# Print a fatal error message surrounded by blank lines and terminate the script.
# Arguments: all arguments are joined with spaces into one message.
# Outputs:   blank line, message, blank line - all on stderr.
# Returns:   never returns; exits the shell with status 1.
#######################################
die() {
  printf '\n%s\n\n' "$*" >&2
  exit 1
}
28
+
29
# OS specific support (must be 'true' or 'false').
# Each flag starts out false; at most one is switched on, based on the
# kernel name reported by uname.
cygwin=false
msys=false
darwin=false
case "$(uname)" in
  CYGWIN*)
    cygwin=true
    ;;
  Darwin*)
    darwin=true
    ;;
  MINGW*)
    msys=true
    ;;
esac
44
+
45
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ]; do
  # The link target is taken from the "-> target" suffix of the long listing.
  ls=$(ls -ld "$PRG")
  link=$(expr "$ls" : '.*-> \(.*\)$')
  if expr "$link" : '/.*' >/dev/null; then
    # Absolute target: use it as is.
    PRG="$link"
  else
    # Relative target: interpret it relative to the directory holding the link.
    PRG="$(dirname "$PRG")/$link"
  fi
done

# Resolve the physical directory of the script inside a subshell so the
# caller's working directory is never changed, and stop if that directory
# cannot be entered instead of silently recording the wrong location.
APP_HOME=$(cd "$(dirname "$PRG")/" >/dev/null && pwd -P) || {
  printf '%s\n' "ERROR: cannot determine the directory of $PRG" >&2
  exit 1
}

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
64
+
65
# Determine the Java command to use to start the JVM.
# With JAVA_HOME set, the executable must exist below it; otherwise a
# 'java' found through PATH is used. Either failure aborts through die.
if [ -n "$JAVA_HOME" ]; then
  if [ -x "$JAVA_HOME/jre/sh/java" ]; then
    # IBM's JDK on AIX uses strange locations for the executables
    JAVACMD="$JAVA_HOME/jre/sh/java"
  else
    JAVACMD="$JAVA_HOME/bin/java"
  fi
  if [ ! -x "$JAVACMD" ]; then
    die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
  fi
else
  JAVACMD="java"
  # 'command -v' is a shell builtin, so no external 'which' program is needed.
  command -v java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
86
+
87
# Increase the maximum file descriptors if we can.
# Skipped on Cygwin and Darwin. MAX_FD set to "maximum" or "max" means
# "use the hard limit reported by ulimit".
if [ "$cygwin" = "false" ] && [ "$darwin" = "false" ]; then
  if MAX_FD_LIMIT=$(ulimit -H -n); then
    if [ "$MAX_FD" = "maximum" ] || [ "$MAX_FD" = "max" ]; then
      MAX_FD="$MAX_FD_LIMIT"
    fi
    if ! ulimit -n "$MAX_FD"; then
      warn "Could not set maximum file descriptor limit: $MAX_FD"
    fi
  else
    warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
  fi
fi
102
+
103
# For Darwin, add options to specify how the application appears in the dock
# The flag is compared as a string rather than executed as a command, so an
# empty or unset value can never be mistaken for "true".
if [ "$darwin" = "true" ]; then
  GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
107
+
108
# For Cygwin, switch paths to Windows format before running java
if [ "$cygwin" = "true" ]; then
  APP_HOME=$(cygpath --path --mixed "$APP_HOME")
  CLASSPATH=$(cygpath --path --mixed "$CLASSPATH")
  JAVACMD=$(cygpath --unix "$JAVACMD")

  # We build the pattern for arguments to be converted via cygpath:
  # an alternation of every directory found directly under /.
  ROOTDIRSRAW=$(find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null)
  ROOTDIRS=""
  SEP=""
  # Word splitting of the find output is intentional here (one word per directory).
  for dir in $ROOTDIRSRAW; do
    ROOTDIRS="$ROOTDIRS$SEP$dir"
    SEP="|"
  done
  OURCYGPATTERN="(^($ROOTDIRS))"
  # Add a user-defined pattern to the cygpath arguments
  if [ "$GRADLE_CYGPATTERN" != "" ]; then
    OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
  fi

  # Now convert the arguments. The results are collected in an array so that
  # any number of arguments is handled and every argument stays a single
  # word, without re-evaluating its contents through eval.
  converted_args=()
  for arg in "$@"; do
    CHECK=$(printf '%s\n' "$arg" | grep -E -c "$OURCYGPATTERN" -)
    CHECK2=$(printf '%s\n' "$arg" | grep -E -c "^-") ### Determine if an option

    if [ "$CHECK" -ne 0 ] && [ "$CHECK2" -eq 0 ]; then ### Added a condition
      # Looks like a path under a root directory and is not an option: convert it.
      converted_args+=("$(cygpath --path --ignore --mixed "$arg")")
    else
      converted_args+=("$arg")
    fi
  done
  set -- "${converted_args[@]}"
fi
152
+
153
# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
#######################################
# Store all arguments, one array element per argument.
# Globals:   JVM_OPTS (written)
# Arguments: the already-split JVM options
#######################################
splitJvmOpts() {
  JVM_OPTS=("$@")
}
# eval is deliberate: it re-parses the option strings so that quotes embedded
# in them group words. The variables are therefore left unquoted on purpose.
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
JVM_OPTS+=("-Dorg.gradle.appname=$APP_BASE_NAME")
159
+
160
# Replace this shell process with the JVM running the Gradle wrapper main
# class, forwarding the script's arguments as they are.
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
@@ -0,0 +1,90 @@
1
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=

@rem %~dp0 is the drive and directory of this script; %~n0 is its name without extension.
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

@rem No JAVA_HOME: probe for a java.exe reachable through PATH.
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Strip any double quotes from JAVA_HOME before building the executable path.
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windows variants

if not "%OS%" == "Windows_NT" goto win9xME_args
if "%@eval[2+2]" == "4" goto 4NT_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2

:win9xME_args_slurp
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*
goto execute

:4NT_args
@rem Get arguments from the 4NT Shell from JP Software
set CMD_LINE_ARGS=%$

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar

@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega
@@ -0,0 +1,3 @@
1
# Register the Java filter class under the plugin type name "timestamp_hs".
# The third argument is the 'classpath' directory four levels above this file.
Embulk::JavaPlugin.register_filter(
  "timestamp_hs", "org.embulk.filter.timestamp_hs.TimestampHsFilterPlugin",
  File.expand_path('../../../../classpath', __FILE__))
@@ -0,0 +1,21 @@
1
# Example: read the timestamp column as a plain string and let the
# timestamp_hs filter convert it to a timestamp.
in:
  type: file
  path_prefix: dummy
  parser:
    type: csv
    columns:
      - {name: timestamp, type: string}

filters:
  - type: timestamp_hs
    column_options:
      # 'HH' is the 24-hour field, matching the '%H' used for the output format below;
      # 'hh' would be the 12-hour (am/pm) field.
      timestamp: {format: 'yyyy-MM-dd HH:mm:ss.SSS'}

out:
  type: file
  path_prefix: out
  file_ext: csv
  formatter:
    type: csv
    column_options:
      timestamp: {format: '%Y-%m-%d %H:%M:%S.%L'}
@@ -0,0 +1,16 @@
1
# Example without the filter: the csv parser itself reads the column as a timestamp.
in:
  type: file
  path_prefix: dummy
  parser:
    type: csv
    columns:
      - {name: timestamp, type: timestamp, format: '%Y-%m-%d %H:%M:%S.%L'}

out:
  type: file
  path_prefix: out
  file_ext: csv
  formatter:
    type: csv
    column_options:
      timestamp: {format: '%Y-%m-%d %H:%M:%S.%L'}
@@ -0,0 +1,5 @@
1
# Write one million lines to dummy.log, each holding the current time
# with millisecond precision.
File.open('dummy.log', 'w') do |f|
  1_000_000.times do
    f.puts(Time.now.strftime('%Y-%m-%d %H:%M:%S.%L'))
  end
end
@@ -0,0 +1,210 @@
1
+ package org.embulk.filter.timestamp_hs;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDefault;
5
+ import org.embulk.config.ConfigSource;
6
+ import org.embulk.config.Task;
7
+ import org.embulk.config.TaskSource;
8
+ import org.embulk.spi.*;
9
+ import org.embulk.spi.time.Timestamp;
10
+ import org.embulk.spi.time.TimestampFormatter;
11
+ import org.embulk.spi.type.Types;
12
+ import org.joda.time.DateTimeZone;
13
+ import org.msgpack.value.Value;
14
+ import org.slf4j.Logger;
15
+
16
+ import java.text.ParseException;
17
+ import java.text.SimpleDateFormat;
18
+ import java.util.HashMap;
19
+ import java.util.Map;
20
+
21
+
22
+ public class TimestampHsFilterPlugin implements FilterPlugin {
23
+
24
+ public interface PluginTask extends Task {
25
+
26
+ @Config("default_timezone")
27
+ @ConfigDefault("\"UTC\"")
28
+ DateTimeZone getDefaultTimezone();
29
+
30
+ @Config("default_timestamp_format")
31
+ @ConfigDefault("\"yyyy-MM-dd hh:mm:ss\"")
32
+ String getDefaultTimestampFormat();
33
+
34
+ @Config("column_options")
35
+ Map<String, TimestampColumnOption> getColumnOptions();
36
+ }
37
+
38
+ public interface TimestampColumnOption
39
+ extends Task, TimestampFormatter.TimestampColumnOption {
40
+ }
41
+
42
+ private final Logger log;
43
+
44
+ public TimestampHsFilterPlugin() {
45
+ this.log = Exec.getLogger(TimestampHsFilterPlugin.class);
46
+ }
47
+
48
+ @Override
49
+ public void transaction(ConfigSource config,
50
+ Schema inputSchema,
51
+ FilterPlugin.Control control) {
52
+ PluginTask task = config.loadConfig(PluginTask.class);
53
+
54
+ // Validate column names.
55
+ for (String columnName : task.getColumnOptions().keySet()) {
56
+ inputSchema.lookupColumn(columnName);
57
+ }
58
+
59
+ Map<String, TimestampColumnOption> options = task.getColumnOptions();
60
+ Schema.Builder builder = Schema.builder();
61
+
62
+ // Convert type "string" to "timestamp".
63
+ for (Column column : inputSchema.getColumns()) {
64
+ if (options.containsKey(column.getName())) {
65
+ if (column.getType().equals(Types.STRING)) {
66
+ builder.add(column.getName(), Types.TIMESTAMP);
67
+ } else {
68
+ log.warn(String.format(
69
+ "Can not convert to timestamp because '%s' is not string.",
70
+ column.getName()));
71
+ builder.add(column.getName(), column.getType());
72
+ }
73
+ } else {
74
+ builder.add(column.getName(), column.getType());
75
+ }
76
+ }
77
+
78
+ control.run(task.dump(), builder.build());
79
+ }
80
+
81
+ @Override
82
+ public PageOutput open(final TaskSource taskSource,
83
+ final Schema inputSchema,
84
+ final Schema outputSchema,
85
+ final PageOutput output) {
86
+
87
+ PluginTask task = taskSource.loadTask(PluginTask.class);
88
+
89
+ final Map<String, SimpleDateFormat> timestampParsers
90
+ = generateTimestampParsers(task);
91
+
92
+ return new PageOutput() {
93
+ private PageReader reader = new PageReader(inputSchema);
94
+
95
+ private PageBuilder builder = new PageBuilder(
96
+ Exec.getBufferAllocator(),
97
+ outputSchema,
98
+ output);
99
+
100
+ @Override
101
+ public void add(Page page) {
102
+ reader.setPage(page);
103
+ while (reader.nextRecord()) {
104
+ setValues();
105
+ builder.addRecord();
106
+ }
107
+ }
108
+
109
+ private void setValues() {
110
+ for (Column inputColumn : inputSchema.getColumns()) {
111
+ setValue(inputColumn);
112
+ }
113
+ }
114
+
115
+ private void setValue(Column inputColumn) {
116
+ if (reader.isNull(inputColumn)) {
117
+ builder.setNull(inputColumn);
118
+ return;
119
+ }
120
+
121
+ if (timestampParsers.containsKey(inputColumn.getName())
122
+ && inputColumn.getType().equals(Types.STRING)) {
123
+ setTimestampFromString(inputColumn);
124
+ } else {
125
+ setNonConvertedValue(inputColumn);
126
+ }
127
+ }
128
+
129
+ private void setNonConvertedValue(Column inputColumn) {
130
+ if (Types.STRING.equals(inputColumn.getType())) {
131
+ final String value = reader.getString(inputColumn);
132
+ builder.setString(inputColumn, value);
133
+ } else if (Types.BOOLEAN.equals(inputColumn.getType())) {
134
+ final boolean value = reader.getBoolean(inputColumn);
135
+ builder.setBoolean(inputColumn, value);
136
+ } else if (Types.DOUBLE.equals(inputColumn.getType())) {
137
+ final double value = reader.getDouble(inputColumn);
138
+ builder.setDouble(inputColumn, value);
139
+ } else if (Types.LONG.equals(inputColumn.getType())) {
140
+ final long value = reader.getLong(inputColumn);
141
+ builder.setLong(inputColumn, value);
142
+ } else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
143
+ final Timestamp value = reader.getTimestamp(inputColumn);
144
+ builder.setTimestamp(inputColumn, value);
145
+ } else if (Types.JSON.equals(inputColumn.getType())) {
146
+ final Value value = reader.getJson(inputColumn);
147
+ builder.setJson(inputColumn, value);
148
+ } else {
149
+ throw new DataException("Unexpected type:" + inputColumn.getType());
150
+ }
151
+ }
152
+
153
+ private void setTimestampFromString(Column inputColumn) {
154
+ String inputText = reader.getString(inputColumn);
155
+
156
+ try {
157
+ SimpleDateFormat parser = timestampParsers.get(
158
+ inputColumn.getName());
159
+
160
+ Timestamp timestamp = Timestamp.ofEpochMilli(
161
+ parser.parse(inputText).getTime());
162
+
163
+ builder.setTimestamp(inputColumn, timestamp);
164
+
165
+ } catch (ParseException e) {
166
+ log.warn(String.format(
167
+ "Could not convert string to timestamp: '%s'",
168
+ inputText));
169
+ builder.setNull(inputColumn);
170
+ }
171
+ }
172
+
173
+ @Override
174
+ public void finish() {
175
+ builder.finish();
176
+ }
177
+
178
+ @Override
179
+ public void close() {
180
+ builder.close();
181
+ }
182
+ };
183
+ }
184
+
185
+ private Map<String, SimpleDateFormat> generateTimestampParsers(PluginTask task) {
186
+ Map<String, TimestampColumnOption> options = task.getColumnOptions();
187
+ Map<String, SimpleDateFormat> timestampParsers = new HashMap<>();
188
+
189
+ for (Map.Entry<String, TimestampColumnOption> entry : options.entrySet()) {
190
+ TimestampColumnOption option = entry.getValue();
191
+
192
+ String format;
193
+ DateTimeZone timezone;
194
+ if (option == null) {
195
+ format = task.getDefaultTimestampFormat();
196
+ timezone = task.getDefaultTimezone();
197
+ } else {
198
+ format = option.getFormat().or(task.getDefaultTimestampFormat());
199
+ timezone = option.getTimeZone().or(task.getDefaultTimezone());
200
+ }
201
+
202
+ SimpleDateFormat sdf = new SimpleDateFormat(format);
203
+ sdf.setTimeZone(timezone.toTimeZone());
204
+
205
+ timestampParsers.put(entry.getKey(), sdf);
206
+ }
207
+
208
+ return timestampParsers;
209
+ }
210
+ }