embulk-filter-timestamp_format 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/README.md +6 -4
- data/build.gradle +4 -4
- data/example/bracket_notation.txt +8 -6
- data/example/from_double.txt +6 -4
- data/example/from_long.txt +6 -4
- data/example/from_string.txt +23 -21
- data/example/from_string_auto_java.txt +6 -4
- data/example/from_string_java.txt +6 -4
- data/example/from_timestamp.txt +6 -4
- data/src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java +2 -2
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java +0 -4
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java +7 -9
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java +16 -33
- data/src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java +7 -10
- data/src/test/java/org/embulk/filter/timestamp_format/TestTimestampParser.java +62 -0
- metadata +15 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 451b88351373a8de1875a6fdc5c78e9c91b093e6
|
4
|
+
data.tar.gz: afdf5911609850f33f6f3351fe90eed8d96d8f81
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc4a96d6f2b3a3743514fa175e4bfae7fe89d1f8e104b59460a4d9a70e0f9122b5eea007bfe5937782a45935b7186fe36edebdbd3e5d660e4ea6ce73654f43ae
|
7
|
+
data.tar.gz: b3c6057322b0620999b70ab6b49d92bd2154560120ceab0966472775ffcf0b07dbb7748cfe6dac0a53627bd657898039f2069b94eb7380c280b5e6e946883d0f
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
# 0.3.0 (2017-08-23)
|
2
|
+
|
3
|
+
Changes:
|
4
|
+
|
5
|
+
* Support new TimestampFormatter and TimestampParser API of embulk >= 0.8.29
|
6
|
+
* Note that this plugin now requires embulk >= 0.8.29
|
7
|
+
|
8
|
+
# 0.2.5 (2017-07-11)
|
9
|
+
|
10
|
+
Enhancements:
|
11
|
+
|
12
|
+
* Leverage new faster jruby timestamp parser introduced in embulk 0.8.27.
|
13
|
+
|
1
14
|
# 0.2.4 (2016-11-06)
|
2
15
|
|
3
16
|
Enhancements:
|
data/README.md
CHANGED
@@ -83,6 +83,8 @@ Following operators of JSONPath are not supported:
|
|
83
83
|
|
84
84
|
## JRuby Timestamp Parser Performance Issue
|
85
85
|
|
86
|
+
**NEWS: (2017/07/10) embulk 0.8.27 is released with a fast Timestamp jruby parser. This issue should be resolved, so Java Timestamp parser support will be dropped in future releases.**
|
87
|
+
|
86
88
|
Embulk's timestamp parser originally used a JRuby implementation, which is slow.
|
87
89
|
To improve performance, this plugin also supports Java's Joda-Time [DateTimeFormat](http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) format as:
|
88
90
|
|
@@ -117,10 +119,10 @@ If you configure `timestamp_parser: auto_java`, this plugin tries to convert rub
|
|
117
119
|
|
118
120
|
Benchmark test sets are available at [./bench](./bench). In my environment (MacBook Pro), for 1,000,000 timestamps:
|
119
121
|
|
120
|
-
* java parser
|
121
|
-
* java parser
|
122
|
-
* jruby parser
|
123
|
-
* jruby parser
|
122
|
+
* java parser + java formatter: 1.3s
|
123
|
+
* java parser + jruby formatter: 1.4s
|
124
|
+
* jruby parser + java formatter: 64.52s
|
125
|
+
* jruby parser + jruby formatter: 65.06s
|
124
126
|
|
125
127
|
JRuby parser is slow, but JRuby formatter is not so slow.
|
126
128
|
|
data/build.gradle
CHANGED
@@ -13,17 +13,17 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.
|
16
|
+
version = "0.3.0"
|
17
17
|
sourceCompatibility = 1.7
|
18
18
|
targetCompatibility = 1.7
|
19
19
|
|
20
20
|
dependencies {
|
21
|
-
compile "org.embulk:embulk-core:0.8
|
22
|
-
provided "org.embulk:embulk-core:0.8
|
21
|
+
compile "org.embulk:embulk-core:0.8.29+"
|
22
|
+
provided "org.embulk:embulk-core:0.8.29+"
|
23
23
|
compile "io.github.medjed:JsonPathCompiler:0.1.+"
|
24
24
|
|
25
25
|
testCompile "junit:junit:4.+"
|
26
|
-
testCompile "org.embulk:embulk-core:0.
|
26
|
+
testCompile "org.embulk:embulk-core:0.8.27+:tests"
|
27
27
|
}
|
28
28
|
|
29
29
|
checkstyle {
|
@@ -1,10 +1,12 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
2017-08-23 17:24:55.119 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:24:59.552 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:24:59.617 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
|
4
|
+
2017-08-23 17:24:59.618 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:24:59.625 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
|
6
|
+
2017-08-23 17:24:59.636 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
5
7
|
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
6
8
|
| record:json |
|
7
9
|
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
8
|
-
| {"timestamp":
|
9
|
-
| {"timestamp":
|
10
|
+
| {"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"},"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"timestamp":1436713200000} |
|
11
|
+
| {"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"},"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"timestamp":1436713200100} |
|
10
12
|
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
data/example/from_double.txt
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
2017-08-23 17:25:14.951 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:25:19.079 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:25:19.132 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_double.csv'
|
4
|
+
2017-08-23 17:25:19.133 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:25:19.138 +0900 [INFO] (0001:preview): Loading files [example/from_double.csv]
|
6
|
+
2017-08-23 17:25:19.151 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
5
7
|
+-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
|
6
8
|
| double1:string | double2:long | double3:double | double4:timestamp | record:json |
|
7
9
|
+-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
|
data/example/from_long.txt
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
2017-08-23 17:25:28.989 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:25:33.716 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:25:33.758 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_long.csv'
|
4
|
+
2017-08-23 17:25:33.760 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:25:33.767 +0900 [INFO] (0001:preview): Loading files [example/from_long.csv]
|
6
|
+
2017-08-23 17:25:33.780 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
5
7
|
+-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
|
6
8
|
| long1:string | long2:long | long3:double | long4:timestamp | record:json |
|
7
9
|
+-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
|
data/example/from_string.txt
CHANGED
@@ -1,21 +1,23 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
|
9
|
-
|
10
|
-
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 |
|
11
|
-
| 2015-07-13
|
12
|
-
| 2015-07-13 00:00:00.
|
13
|
-
| 2015-07-13 00:00:00.
|
14
|
-
| 2015-07-13 00:00:00.
|
15
|
-
| 2015-07-13 00:00:00.
|
16
|
-
| 2015-07-13 00:00:00.
|
17
|
-
| 2015-07-13 00:00:00.
|
18
|
-
| 2015-07-13 00:00:00.
|
19
|
-
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 |
|
20
|
-
| 2015-07-13 00:00:00.
|
21
|
-
|
1
|
+
2017-08-23 17:25:45.974 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:25:50.111 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:25:50.154 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
|
4
|
+
2017-08-23 17:25:50.155 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:25:50.160 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
|
6
|
+
2017-08-23 17:25:50.172 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
7
|
+
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
8
|
+
| string1:string | string2:long | string3:double | string4:timestamp | record:json |
|
9
|
+
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
10
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
11
|
+
| 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
|
12
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
13
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
14
|
+
| 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
|
15
|
+
| 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
|
16
|
+
| 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
|
17
|
+
| 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
|
18
|
+
| 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
|
19
|
+
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
|
20
|
+
| 2015-07-13 00:00:00.123456700 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456700"} |
|
21
|
+
| 2015-07-13 00:00:00.123456780 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456780"} |
|
22
|
+
| 2015-07-13 00:00:00.123456789 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456789"} |
|
23
|
+
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
@@ -1,7 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
2017-08-23 17:26:34.305 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:26:38.614 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:26:38.702 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
|
4
|
+
2017-08-23 17:26:38.704 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:26:38.711 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
|
6
|
+
2017-08-23 17:26:38.728 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
5
7
|
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
6
8
|
| string1:string | string2:long | string3:double | string4:timestamp | record:json |
|
7
9
|
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
@@ -1,7 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
2017-08-23 17:26:56.132 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:27:00.403 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:27:00.454 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
|
4
|
+
2017-08-23 17:27:00.455 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:27:00.460 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
|
6
|
+
2017-08-23 17:27:00.474 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
5
7
|
+----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
|
6
8
|
| string1:string | string2:long | string3:double | string4:timestamp | record:json |
|
7
9
|
+----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
|
data/example/from_timestamp.txt
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
2017-08-23 17:27:14.804 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:27:19.493 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:27:19.591 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_timestamp.csv'
|
4
|
+
2017-08-23 17:27:19.592 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:27:19.600 +0900 [INFO] (0001:preview): Loading files [example/from_timestamp.csv]
|
6
|
+
2017-08-23 17:27:19.623 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
5
7
|
+-------------------------------+-------------------+-------------------+-----------------------------+
|
6
8
|
| timestamp1:string | timestamp2:long | timestamp3:double | timestamp4:timestamp |
|
7
9
|
+-------------------------------+-------------------+-------------------+-----------------------------+
|
@@ -88,7 +88,7 @@ public class ColumnCaster
|
|
88
88
|
}
|
89
89
|
}
|
90
90
|
}
|
91
|
-
return new TimestampParser(
|
91
|
+
return new TimestampParser(newFormatList, timezone);
|
92
92
|
}
|
93
93
|
|
94
94
|
private void buildTimestampFormatterMap()
|
@@ -106,7 +106,7 @@ public class ColumnCaster
|
|
106
106
|
{
|
107
107
|
String format = columnConfig.getToFormat().or(task.getDefaultToTimestampFormat());
|
108
108
|
DateTimeZone timezone = columnConfig.getToTimeZone().or(task.getDefaultToTimeZone());
|
109
|
-
return new TimestampFormatter(
|
109
|
+
return new TimestampFormatter(format, timezone);
|
110
110
|
}
|
111
111
|
|
112
112
|
private void buildFromTimestampUnitMap()
|
@@ -24,7 +24,6 @@ import org.embulk.spi.type.BooleanType;
|
|
24
24
|
import org.embulk.spi.type.JsonType;
|
25
25
|
import org.embulk.spi.type.TimestampType;
|
26
26
|
import org.embulk.spi.type.Type;
|
27
|
-
import org.jruby.embed.ScriptingContainer;
|
28
27
|
import org.slf4j.Logger;
|
29
28
|
|
30
29
|
import java.util.List;
|
@@ -77,9 +76,6 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
77
76
|
@Config("default_to_timestamp_unit")
|
78
77
|
@ConfigDefault("\"second\"")
|
79
78
|
TimestampUnit getDefaultToTimestampUnit();
|
80
|
-
|
81
|
-
@ConfigInject
|
82
|
-
ScriptingContainer getJRuby();
|
83
79
|
}
|
84
80
|
|
85
81
|
@Override
|
@@ -12,7 +12,6 @@ import org.embulk.spi.util.LineEncoder;
|
|
12
12
|
|
13
13
|
import org.joda.time.DateTime;
|
14
14
|
import org.joda.time.DateTimeZone;
|
15
|
-
import org.jruby.embed.ScriptingContainer;
|
16
15
|
import org.jruby.util.RubyDateFormat;
|
17
16
|
|
18
17
|
import java.util.Locale;
|
@@ -52,16 +51,15 @@ public class TimestampFormatter
|
|
52
51
|
|
53
52
|
public TimestampFormatter(PluginTask task, Optional<? extends TimestampColumnOption> columnOption)
|
54
53
|
{
|
55
|
-
this(
|
56
|
-
columnOption.
|
57
|
-
|
58
|
-
|
59
|
-
columnOption.
|
60
|
-
|
61
|
-
: task.getDefaultToTimeZone());
|
54
|
+
this(columnOption.isPresent() ?
|
55
|
+
columnOption.get().getToFormat().or(task.getDefaultToTimestampFormat())
|
56
|
+
: task.getDefaultToTimestampFormat(),
|
57
|
+
columnOption.isPresent() ?
|
58
|
+
columnOption.get().getToTimeZone().or(task.getDefaultToTimeZone())
|
59
|
+
: task.getDefaultToTimeZone());
|
62
60
|
}
|
63
61
|
|
64
|
-
public TimestampFormatter(
|
62
|
+
public TimestampFormatter(String format, DateTimeZone toTimeZone)
|
65
63
|
{
|
66
64
|
this.toTimeZone = toTimeZone;
|
67
65
|
if (format.contains("%")) {
|
@@ -17,7 +17,6 @@ import org.embulk.spi.time.TimestampParseException;
|
|
17
17
|
import org.joda.time.DateTime;
|
18
18
|
import org.joda.time.DateTimeZone;
|
19
19
|
import org.joda.time.format.DateTimeFormatter;
|
20
|
-
import org.jruby.embed.ScriptingContainer;
|
21
20
|
|
22
21
|
import java.util.ArrayList;
|
23
22
|
import java.util.List;
|
@@ -48,30 +47,27 @@ public class TimestampParser {
|
|
48
47
|
Optional<List<String>> getFromFormat();
|
49
48
|
}
|
50
49
|
|
51
|
-
private final List<
|
50
|
+
private final List<org.embulk.spi.time.TimestampParser> jrubyParserList = new ArrayList<>();
|
52
51
|
private final List<DateTimeFormatter> javaParserList = new ArrayList<>();
|
53
52
|
private final List<Boolean> handleNanoResolutionList = new ArrayList<>();
|
54
53
|
private final DateTimeZone defaultFromTimeZone;
|
55
54
|
private final Pattern nanoSecPattern = Pattern.compile("\\.(\\d+)");
|
56
55
|
|
57
56
|
TimestampParser(PluginTask task) {
|
58
|
-
this(task.
|
57
|
+
this(task.getDefaultFromTimestampFormat(), task.getDefaultFromTimeZone());
|
59
58
|
}
|
60
59
|
|
61
60
|
public TimestampParser(PluginTask task, TimestampColumnOption columnOption) {
|
62
|
-
this(task.
|
63
|
-
|
64
|
-
columnOption.getFromTimeZone().or(task.getDefaultFromTimeZone()));
|
61
|
+
this(columnOption.getFromFormat().or(task.getDefaultFromTimestampFormat()),
|
62
|
+
columnOption.getFromTimeZone().or(task.getDefaultFromTimeZone()));
|
65
63
|
}
|
66
64
|
|
67
|
-
public TimestampParser(
|
68
|
-
JRubyTimeParserHelperFactory helperFactory = (JRubyTimeParserHelperFactory) jruby.runScriptlet("Embulk::Java::TimeParserHelper::Factory.new");
|
69
|
-
|
65
|
+
public TimestampParser(List<String> formatList, DateTimeZone defaultFromTimeZone) {
|
70
66
|
// TODO get default current time from ExecTask.getExecTimestamp
|
71
67
|
for (String format : formatList) {
|
72
68
|
if (format.contains("%")) {
|
73
|
-
|
74
|
-
this.jrubyParserList.add(
|
69
|
+
org.embulk.spi.time.TimestampParser parser = new org.embulk.spi.time.TimestampParser(format, defaultFromTimeZone);
|
70
|
+
this.jrubyParserList.add(parser);
|
75
71
|
} else {
|
76
72
|
// special treatment for nano resolution. n is not originally supported by Joda-Time
|
77
73
|
if (format.contains("nnnnnnnnn")) {
|
@@ -106,38 +102,25 @@ public class TimestampParser {
|
|
106
102
|
}
|
107
103
|
|
108
104
|
private Timestamp jrubyParse(String text) throws TimestampParseException {
|
109
|
-
|
105
|
+
Timestamp timestamp = null;
|
110
106
|
TimestampParseException exception = null;
|
111
107
|
|
112
|
-
|
113
|
-
for (
|
114
|
-
|
108
|
+
org.embulk.spi.time.TimestampParser parser = null;
|
109
|
+
for (org.embulk.spi.time.TimestampParser p : jrubyParserList) {
|
110
|
+
parser = p;
|
115
111
|
try {
|
116
|
-
|
112
|
+
// NOTE: embulk >= 0.8.27 uses the new, faster JRuby timestamp parser, which supports nanosecond resolution
|
113
|
+
// NOTE: embulk < 0.8.27 uses the old, slower JRuby timestamp parser, which supports only microsecond resolution
|
114
|
+
timestamp = parser.parse(text);
|
117
115
|
break;
|
118
116
|
} catch (TimestampParseException ex) {
|
119
117
|
exception = ex;
|
120
118
|
}
|
121
119
|
}
|
122
|
-
if (
|
120
|
+
if (timestamp == null) {
|
123
121
|
throw exception;
|
124
122
|
}
|
125
|
-
|
126
|
-
String zone = helper.getZone();
|
127
|
-
|
128
|
-
if (zone != null) {
|
129
|
-
// TODO cache parsed zone?
|
130
|
-
timeZone = parseDateTimeZone(zone);
|
131
|
-
if (timeZone == null) {
|
132
|
-
throw new TimestampParseException("Invalid time zone name '" + text + "'");
|
133
|
-
}
|
134
|
-
}
|
135
|
-
|
136
|
-
long localSec = localUsec / 1000000;
|
137
|
-
long usec = localUsec % 1000000;
|
138
|
-
long sec = timeZone.convertLocalToUTC(localSec * 1000, false) / 1000;
|
139
|
-
|
140
|
-
return Timestamp.ofEpochSecond(sec, usec * 1000);
|
123
|
+
return timestamp;
|
141
124
|
}
|
142
125
|
|
143
126
|
private Timestamp javaParse(String text) throws IllegalArgumentException {
|
@@ -4,7 +4,6 @@ import org.embulk.EmbulkTestRuntime;
|
|
4
4
|
|
5
5
|
import org.embulk.spi.time.Timestamp;
|
6
6
|
import org.joda.time.DateTimeZone;
|
7
|
-
import org.jruby.embed.ScriptingContainer;
|
8
7
|
|
9
8
|
import org.junit.Before;
|
10
9
|
import org.junit.Rule;
|
@@ -20,14 +19,12 @@ public class TestTimestampFormatConverter
|
|
20
19
|
{
|
21
20
|
@Rule
|
22
21
|
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
23
|
-
public ScriptingContainer jruby;
|
24
22
|
public DateTimeZone zone;
|
25
23
|
public Timestamp timestamp;
|
26
24
|
|
27
25
|
@Before
|
28
26
|
public void createResource()
|
29
27
|
{
|
30
|
-
jruby = new ScriptingContainer();
|
31
28
|
zone = DateTimeZone.UTC;
|
32
29
|
timestamp = Timestamp.ofEpochSecond(1463130159, 123456789);
|
33
30
|
}
|
@@ -39,14 +36,14 @@ public class TestTimestampFormatConverter
|
|
39
36
|
String rubyFormat = "%" + entry.getKey();
|
40
37
|
String javaFormat = entry.getValue();
|
41
38
|
|
42
|
-
TimestampFormatter rubyFormatter = new TimestampFormatter(
|
43
|
-
TimestampFormatter javaFormatter = new TimestampFormatter(
|
39
|
+
TimestampFormatter rubyFormatter = new TimestampFormatter(rubyFormat, zone);
|
40
|
+
TimestampFormatter javaFormatter = new TimestampFormatter(javaFormat, zone);
|
44
41
|
String rubyFormatted = rubyFormatter.format(timestamp);
|
45
42
|
String javaFormatted = javaFormatter.format(timestamp);
|
46
43
|
// System.out.println(String.format("%s<%s> %s<%s>", rubyFormat, rubyFormatted, javaFormat, javaFormatted));
|
47
44
|
|
48
|
-
TimestampParser rubyParser = new TimestampParser(
|
49
|
-
TimestampParser javaParser = new TimestampParser(
|
45
|
+
TimestampParser rubyParser = new TimestampParser(Arrays.asList("." + rubyFormat), zone);
|
46
|
+
TimestampParser javaParser = new TimestampParser(Arrays.asList("." + javaFormat), zone);
|
50
47
|
Timestamp rubyParsed = rubyParser.parse("." + rubyFormatted);
|
51
48
|
try {
|
52
49
|
Timestamp javaParsed = javaParser.parse("." + rubyFormatted);
|
@@ -75,7 +72,7 @@ public class TestTimestampFormatConverter
|
|
75
72
|
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z";
|
76
73
|
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
77
74
|
|
78
|
-
TimestampParser parser = new TimestampParser(
|
75
|
+
TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
|
79
76
|
try {
|
80
77
|
parser.parse("2016-05-12 20:14:13.123456789 +09:00");
|
81
78
|
}
|
@@ -88,7 +85,7 @@ public class TestTimestampFormatConverter
|
|
88
85
|
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn 'UTC'";
|
89
86
|
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
90
87
|
|
91
|
-
TimestampParser parser = new TimestampParser(
|
88
|
+
TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
|
92
89
|
try {
|
93
90
|
parser.parse("2016-05-12 20:14:13.123456789 UTC");
|
94
91
|
}
|
@@ -101,7 +98,7 @@ public class TestTimestampFormatConverter
|
|
101
98
|
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn '+00:00'";
|
102
99
|
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
103
100
|
|
104
|
-
TimestampParser parser = new TimestampParser(
|
101
|
+
TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
|
105
102
|
try {
|
106
103
|
parser.parse("2016-05-12 20:14:13.123456789 +00:00");
|
107
104
|
}
|
@@ -0,0 +1,62 @@
|
|
1
|
+
package org.embulk.filter.timestamp_format;
|
2
|
+
|
3
|
+
import org.embulk.EmbulkTestRuntime;
|
4
|
+
|
5
|
+
import org.embulk.spi.time.Timestamp;
|
6
|
+
import org.joda.time.DateTimeZone;
|
7
|
+
|
8
|
+
import org.junit.Before;
|
9
|
+
import org.junit.Rule;
|
10
|
+
import org.junit.Test;
|
11
|
+
|
12
|
+
import java.util.Arrays;
|
13
|
+
|
14
|
+
import static org.junit.Assert.assertEquals;
|
15
|
+
import static org.junit.Assert.fail;
|
16
|
+
|
17
|
+
public class TestTimestampParser
|
18
|
+
{
|
19
|
+
@Rule
|
20
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
21
|
+
public DateTimeZone zone;
|
22
|
+
public Timestamp expected;
|
23
|
+
|
24
|
+
@Before
|
25
|
+
public void createResource()
|
26
|
+
{
|
27
|
+
zone = DateTimeZone.UTC;
|
28
|
+
expected = Timestamp.ofEpochSecond(1463065359, 123456789);
|
29
|
+
}
|
30
|
+
|
31
|
+
@Test
|
32
|
+
public void testJRubyParser()
|
33
|
+
{
|
34
|
+
String rubyFormat = "%Y-%m-%d %H:%M:%S.%N %:z";
|
35
|
+
|
36
|
+
TimestampParser parser = new TimestampParser(Arrays.asList(rubyFormat), zone);
|
37
|
+
try {
|
38
|
+
Timestamp actual = parser.parse("2016-05-13 00:02:39.123456789 +09:00");
|
39
|
+
// embulk >= 0.8.27 uses the new, faster JRuby timestamp parser, which supports nanosecond resolution
|
40
|
+
// embulk < 0.8.27 uses the old, slower JRuby timestamp parser, which does not support nanosecond resolution
|
41
|
+
//assertEquals(expected, actual);
|
42
|
+
}
|
43
|
+
catch (IllegalArgumentException ex) {
|
44
|
+
fail();
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
@Test
|
49
|
+
public void testJavaParser()
|
50
|
+
{
|
51
|
+
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z";
|
52
|
+
|
53
|
+
TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
|
54
|
+
try {
|
55
|
+
Timestamp actual = parser.parse("2016-05-13 00:02:39.123456789 +09:00");
|
56
|
+
assertEquals(expected, actual);
|
57
|
+
}
|
58
|
+
catch (IllegalArgumentException ex) {
|
59
|
+
fail();
|
60
|
+
}
|
61
|
+
}
|
62
|
+
}
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-timestamp_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-08-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
|
14
|
+
name: bundler
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
15
16
|
requirements:
|
16
17
|
- - ~>
|
17
18
|
- !ruby/object:Gem::Version
|
18
19
|
version: '1.0'
|
19
|
-
|
20
|
-
prerelease: false
|
21
|
-
type: :development
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
23
21
|
requirements:
|
24
22
|
- - ~>
|
25
23
|
- !ruby/object:Gem::Version
|
26
24
|
version: '1.0'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
|
28
|
+
name: rake
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
31
|
- - '>='
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '10.0'
|
33
|
-
|
34
|
-
prerelease: false
|
35
|
-
type: :development
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
37
35
|
requirements:
|
38
36
|
- - '>='
|
39
37
|
- !ruby/object:Gem::Version
|
40
38
|
version: '10.0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
41
|
description: A filter plugin for Embulk to change timestamp format.
|
42
42
|
email:
|
43
43
|
- sonots@gmail.com
|
@@ -106,13 +106,13 @@ files:
|
|
106
106
|
- src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java
|
107
107
|
- src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
|
108
108
|
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java
|
109
|
+
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampParser.java
|
109
110
|
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
|
110
111
|
- classpath/accessors-smart-1.1.jar
|
111
112
|
- classpath/asm-5.0.3.jar
|
112
|
-
- classpath/
|
113
|
-
- classpath/embulk-filter-timestamp_format-0.2.4.jar
|
113
|
+
- classpath/embulk-filter-timestamp_format-0.3.0.jar
|
114
114
|
- classpath/json-smart-2.2.1.jar
|
115
|
-
- classpath/JsonPathCompiler-0.1.
|
115
|
+
- classpath/JsonPathCompiler-0.1.2.jar
|
116
116
|
- classpath/slf4j-api-1.7.21.jar
|
117
117
|
homepage: https://github.com/sonots/embulk-filter-timestamp_format
|
118
118
|
licenses:
|