embulk-filter-timestamp_format 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/README.md +6 -4
- data/build.gradle +4 -4
- data/example/bracket_notation.txt +8 -6
- data/example/from_double.txt +6 -4
- data/example/from_long.txt +6 -4
- data/example/from_string.txt +23 -21
- data/example/from_string_auto_java.txt +6 -4
- data/example/from_string_java.txt +6 -4
- data/example/from_timestamp.txt +6 -4
- data/src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java +2 -2
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java +0 -4
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java +7 -9
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java +16 -33
- data/src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java +7 -10
- data/src/test/java/org/embulk/filter/timestamp_format/TestTimestampParser.java +62 -0
- metadata +15 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 451b88351373a8de1875a6fdc5c78e9c91b093e6
|
4
|
+
data.tar.gz: afdf5911609850f33f6f3351fe90eed8d96d8f81
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc4a96d6f2b3a3743514fa175e4bfae7fe89d1f8e104b59460a4d9a70e0f9122b5eea007bfe5937782a45935b7186fe36edebdbd3e5d660e4ea6ce73654f43ae
|
7
|
+
data.tar.gz: b3c6057322b0620999b70ab6b49d92bd2154560120ceab0966472775ffcf0b07dbb7748cfe6dac0a53627bd657898039f2069b94eb7380c280b5e6e946883d0f
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
# 0.3.0 (2017-08-23)
|
2
|
+
|
3
|
+
Changes:
|
4
|
+
|
5
|
+
* Support new TimestampFormatter and TimestampParser API of embulk >= 0.8.29
|
6
|
+
* Note that this plugin now requires embulk >= 0.8.29
|
7
|
+
|
8
|
+
# 0.2.5 (2017-07-11)
|
9
|
+
|
10
|
+
Enhancements:
|
11
|
+
|
12
|
+
* Leverage new faster jruby timestamp parser introduced in embulk 0.8.27.
|
13
|
+
|
1
14
|
# 0.2.4 (2016-11-06)
|
2
15
|
|
3
16
|
Enhancements:
|
data/README.md
CHANGED
@@ -83,6 +83,8 @@ Following operators of JSONPath are not supported:
|
|
83
83
|
|
84
84
|
## JRuby Timestamp Parser Performance Issue
|
85
85
|
|
86
|
+
**NEWS: (2017/07/10) embulk 0.8.27 is released with a fast Timestamp jruby parser. This issue should be resolved, so Java Timestamp parser support will be dropped in future releases.**
|
87
|
+
|
86
88
|
Embulk's timestamp parser originally used a JRuby implementation, which is slow.
|
87
89
|
To improve performance, this plugin also supports Java's Joda-Time [DateTimeFormat](http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) format as:
|
88
90
|
|
@@ -117,10 +119,10 @@ If you configure `timestamp_parser: auto_java`, this plugin tries to convert rub
|
|
117
119
|
|
118
120
|
Benchmark test sets are available at [./bench](./bench). In my environment (MacBook Pro), results for 1000000 timestamps were:
|
119
121
|
|
120
|
-
* java parser
|
121
|
-
* java parser
|
122
|
-
* jruby parser
|
123
|
-
* jruby parser
|
122
|
+
* java parser + java formatter: 1.3s
|
123
|
+
* java parser + jruby formatter: 1.4s
|
124
|
+
* jruby parser + java formatter: 64.52s
|
125
|
+
* jruby parser + jruby formatter: 65.06s
|
124
126
|
|
125
127
|
JRuby parser is slow, but JRuby formatter is not so slow.
|
126
128
|
|
data/build.gradle
CHANGED
@@ -13,17 +13,17 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.
|
16
|
+
version = "0.3.0"
|
17
17
|
sourceCompatibility = 1.7
|
18
18
|
targetCompatibility = 1.7
|
19
19
|
|
20
20
|
dependencies {
|
21
|
-
compile "org.embulk:embulk-core:0.8
|
22
|
-
provided "org.embulk:embulk-core:0.8
|
21
|
+
compile "org.embulk:embulk-core:0.8.29+"
|
22
|
+
provided "org.embulk:embulk-core:0.8.29+"
|
23
23
|
compile "io.github.medjed:JsonPathCompiler:0.1.+"
|
24
24
|
|
25
25
|
testCompile "junit:junit:4.+"
|
26
|
-
testCompile "org.embulk:embulk-core:0.
|
26
|
+
testCompile "org.embulk:embulk-core:0.8.27+:tests"
|
27
27
|
}
|
28
28
|
|
29
29
|
checkstyle {
|
@@ -1,10 +1,12 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
2017-08-23 17:24:55.119 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:24:59.552 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:24:59.617 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
|
4
|
+
2017-08-23 17:24:59.618 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:24:59.625 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
|
6
|
+
2017-08-23 17:24:59.636 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
5
7
|
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
6
8
|
| record:json |
|
7
9
|
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
8
|
-
| {"timestamp":
|
9
|
-
| {"timestamp":
|
10
|
+
| {"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"},"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"timestamp":1436713200000} |
|
11
|
+
| {"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"},"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"timestamp":1436713200100} |
|
10
12
|
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
data/example/from_double.txt
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
2017-08-23 17:25:14.951 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:25:19.079 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:25:19.132 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_double.csv'
|
4
|
+
2017-08-23 17:25:19.133 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:25:19.138 +0900 [INFO] (0001:preview): Loading files [example/from_double.csv]
|
6
|
+
2017-08-23 17:25:19.151 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
5
7
|
+-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
|
6
8
|
| double1:string | double2:long | double3:double | double4:timestamp | record:json |
|
7
9
|
+-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
|
data/example/from_long.txt
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
2017-08-23 17:25:28.989 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:25:33.716 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:25:33.758 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_long.csv'
|
4
|
+
2017-08-23 17:25:33.760 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:25:33.767 +0900 [INFO] (0001:preview): Loading files [example/from_long.csv]
|
6
|
+
2017-08-23 17:25:33.780 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
5
7
|
+-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
|
6
8
|
| long1:string | long2:long | long3:double | long4:timestamp | record:json |
|
7
9
|
+-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
|
data/example/from_string.txt
CHANGED
@@ -1,21 +1,23 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
|
9
|
-
|
10
|
-
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 |
|
11
|
-
| 2015-07-13
|
12
|
-
| 2015-07-13 00:00:00.
|
13
|
-
| 2015-07-13 00:00:00.
|
14
|
-
| 2015-07-13 00:00:00.
|
15
|
-
| 2015-07-13 00:00:00.
|
16
|
-
| 2015-07-13 00:00:00.
|
17
|
-
| 2015-07-13 00:00:00.
|
18
|
-
| 2015-07-13 00:00:00.
|
19
|
-
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 |
|
20
|
-
| 2015-07-13 00:00:00.
|
21
|
-
|
1
|
+
2017-08-23 17:25:45.974 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:25:50.111 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:25:50.154 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
|
4
|
+
2017-08-23 17:25:50.155 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:25:50.160 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
|
6
|
+
2017-08-23 17:25:50.172 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
7
|
+
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
8
|
+
| string1:string | string2:long | string3:double | string4:timestamp | record:json |
|
9
|
+
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
10
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
11
|
+
| 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
|
12
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
13
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
14
|
+
| 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
|
15
|
+
| 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
|
16
|
+
| 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
|
17
|
+
| 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
|
18
|
+
| 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
|
19
|
+
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
|
20
|
+
| 2015-07-13 00:00:00.123456700 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456700"} |
|
21
|
+
| 2015-07-13 00:00:00.123456780 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456780"} |
|
22
|
+
| 2015-07-13 00:00:00.123456789 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456789"} |
|
23
|
+
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
@@ -1,7 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
2017-08-23 17:26:34.305 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:26:38.614 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:26:38.702 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
|
4
|
+
2017-08-23 17:26:38.704 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:26:38.711 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
|
6
|
+
2017-08-23 17:26:38.728 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
5
7
|
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
6
8
|
| string1:string | string2:long | string3:double | string4:timestamp | record:json |
|
7
9
|
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
@@ -1,7 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
2017-08-23 17:26:56.132 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:27:00.403 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:27:00.454 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
|
4
|
+
2017-08-23 17:27:00.455 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:27:00.460 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
|
6
|
+
2017-08-23 17:27:00.474 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
5
7
|
+----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
|
6
8
|
| string1:string | string2:long | string3:double | string4:timestamp | record:json |
|
7
9
|
+----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
|
data/example/from_timestamp.txt
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
2017-08-23 17:27:14.804 +0900: Embulk v0.8.30
|
2
|
+
2017-08-23 17:27:19.493 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2017-08-23 17:27:19.591 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_timestamp.csv'
|
4
|
+
2017-08-23 17:27:19.592 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
|
5
|
+
2017-08-23 17:27:19.600 +0900 [INFO] (0001:preview): Loading files [example/from_timestamp.csv]
|
6
|
+
2017-08-23 17:27:19.623 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
|
5
7
|
+-------------------------------+-------------------+-------------------+-----------------------------+
|
6
8
|
| timestamp1:string | timestamp2:long | timestamp3:double | timestamp4:timestamp |
|
7
9
|
+-------------------------------+-------------------+-------------------+-----------------------------+
|
@@ -88,7 +88,7 @@ public class ColumnCaster
|
|
88
88
|
}
|
89
89
|
}
|
90
90
|
}
|
91
|
-
return new TimestampParser(
|
91
|
+
return new TimestampParser(newFormatList, timezone);
|
92
92
|
}
|
93
93
|
|
94
94
|
private void buildTimestampFormatterMap()
|
@@ -106,7 +106,7 @@ public class ColumnCaster
|
|
106
106
|
{
|
107
107
|
String format = columnConfig.getToFormat().or(task.getDefaultToTimestampFormat());
|
108
108
|
DateTimeZone timezone = columnConfig.getToTimeZone().or(task.getDefaultToTimeZone());
|
109
|
-
return new TimestampFormatter(
|
109
|
+
return new TimestampFormatter(format, timezone);
|
110
110
|
}
|
111
111
|
|
112
112
|
private void buildFromTimestampUnitMap()
|
@@ -24,7 +24,6 @@ import org.embulk.spi.type.BooleanType;
|
|
24
24
|
import org.embulk.spi.type.JsonType;
|
25
25
|
import org.embulk.spi.type.TimestampType;
|
26
26
|
import org.embulk.spi.type.Type;
|
27
|
-
import org.jruby.embed.ScriptingContainer;
|
28
27
|
import org.slf4j.Logger;
|
29
28
|
|
30
29
|
import java.util.List;
|
@@ -77,9 +76,6 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
77
76
|
@Config("default_to_timestamp_unit")
|
78
77
|
@ConfigDefault("\"second\"")
|
79
78
|
TimestampUnit getDefaultToTimestampUnit();
|
80
|
-
|
81
|
-
@ConfigInject
|
82
|
-
ScriptingContainer getJRuby();
|
83
79
|
}
|
84
80
|
|
85
81
|
@Override
|
@@ -12,7 +12,6 @@ import org.embulk.spi.util.LineEncoder;
|
|
12
12
|
|
13
13
|
import org.joda.time.DateTime;
|
14
14
|
import org.joda.time.DateTimeZone;
|
15
|
-
import org.jruby.embed.ScriptingContainer;
|
16
15
|
import org.jruby.util.RubyDateFormat;
|
17
16
|
|
18
17
|
import java.util.Locale;
|
@@ -52,16 +51,15 @@ public class TimestampFormatter
|
|
52
51
|
|
53
52
|
public TimestampFormatter(PluginTask task, Optional<? extends TimestampColumnOption> columnOption)
|
54
53
|
{
|
55
|
-
this(
|
56
|
-
columnOption.
|
57
|
-
|
58
|
-
|
59
|
-
columnOption.
|
60
|
-
|
61
|
-
: task.getDefaultToTimeZone());
|
54
|
+
this(columnOption.isPresent() ?
|
55
|
+
columnOption.get().getToFormat().or(task.getDefaultToTimestampFormat())
|
56
|
+
: task.getDefaultToTimestampFormat(),
|
57
|
+
columnOption.isPresent() ?
|
58
|
+
columnOption.get().getToTimeZone().or(task.getDefaultToTimeZone())
|
59
|
+
: task.getDefaultToTimeZone());
|
62
60
|
}
|
63
61
|
|
64
|
-
public TimestampFormatter(
|
62
|
+
public TimestampFormatter(String format, DateTimeZone toTimeZone)
|
65
63
|
{
|
66
64
|
this.toTimeZone = toTimeZone;
|
67
65
|
if (format.contains("%")) {
|
@@ -17,7 +17,6 @@ import org.embulk.spi.time.TimestampParseException;
|
|
17
17
|
import org.joda.time.DateTime;
|
18
18
|
import org.joda.time.DateTimeZone;
|
19
19
|
import org.joda.time.format.DateTimeFormatter;
|
20
|
-
import org.jruby.embed.ScriptingContainer;
|
21
20
|
|
22
21
|
import java.util.ArrayList;
|
23
22
|
import java.util.List;
|
@@ -48,30 +47,27 @@ public class TimestampParser {
|
|
48
47
|
Optional<List<String>> getFromFormat();
|
49
48
|
}
|
50
49
|
|
51
|
-
private final List<
|
50
|
+
private final List<org.embulk.spi.time.TimestampParser> jrubyParserList = new ArrayList<>();
|
52
51
|
private final List<DateTimeFormatter> javaParserList = new ArrayList<>();
|
53
52
|
private final List<Boolean> handleNanoResolutionList = new ArrayList<>();
|
54
53
|
private final DateTimeZone defaultFromTimeZone;
|
55
54
|
private final Pattern nanoSecPattern = Pattern.compile("\\.(\\d+)");
|
56
55
|
|
57
56
|
TimestampParser(PluginTask task) {
|
58
|
-
this(task.
|
57
|
+
this(task.getDefaultFromTimestampFormat(), task.getDefaultFromTimeZone());
|
59
58
|
}
|
60
59
|
|
61
60
|
public TimestampParser(PluginTask task, TimestampColumnOption columnOption) {
|
62
|
-
this(task.
|
63
|
-
|
64
|
-
columnOption.getFromTimeZone().or(task.getDefaultFromTimeZone()));
|
61
|
+
this(columnOption.getFromFormat().or(task.getDefaultFromTimestampFormat()),
|
62
|
+
columnOption.getFromTimeZone().or(task.getDefaultFromTimeZone()));
|
65
63
|
}
|
66
64
|
|
67
|
-
public TimestampParser(
|
68
|
-
JRubyTimeParserHelperFactory helperFactory = (JRubyTimeParserHelperFactory) jruby.runScriptlet("Embulk::Java::TimeParserHelper::Factory.new");
|
69
|
-
|
65
|
+
public TimestampParser(List<String> formatList, DateTimeZone defaultFromTimeZone) {
|
70
66
|
// TODO get default current time from ExecTask.getExecTimestamp
|
71
67
|
for (String format : formatList) {
|
72
68
|
if (format.contains("%")) {
|
73
|
-
|
74
|
-
this.jrubyParserList.add(
|
69
|
+
org.embulk.spi.time.TimestampParser parser = new org.embulk.spi.time.TimestampParser(format, defaultFromTimeZone);
|
70
|
+
this.jrubyParserList.add(parser);
|
75
71
|
} else {
|
76
72
|
// special treatment for nano resolution. n is not originally supported by Joda-Time
|
77
73
|
if (format.contains("nnnnnnnnn")) {
|
@@ -106,38 +102,25 @@ public class TimestampParser {
|
|
106
102
|
}
|
107
103
|
|
108
104
|
private Timestamp jrubyParse(String text) throws TimestampParseException {
|
109
|
-
|
105
|
+
Timestamp timestamp = null;
|
110
106
|
TimestampParseException exception = null;
|
111
107
|
|
112
|
-
|
113
|
-
for (
|
114
|
-
|
108
|
+
org.embulk.spi.time.TimestampParser parser = null;
|
109
|
+
for (org.embulk.spi.time.TimestampParser p : jrubyParserList) {
|
110
|
+
parser = p;
|
115
111
|
try {
|
116
|
-
|
112
|
+
// NOTE: embulk >= 0.8.27 uses new faster jruby timestamp parser, and it supports nano second
|
113
|
+
// NOTE: embulk < 0.8.27 uses old slower jruby timestamp parser, and it supports micro second
|
114
|
+
timestamp = parser.parse(text);
|
117
115
|
break;
|
118
116
|
} catch (TimestampParseException ex) {
|
119
117
|
exception = ex;
|
120
118
|
}
|
121
119
|
}
|
122
|
-
if (
|
120
|
+
if (timestamp == null) {
|
123
121
|
throw exception;
|
124
122
|
}
|
125
|
-
|
126
|
-
String zone = helper.getZone();
|
127
|
-
|
128
|
-
if (zone != null) {
|
129
|
-
// TODO cache parsed zone?
|
130
|
-
timeZone = parseDateTimeZone(zone);
|
131
|
-
if (timeZone == null) {
|
132
|
-
throw new TimestampParseException("Invalid time zone name '" + text + "'");
|
133
|
-
}
|
134
|
-
}
|
135
|
-
|
136
|
-
long localSec = localUsec / 1000000;
|
137
|
-
long usec = localUsec % 1000000;
|
138
|
-
long sec = timeZone.convertLocalToUTC(localSec * 1000, false) / 1000;
|
139
|
-
|
140
|
-
return Timestamp.ofEpochSecond(sec, usec * 1000);
|
123
|
+
return timestamp;
|
141
124
|
}
|
142
125
|
|
143
126
|
private Timestamp javaParse(String text) throws IllegalArgumentException {
|
@@ -4,7 +4,6 @@ import org.embulk.EmbulkTestRuntime;
|
|
4
4
|
|
5
5
|
import org.embulk.spi.time.Timestamp;
|
6
6
|
import org.joda.time.DateTimeZone;
|
7
|
-
import org.jruby.embed.ScriptingContainer;
|
8
7
|
|
9
8
|
import org.junit.Before;
|
10
9
|
import org.junit.Rule;
|
@@ -20,14 +19,12 @@ public class TestTimestampFormatConverter
|
|
20
19
|
{
|
21
20
|
@Rule
|
22
21
|
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
23
|
-
public ScriptingContainer jruby;
|
24
22
|
public DateTimeZone zone;
|
25
23
|
public Timestamp timestamp;
|
26
24
|
|
27
25
|
@Before
|
28
26
|
public void createResource()
|
29
27
|
{
|
30
|
-
jruby = new ScriptingContainer();
|
31
28
|
zone = DateTimeZone.UTC;
|
32
29
|
timestamp = Timestamp.ofEpochSecond(1463130159, 123456789);
|
33
30
|
}
|
@@ -39,14 +36,14 @@ public class TestTimestampFormatConverter
|
|
39
36
|
String rubyFormat = "%" + entry.getKey();
|
40
37
|
String javaFormat = entry.getValue();
|
41
38
|
|
42
|
-
TimestampFormatter rubyFormatter = new TimestampFormatter(
|
43
|
-
TimestampFormatter javaFormatter = new TimestampFormatter(
|
39
|
+
TimestampFormatter rubyFormatter = new TimestampFormatter(rubyFormat, zone);
|
40
|
+
TimestampFormatter javaFormatter = new TimestampFormatter(javaFormat, zone);
|
44
41
|
String rubyFormatted = rubyFormatter.format(timestamp);
|
45
42
|
String javaFormatted = javaFormatter.format(timestamp);
|
46
43
|
// System.out.println(String.format("%s<%s> %s<%s>", rubyFormat, rubyFormatted, javaFormat, javaFormatted));
|
47
44
|
|
48
|
-
TimestampParser rubyParser = new TimestampParser(
|
49
|
-
TimestampParser javaParser = new TimestampParser(
|
45
|
+
TimestampParser rubyParser = new TimestampParser(Arrays.asList("." + rubyFormat), zone);
|
46
|
+
TimestampParser javaParser = new TimestampParser(Arrays.asList("." + javaFormat), zone);
|
50
47
|
Timestamp rubyParsed = rubyParser.parse("." + rubyFormatted);
|
51
48
|
try {
|
52
49
|
Timestamp javaParsed = javaParser.parse("." + rubyFormatted);
|
@@ -75,7 +72,7 @@ public class TestTimestampFormatConverter
|
|
75
72
|
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z";
|
76
73
|
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
77
74
|
|
78
|
-
TimestampParser parser = new TimestampParser(
|
75
|
+
TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
|
79
76
|
try {
|
80
77
|
parser.parse("2016-05-12 20:14:13.123456789 +09:00");
|
81
78
|
}
|
@@ -88,7 +85,7 @@ public class TestTimestampFormatConverter
|
|
88
85
|
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn 'UTC'";
|
89
86
|
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
90
87
|
|
91
|
-
TimestampParser parser = new TimestampParser(
|
88
|
+
TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
|
92
89
|
try {
|
93
90
|
parser.parse("2016-05-12 20:14:13.123456789 UTC");
|
94
91
|
}
|
@@ -101,7 +98,7 @@ public class TestTimestampFormatConverter
|
|
101
98
|
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn '+00:00'";
|
102
99
|
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
103
100
|
|
104
|
-
TimestampParser parser = new TimestampParser(
|
101
|
+
TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
|
105
102
|
try {
|
106
103
|
parser.parse("2016-05-12 20:14:13.123456789 +00:00");
|
107
104
|
}
|
@@ -0,0 +1,62 @@
|
|
1
|
+
package org.embulk.filter.timestamp_format;
|
2
|
+
|
3
|
+
import org.embulk.EmbulkTestRuntime;
|
4
|
+
|
5
|
+
import org.embulk.spi.time.Timestamp;
|
6
|
+
import org.joda.time.DateTimeZone;
|
7
|
+
|
8
|
+
import org.junit.Before;
|
9
|
+
import org.junit.Rule;
|
10
|
+
import org.junit.Test;
|
11
|
+
|
12
|
+
import java.util.Arrays;
|
13
|
+
|
14
|
+
import static org.junit.Assert.assertEquals;
|
15
|
+
import static org.junit.Assert.fail;
|
16
|
+
|
17
|
+
public class TestTimestampParser
|
18
|
+
{
|
19
|
+
@Rule
|
20
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
21
|
+
public DateTimeZone zone;
|
22
|
+
public Timestamp expected;
|
23
|
+
|
24
|
+
@Before
|
25
|
+
public void createResource()
|
26
|
+
{
|
27
|
+
zone = DateTimeZone.UTC;
|
28
|
+
expected = Timestamp.ofEpochSecond(1463065359, 123456789);
|
29
|
+
}
|
30
|
+
|
31
|
+
@Test
|
32
|
+
public void testJRubyParser()
|
33
|
+
{
|
34
|
+
String rubyFormat = "%Y-%m-%d %H:%M:%S.%N %:z";
|
35
|
+
|
36
|
+
TimestampParser parser = new TimestampParser(Arrays.asList(rubyFormat), zone);
|
37
|
+
try {
|
38
|
+
Timestamp actual = parser.parse("2016-05-13 00:02:39.123456789 +09:00");
|
39
|
+
// embulk >= 0.8.27 uses the new, faster JRuby timestamp parser, which supports nanoseconds
|
40
|
+
// embulk < 0.8.27 uses the old, slower JRuby timestamp parser, which does not support nanoseconds
|
41
|
+
//assertEquals(expected, actual);
|
42
|
+
}
|
43
|
+
catch (IllegalArgumentException ex) {
|
44
|
+
fail();
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
@Test
|
49
|
+
public void testJavaParser()
|
50
|
+
{
|
51
|
+
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z";
|
52
|
+
|
53
|
+
TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
|
54
|
+
try {
|
55
|
+
Timestamp actual = parser.parse("2016-05-13 00:02:39.123456789 +09:00");
|
56
|
+
assertEquals(expected, actual);
|
57
|
+
}
|
58
|
+
catch (IllegalArgumentException ex) {
|
59
|
+
fail();
|
60
|
+
}
|
61
|
+
}
|
62
|
+
}
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-timestamp_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-08-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
|
14
|
+
name: bundler
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
15
16
|
requirements:
|
16
17
|
- - ~>
|
17
18
|
- !ruby/object:Gem::Version
|
18
19
|
version: '1.0'
|
19
|
-
|
20
|
-
prerelease: false
|
21
|
-
type: :development
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
23
21
|
requirements:
|
24
22
|
- - ~>
|
25
23
|
- !ruby/object:Gem::Version
|
26
24
|
version: '1.0'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
|
28
|
+
name: rake
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
31
|
- - '>='
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '10.0'
|
33
|
-
|
34
|
-
prerelease: false
|
35
|
-
type: :development
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
37
35
|
requirements:
|
38
36
|
- - '>='
|
39
37
|
- !ruby/object:Gem::Version
|
40
38
|
version: '10.0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
41
|
description: A filter plugin for Embulk to change timestamp format.
|
42
42
|
email:
|
43
43
|
- sonots@gmail.com
|
@@ -106,13 +106,13 @@ files:
|
|
106
106
|
- src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java
|
107
107
|
- src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
|
108
108
|
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java
|
109
|
+
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampParser.java
|
109
110
|
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
|
110
111
|
- classpath/accessors-smart-1.1.jar
|
111
112
|
- classpath/asm-5.0.3.jar
|
112
|
-
- classpath/
|
113
|
-
- classpath/embulk-filter-timestamp_format-0.2.4.jar
|
113
|
+
- classpath/embulk-filter-timestamp_format-0.3.0.jar
|
114
114
|
- classpath/json-smart-2.2.1.jar
|
115
|
-
- classpath/JsonPathCompiler-0.1.
|
115
|
+
- classpath/JsonPathCompiler-0.1.2.jar
|
116
116
|
- classpath/slf4j-api-1.7.21.jar
|
117
117
|
homepage: https://github.com/sonots/embulk-filter-timestamp_format
|
118
118
|
licenses:
|