embulk-filter-timestamp_format 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b154ff10f65055de61c4bc6849cf97b64a280e38
4
- data.tar.gz: a452a5091c1128268b22cdb74b462b18ab15457a
3
+ metadata.gz: 451b88351373a8de1875a6fdc5c78e9c91b093e6
4
+ data.tar.gz: afdf5911609850f33f6f3351fe90eed8d96d8f81
5
5
  SHA512:
6
- metadata.gz: 381c6bf3590dd48476d6b30aedc0211896bd057740e9f9a0d5f1032af3d12f545046129580ff62efd4f1f671d71f3a37a04183c37fe2083801babb8b0394b567
7
- data.tar.gz: 73e0ab777b21d4f07e640a8f5cb06607b4f1ee6bfdaa20e47896901aeb2bd68009095b749b754981a2a2d4af62e08f68fc5f5e941b630c761376c9045b7add95
6
+ metadata.gz: dc4a96d6f2b3a3743514fa175e4bfae7fe89d1f8e104b59460a4d9a70e0f9122b5eea007bfe5937782a45935b7186fe36edebdbd3e5d660e4ea6ce73654f43ae
7
+ data.tar.gz: b3c6057322b0620999b70ab6b49d92bd2154560120ceab0966472775ffcf0b07dbb7748cfe6dac0a53627bd657898039f2069b94eb7380c280b5e6e946883d0f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ # 0.3.0 (2017-08-23)
2
+
3
+ Changes:
4
+
5
+ * Support new TimestampFormatter and TimestampParser API of embulk >= 0.8.29
6
+ * Note that this plugin now requires embulk >= 0.8.29
7
+
8
+ # 0.2.5 (2017-07-11)
9
+
10
+ Enhancements:
11
+
12
+ * Leverage new faster jruby timestamp parser introduced in embulk 0.8.27.
13
+
1
14
  # 0.2.4 (2016-11-06)
2
15
 
3
16
  Enhancements:
data/README.md CHANGED
@@ -83,6 +83,8 @@ Following operators of JSONPath are not supported:
83
83
 
84
84
  ## JRuby Timestamp Parser Performance Issue
85
85
 
86
+ **NEWS: (2017/07/10) embulk 0.8.27 was released with a fast JRuby timestamp parser. This issue should now be resolved, so support for the Java timestamp parser will be dropped in a future release.**
87
+
86
88
  Embulk's timestamp parser originally used a JRuby implementation, which was slow.
87
89
  To improve performance, this plugin also supports Java's Joda-Time [DateTimeFormat](http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) format as:
88
90
 
@@ -117,10 +119,10 @@ If you configure `timestamp_parser: auto_java`, this plugin tries to convert rub
117
119
 
118
120
  Benchmark test sets are available at [./bench](./bench). In my environment (MacBook Pro), for 1,000,000 timestamps:
119
121
 
120
- * java parser / java formatter: 1.3s
121
- * java parser / jruby formatter: 1.4s
122
- * jruby parser / java formatter: 64.52s
123
- * jruby parser / jruby formatter: 65.06s
122
+ * java parser + java formatter: 1.3s
123
+ * java parser + jruby formatter: 1.4s
124
+ * jruby parser + java formatter: 64.52s
125
+ * jruby parser + jruby formatter: 65.06s
124
126
 
125
127
  JRuby parser is slow, but JRuby formatter is not so slow.
126
128
 
data/build.gradle CHANGED
@@ -13,17 +13,17 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.2.4"
16
+ version = "0.3.0"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
20
  dependencies {
21
- compile "org.embulk:embulk-core:0.8.+"
22
- provided "org.embulk:embulk-core:0.8.+"
21
+ compile "org.embulk:embulk-core:0.8.29+"
22
+ provided "org.embulk:embulk-core:0.8.29+"
23
23
  compile "io.github.medjed:JsonPathCompiler:0.1.+"
24
24
 
25
25
  testCompile "junit:junit:4.+"
26
- testCompile "org.embulk:embulk-core:0.7.+:tests"
26
+ testCompile "org.embulk:embulk-core:0.8.27+:tests"
27
27
  }
28
28
 
29
29
  checkstyle {
@@ -1,10 +1,12 @@
1
- 2016-11-06 14:37:03.501 +0900: Embulk v0.8.6
2
- 2016-11-06 14:37:04.349 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 14:37:04.365 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
4
- 2016-11-06 14:37:04.371 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
1
+ 2017-08-23 17:24:55.119 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:24:59.552 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:24:59.617 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
4
+ 2017-08-23 17:24:59.618 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:24:59.625 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
6
+ 2017-08-23 17:24:59.636 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
5
7
  +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
6
8
  | record:json |
7
9
  +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
8
- | {"timestamp":1436713200000,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} |
9
- | {"timestamp":1436713200100,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} |
10
+ | {"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"},"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"timestamp":1436713200000} |
11
+ | {"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"},"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"timestamp":1436713200100} |
10
12
  +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
@@ -1,7 +1,9 @@
1
- 2016-11-06 13:07:43.984 +0900: Embulk v0.8.6
2
- 2016-11-06 13:07:44.752 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 13:07:44.767 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_double.csv'
4
- 2016-11-06 13:07:44.771 +0900 [INFO] (0001:preview): Loading files [example/from_double.csv]
1
+ 2017-08-23 17:25:14.951 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:25:19.079 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:25:19.132 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_double.csv'
4
+ 2017-08-23 17:25:19.133 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:25:19.138 +0900 [INFO] (0001:preview): Loading files [example/from_double.csv]
6
+ 2017-08-23 17:25:19.151 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
5
7
  +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
6
8
  | double1:string | double2:long | double3:double | double4:timestamp | record:json |
7
9
  +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
@@ -1,7 +1,9 @@
1
- 2016-11-06 13:11:24.079 +0900: Embulk v0.8.6
2
- 2016-11-06 13:11:24.842 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 13:11:24.858 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_long.csv'
4
- 2016-11-06 13:11:24.862 +0900 [INFO] (0001:preview): Loading files [example/from_long.csv]
1
+ 2017-08-23 17:25:28.989 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:25:33.716 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:25:33.758 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_long.csv'
4
+ 2017-08-23 17:25:33.760 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:25:33.767 +0900 [INFO] (0001:preview): Loading files [example/from_long.csv]
6
+ 2017-08-23 17:25:33.780 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
5
7
  +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
6
8
  | long1:string | long2:long | long3:double | long4:timestamp | record:json |
7
9
  +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
@@ -1,21 +1,23 @@
1
- 2016-11-06 13:28:37.337 +0900: Embulk v0.8.6
2
- 2016-11-06 13:28:38.096 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 13:28:38.112 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
- 2016-11-06 13:28:38.116 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
5
- +-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
6
- | string1:string | string2:long | string3:double | string4:timestamp | record:json |
7
- +-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
8
- | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
9
- | 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
10
- | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
11
- | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
12
- | 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
13
- | 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
14
- | 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
15
- | 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
16
- | 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
17
- | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
18
- | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
19
- | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
20
- | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
21
- +-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
1
+ 2017-08-23 17:25:45.974 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:25:50.111 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:25:50.154 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
+ 2017-08-23 17:25:50.155 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:25:50.160 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
6
+ 2017-08-23 17:25:50.172 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
7
+ +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
8
+ | string1:string | string2:long | string3:double | string4:timestamp | record:json |
9
+ +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
10
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
11
+ | 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
12
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
13
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
14
+ | 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
15
+ | 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
16
+ | 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
17
+ | 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
18
+ | 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
19
+ | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
20
+ | 2015-07-13 00:00:00.123456700 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456700"} |
21
+ | 2015-07-13 00:00:00.123456780 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456780"} |
22
+ | 2015-07-13 00:00:00.123456789 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456789"} |
23
+ +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
@@ -1,7 +1,9 @@
1
- 2016-11-06 14:15:56.683 +0900: Embulk v0.8.6
2
- 2016-11-06 14:15:57.554 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 14:15:57.568 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
- 2016-11-06 14:15:57.573 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
1
+ 2017-08-23 17:26:34.305 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:26:38.614 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:26:38.702 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
+ 2017-08-23 17:26:38.704 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:26:38.711 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
6
+ 2017-08-23 17:26:38.728 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
5
7
  +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
6
8
  | string1:string | string2:long | string3:double | string4:timestamp | record:json |
7
9
  +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
@@ -1,7 +1,9 @@
1
- 2016-11-06 14:16:05.976 +0900: Embulk v0.8.6
2
- 2016-11-06 14:16:06.833 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 14:16:06.848 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
- 2016-11-06 14:16:06.852 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
1
+ 2017-08-23 17:26:56.132 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:27:00.403 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:27:00.454 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
+ 2017-08-23 17:27:00.455 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:27:00.460 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
6
+ 2017-08-23 17:27:00.474 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
5
7
  +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
6
8
  | string1:string | string2:long | string3:double | string4:timestamp | record:json |
7
9
  +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
@@ -1,7 +1,9 @@
1
- 2016-11-06 13:32:15.784 +0900: Embulk v0.8.6
2
- 2016-11-06 13:32:16.556 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 13:32:16.571 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_timestamp.csv'
4
- 2016-11-06 13:32:16.576 +0900 [INFO] (0001:preview): Loading files [example/from_timestamp.csv]
1
+ 2017-08-23 17:27:14.804 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:27:19.493 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:27:19.591 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_timestamp.csv'
4
+ 2017-08-23 17:27:19.592 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:27:19.600 +0900 [INFO] (0001:preview): Loading files [example/from_timestamp.csv]
6
+ 2017-08-23 17:27:19.623 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
5
7
  +-------------------------------+-------------------+-------------------+-----------------------------+
6
8
  | timestamp1:string | timestamp2:long | timestamp3:double | timestamp4:timestamp |
7
9
  +-------------------------------+-------------------+-------------------+-----------------------------+
@@ -88,7 +88,7 @@ public class ColumnCaster
88
88
  }
89
89
  }
90
90
  }
91
- return new TimestampParser(task.getJRuby(), newFormatList, timezone);
91
+ return new TimestampParser(newFormatList, timezone);
92
92
  }
93
93
 
94
94
  private void buildTimestampFormatterMap()
@@ -106,7 +106,7 @@ public class ColumnCaster
106
106
  {
107
107
  String format = columnConfig.getToFormat().or(task.getDefaultToTimestampFormat());
108
108
  DateTimeZone timezone = columnConfig.getToTimeZone().or(task.getDefaultToTimeZone());
109
- return new TimestampFormatter(task.getJRuby(), format, timezone);
109
+ return new TimestampFormatter(format, timezone);
110
110
  }
111
111
 
112
112
  private void buildFromTimestampUnitMap()
@@ -24,7 +24,6 @@ import org.embulk.spi.type.BooleanType;
24
24
  import org.embulk.spi.type.JsonType;
25
25
  import org.embulk.spi.type.TimestampType;
26
26
  import org.embulk.spi.type.Type;
27
- import org.jruby.embed.ScriptingContainer;
28
27
  import org.slf4j.Logger;
29
28
 
30
29
  import java.util.List;
@@ -77,9 +76,6 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
77
76
  @Config("default_to_timestamp_unit")
78
77
  @ConfigDefault("\"second\"")
79
78
  TimestampUnit getDefaultToTimestampUnit();
80
-
81
- @ConfigInject
82
- ScriptingContainer getJRuby();
83
79
  }
84
80
 
85
81
  @Override
@@ -12,7 +12,6 @@ import org.embulk.spi.util.LineEncoder;
12
12
 
13
13
  import org.joda.time.DateTime;
14
14
  import org.joda.time.DateTimeZone;
15
- import org.jruby.embed.ScriptingContainer;
16
15
  import org.jruby.util.RubyDateFormat;
17
16
 
18
17
  import java.util.Locale;
@@ -52,16 +51,15 @@ public class TimestampFormatter
52
51
 
53
52
  public TimestampFormatter(PluginTask task, Optional<? extends TimestampColumnOption> columnOption)
54
53
  {
55
- this(task.getJRuby(),
56
- columnOption.isPresent() ?
57
- columnOption.get().getToFormat().or(task.getDefaultToTimestampFormat())
58
- : task.getDefaultToTimestampFormat(),
59
- columnOption.isPresent() ?
60
- columnOption.get().getToTimeZone().or(task.getDefaultToTimeZone())
61
- : task.getDefaultToTimeZone());
54
+ this(columnOption.isPresent() ?
55
+ columnOption.get().getToFormat().or(task.getDefaultToTimestampFormat())
56
+ : task.getDefaultToTimestampFormat(),
57
+ columnOption.isPresent() ?
58
+ columnOption.get().getToTimeZone().or(task.getDefaultToTimeZone())
59
+ : task.getDefaultToTimeZone());
62
60
  }
63
61
 
64
- public TimestampFormatter(ScriptingContainer jruby, String format, DateTimeZone toTimeZone)
62
+ public TimestampFormatter(String format, DateTimeZone toTimeZone)
65
63
  {
66
64
  this.toTimeZone = toTimeZone;
67
65
  if (format.contains("%")) {
@@ -17,7 +17,6 @@ import org.embulk.spi.time.TimestampParseException;
17
17
  import org.joda.time.DateTime;
18
18
  import org.joda.time.DateTimeZone;
19
19
  import org.joda.time.format.DateTimeFormatter;
20
- import org.jruby.embed.ScriptingContainer;
21
20
 
22
21
  import java.util.ArrayList;
23
22
  import java.util.List;
@@ -48,30 +47,27 @@ public class TimestampParser {
48
47
  Optional<List<String>> getFromFormat();
49
48
  }
50
49
 
51
- private final List<JRubyTimeParserHelper> jrubyParserList = new ArrayList<>();
50
+ private final List<org.embulk.spi.time.TimestampParser> jrubyParserList = new ArrayList<>();
52
51
  private final List<DateTimeFormatter> javaParserList = new ArrayList<>();
53
52
  private final List<Boolean> handleNanoResolutionList = new ArrayList<>();
54
53
  private final DateTimeZone defaultFromTimeZone;
55
54
  private final Pattern nanoSecPattern = Pattern.compile("\\.(\\d+)");
56
55
 
57
56
  TimestampParser(PluginTask task) {
58
- this(task.getJRuby(), task.getDefaultFromTimestampFormat(), task.getDefaultFromTimeZone());
57
+ this(task.getDefaultFromTimestampFormat(), task.getDefaultFromTimeZone());
59
58
  }
60
59
 
61
60
  public TimestampParser(PluginTask task, TimestampColumnOption columnOption) {
62
- this(task.getJRuby(),
63
- columnOption.getFromFormat().or(task.getDefaultFromTimestampFormat()),
64
- columnOption.getFromTimeZone().or(task.getDefaultFromTimeZone()));
61
+ this(columnOption.getFromFormat().or(task.getDefaultFromTimestampFormat()),
62
+ columnOption.getFromTimeZone().or(task.getDefaultFromTimeZone()));
65
63
  }
66
64
 
67
- public TimestampParser(ScriptingContainer jruby, List<String> formatList, DateTimeZone defaultFromTimeZone) {
68
- JRubyTimeParserHelperFactory helperFactory = (JRubyTimeParserHelperFactory) jruby.runScriptlet("Embulk::Java::TimeParserHelper::Factory.new");
69
-
65
+ public TimestampParser(List<String> formatList, DateTimeZone defaultFromTimeZone) {
70
66
  // TODO get default current time from ExecTask.getExecTimestamp
71
67
  for (String format : formatList) {
72
68
  if (format.contains("%")) {
73
- JRubyTimeParserHelper helper = (JRubyTimeParserHelper) helperFactory.newInstance(format, 1970, 1, 1, 0, 0, 0, 0); // TODO default time zone
74
- this.jrubyParserList.add(helper);
69
+ org.embulk.spi.time.TimestampParser parser = new org.embulk.spi.time.TimestampParser(format, defaultFromTimeZone);
70
+ this.jrubyParserList.add(parser);
75
71
  } else {
76
72
  // special treatment for nano resolution. n is not originally supported by Joda-Time
77
73
  if (format.contains("nnnnnnnnn")) {
@@ -106,38 +102,25 @@ public class TimestampParser {
106
102
  }
107
103
 
108
104
  private Timestamp jrubyParse(String text) throws TimestampParseException {
109
- long localUsec = -1;
105
+ Timestamp timestamp = null;
110
106
  TimestampParseException exception = null;
111
107
 
112
- JRubyTimeParserHelper helper = null;
113
- for (JRubyTimeParserHelper h : jrubyParserList) {
114
- helper = h;
108
+ org.embulk.spi.time.TimestampParser parser = null;
109
+ for (org.embulk.spi.time.TimestampParser p : jrubyParserList) {
110
+ parser = p;
115
111
  try {
116
- localUsec = helper.strptimeUsec(text); // NOTE: micro second resolution
112
+ // NOTE: embulk >= 0.8.27 uses new faster jruby timestamp parser, and it supports nano second
113
+ // NOTE: embulk < 0.8.27 uses old slower jruby timestamp parser, and it supports micro second
114
+ timestamp = parser.parse(text);
117
115
  break;
118
116
  } catch (TimestampParseException ex) {
119
117
  exception = ex;
120
118
  }
121
119
  }
122
- if (localUsec == -1) {
120
+ if (timestamp == null) {
123
121
  throw exception;
124
122
  }
125
- DateTimeZone timeZone = defaultFromTimeZone;
126
- String zone = helper.getZone();
127
-
128
- if (zone != null) {
129
- // TODO cache parsed zone?
130
- timeZone = parseDateTimeZone(zone);
131
- if (timeZone == null) {
132
- throw new TimestampParseException("Invalid time zone name '" + text + "'");
133
- }
134
- }
135
-
136
- long localSec = localUsec / 1000000;
137
- long usec = localUsec % 1000000;
138
- long sec = timeZone.convertLocalToUTC(localSec * 1000, false) / 1000;
139
-
140
- return Timestamp.ofEpochSecond(sec, usec * 1000);
123
+ return timestamp;
141
124
  }
142
125
 
143
126
  private Timestamp javaParse(String text) throws IllegalArgumentException {
@@ -4,7 +4,6 @@ import org.embulk.EmbulkTestRuntime;
4
4
 
5
5
  import org.embulk.spi.time.Timestamp;
6
6
  import org.joda.time.DateTimeZone;
7
- import org.jruby.embed.ScriptingContainer;
8
7
 
9
8
  import org.junit.Before;
10
9
  import org.junit.Rule;
@@ -20,14 +19,12 @@ public class TestTimestampFormatConverter
20
19
  {
21
20
  @Rule
22
21
  public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
23
- public ScriptingContainer jruby;
24
22
  public DateTimeZone zone;
25
23
  public Timestamp timestamp;
26
24
 
27
25
  @Before
28
26
  public void createResource()
29
27
  {
30
- jruby = new ScriptingContainer();
31
28
  zone = DateTimeZone.UTC;
32
29
  timestamp = Timestamp.ofEpochSecond(1463130159, 123456789);
33
30
  }
@@ -39,14 +36,14 @@ public class TestTimestampFormatConverter
39
36
  String rubyFormat = "%" + entry.getKey();
40
37
  String javaFormat = entry.getValue();
41
38
 
42
- TimestampFormatter rubyFormatter = new TimestampFormatter(jruby, rubyFormat, zone);
43
- TimestampFormatter javaFormatter = new TimestampFormatter(jruby, javaFormat, zone);
39
+ TimestampFormatter rubyFormatter = new TimestampFormatter(rubyFormat, zone);
40
+ TimestampFormatter javaFormatter = new TimestampFormatter(javaFormat, zone);
44
41
  String rubyFormatted = rubyFormatter.format(timestamp);
45
42
  String javaFormatted = javaFormatter.format(timestamp);
46
43
  // System.out.println(String.format("%s<%s> %s<%s>", rubyFormat, rubyFormatted, javaFormat, javaFormatted));
47
44
 
48
- TimestampParser rubyParser = new TimestampParser(jruby, Arrays.asList("." + rubyFormat), zone);
49
- TimestampParser javaParser = new TimestampParser(jruby, Arrays.asList("." + javaFormat), zone);
45
+ TimestampParser rubyParser = new TimestampParser(Arrays.asList("." + rubyFormat), zone);
46
+ TimestampParser javaParser = new TimestampParser(Arrays.asList("." + javaFormat), zone);
50
47
  Timestamp rubyParsed = rubyParser.parse("." + rubyFormatted);
51
48
  try {
52
49
  Timestamp javaParsed = javaParser.parse("." + rubyFormatted);
@@ -75,7 +72,7 @@ public class TestTimestampFormatConverter
75
72
  String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z";
76
73
  assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
77
74
 
78
- TimestampParser parser = new TimestampParser(jruby, Arrays.asList(javaFormat), zone);
75
+ TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
79
76
  try {
80
77
  parser.parse("2016-05-12 20:14:13.123456789 +09:00");
81
78
  }
@@ -88,7 +85,7 @@ public class TestTimestampFormatConverter
88
85
  String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn 'UTC'";
89
86
  assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
90
87
 
91
- TimestampParser parser = new TimestampParser(jruby, Arrays.asList(javaFormat), zone);
88
+ TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
92
89
  try {
93
90
  parser.parse("2016-05-12 20:14:13.123456789 UTC");
94
91
  }
@@ -101,7 +98,7 @@ public class TestTimestampFormatConverter
101
98
  String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn '+00:00'";
102
99
  assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
103
100
 
104
- TimestampParser parser = new TimestampParser(jruby, Arrays.asList(javaFormat), zone);
101
+ TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
105
102
  try {
106
103
  parser.parse("2016-05-12 20:14:13.123456789 +00:00");
107
104
  }
@@ -0,0 +1,62 @@
1
+ package org.embulk.filter.timestamp_format;
2
+
3
+ import org.embulk.EmbulkTestRuntime;
4
+
5
+ import org.embulk.spi.time.Timestamp;
6
+ import org.joda.time.DateTimeZone;
7
+
8
+ import org.junit.Before;
9
+ import org.junit.Rule;
10
+ import org.junit.Test;
11
+
12
+ import java.util.Arrays;
13
+
14
+ import static org.junit.Assert.assertEquals;
15
+ import static org.junit.Assert.fail;
16
+
17
+ public class TestTimestampParser
18
+ {
19
+ @Rule
20
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
21
+ public DateTimeZone zone;
22
+ public Timestamp expected;
23
+
24
+ @Before
25
+ public void createResource()
26
+ {
27
+ zone = DateTimeZone.UTC;
28
+ expected = Timestamp.ofEpochSecond(1463065359, 123456789);
29
+ }
30
+
31
+ @Test
32
+ public void testJRubyParser()
33
+ {
34
+ String rubyFormat = "%Y-%m-%d %H:%M:%S.%N %:z";
35
+
36
+ TimestampParser parser = new TimestampParser(Arrays.asList(rubyFormat), zone);
37
+ try {
38
+ Timestamp actual = parser.parse("2016-05-13 00:02:39.123456789 +09:00");
39
+ // embulk >= 0.8.27 uses the new, faster JRuby Timestamp parser, which supports nanoseconds
40
+ // embulk < 0.8.27 uses the old, slow JRuby Timestamp parser, which does not support nanoseconds
41
+ //assertEquals(expected, actual);
42
+ }
43
+ catch (IllegalArgumentException ex) {
44
+ fail();
45
+ }
46
+ }
47
+
48
+ @Test
49
+ public void testJavaParser()
50
+ {
51
+ String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z";
52
+
53
+ TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
54
+ try {
55
+ Timestamp actual = parser.parse("2016-05-13 00:02:39.123456789 +09:00");
56
+ assertEquals(expected, actual);
57
+ }
58
+ catch (IllegalArgumentException ex) {
59
+ fail();
60
+ }
61
+ }
62
+ }
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-timestamp_format
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-06 00:00:00.000000000 Z
11
+ date: 2017-08-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- requirement: !ruby/object:Gem::Requirement
14
+ name: bundler
15
+ version_requirements: !ruby/object:Gem::Requirement
15
16
  requirements:
16
17
  - - ~>
17
18
  - !ruby/object:Gem::Version
18
19
  version: '1.0'
19
- name: bundler
20
- prerelease: false
21
- type: :development
22
- version_requirements: !ruby/object:Gem::Requirement
20
+ requirement: !ruby/object:Gem::Requirement
23
21
  requirements:
24
22
  - - ~>
25
23
  - !ruby/object:Gem::Version
26
24
  version: '1.0'
25
+ prerelease: false
26
+ type: :development
27
27
  - !ruby/object:Gem::Dependency
28
- requirement: !ruby/object:Gem::Requirement
28
+ name: rake
29
+ version_requirements: !ruby/object:Gem::Requirement
29
30
  requirements:
30
31
  - - '>='
31
32
  - !ruby/object:Gem::Version
32
33
  version: '10.0'
33
- name: rake
34
- prerelease: false
35
- type: :development
36
- version_requirements: !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
37
35
  requirements:
38
36
  - - '>='
39
37
  - !ruby/object:Gem::Version
40
38
  version: '10.0'
39
+ prerelease: false
40
+ type: :development
41
41
  description: A filter plugin for Embulk to change timestamp format.
42
42
  email:
43
43
  - sonots@gmail.com
@@ -106,13 +106,13 @@ files:
106
106
  - src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java
107
107
  - src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
108
108
  - src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java
109
+ - src/test/java/org/embulk/filter/timestamp_format/TestTimestampParser.java
109
110
  - src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
110
111
  - classpath/accessors-smart-1.1.jar
111
112
  - classpath/asm-5.0.3.jar
112
- - classpath/commons-lang3-3.4.jar
113
- - classpath/embulk-filter-timestamp_format-0.2.4.jar
113
+ - classpath/embulk-filter-timestamp_format-0.3.0.jar
114
114
  - classpath/json-smart-2.2.1.jar
115
- - classpath/JsonPathCompiler-0.1.1.jar
115
+ - classpath/JsonPathCompiler-0.1.2.jar
116
116
  - classpath/slf4j-api-1.7.21.jar
117
117
  homepage: https://github.com/sonots/embulk-filter-timestamp_format
118
118
  licenses: