embulk-filter-timestamp_format 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b154ff10f65055de61c4bc6849cf97b64a280e38
4
- data.tar.gz: a452a5091c1128268b22cdb74b462b18ab15457a
3
+ metadata.gz: 451b88351373a8de1875a6fdc5c78e9c91b093e6
4
+ data.tar.gz: afdf5911609850f33f6f3351fe90eed8d96d8f81
5
5
  SHA512:
6
- metadata.gz: 381c6bf3590dd48476d6b30aedc0211896bd057740e9f9a0d5f1032af3d12f545046129580ff62efd4f1f671d71f3a37a04183c37fe2083801babb8b0394b567
7
- data.tar.gz: 73e0ab777b21d4f07e640a8f5cb06607b4f1ee6bfdaa20e47896901aeb2bd68009095b749b754981a2a2d4af62e08f68fc5f5e941b630c761376c9045b7add95
6
+ metadata.gz: dc4a96d6f2b3a3743514fa175e4bfae7fe89d1f8e104b59460a4d9a70e0f9122b5eea007bfe5937782a45935b7186fe36edebdbd3e5d660e4ea6ce73654f43ae
7
+ data.tar.gz: b3c6057322b0620999b70ab6b49d92bd2154560120ceab0966472775ffcf0b07dbb7748cfe6dac0a53627bd657898039f2069b94eb7380c280b5e6e946883d0f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ # 0.3.0 (2017-08-23)
2
+
3
+ Changes:
4
+
5
+ * Support new TimestampFormatter and TimestampParser API of embulk >= 0.8.29
6
+ * Note that this plugin now requires embulk >= 0.8.29
7
+
8
+ # 0.2.5 (2017-07-11)
9
+
10
+ Enhancements:
11
+
12
+ * Leverage new faster jruby timestamp parser introduced in embulk 0.8.27.
13
+
1
14
  # 0.2.4 (2016-11-06)
2
15
 
3
16
  Enhancements:
data/README.md CHANGED
@@ -83,6 +83,8 @@ Following operators of JSONPath are not supported:
83
83
 
84
84
  ## JRuby Timestamp Parser Performance Issue
85
85
 
86
+ **NEWS: (2017/07/10) embulk 0.8.27 was released with a fast JRuby timestamp parser. This issue should now be resolved, so support for the Java timestamp parser will be dropped in a future release.**
87
+
86
88
  Embulk's timestamp parser originally used a JRuby implementation, which is slow.
87
89
  To improve performance, this plugin also supports Java's Joda-Time [DateTimeFormat](http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) format as:
88
90
 
@@ -117,10 +119,10 @@ If you configure `timestamp_parser: auto_java`, this plugin tries to convert rub
117
119
 
118
120
  Benchmark test sets are available at [./bench](./bench). In my environment (MacBook Pro), the results for 1000000 timestamps are:
119
121
 
120
- * java parser / java formatter: 1.3s
121
- * java parser / jruby formatter: 1.4s
122
- * jruby parser / java formatter: 64.52s
123
- * jruby parser / jruby formatter: 65.06s
122
+ * java parser + java formatter: 1.3s
123
+ * java parser + jruby formatter: 1.4s
124
+ * jruby parser + java formatter: 64.52s
125
+ * jruby parser + jruby formatter: 65.06s
124
126
 
125
127
  JRuby parser is slow, but JRuby formatter is not so slow.
126
128
 
data/build.gradle CHANGED
@@ -13,17 +13,17 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.2.4"
16
+ version = "0.3.0"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
20
  dependencies {
21
- compile "org.embulk:embulk-core:0.8.+"
22
- provided "org.embulk:embulk-core:0.8.+"
21
+ compile "org.embulk:embulk-core:0.8.29+"
22
+ provided "org.embulk:embulk-core:0.8.29+"
23
23
  compile "io.github.medjed:JsonPathCompiler:0.1.+"
24
24
 
25
25
  testCompile "junit:junit:4.+"
26
- testCompile "org.embulk:embulk-core:0.7.+:tests"
26
+ testCompile "org.embulk:embulk-core:0.8.27+:tests"
27
27
  }
28
28
 
29
29
  checkstyle {
@@ -1,10 +1,12 @@
1
- 2016-11-06 14:37:03.501 +0900: Embulk v0.8.6
2
- 2016-11-06 14:37:04.349 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 14:37:04.365 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
4
- 2016-11-06 14:37:04.371 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
1
+ 2017-08-23 17:24:55.119 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:24:59.552 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:24:59.617 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
4
+ 2017-08-23 17:24:59.618 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:24:59.625 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
6
+ 2017-08-23 17:24:59.636 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
5
7
  +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
6
8
  | record:json |
7
9
  +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
8
- | {"timestamp":1436713200000,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} |
9
- | {"timestamp":1436713200100,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} |
10
+ | {"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"},"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"timestamp":1436713200000} |
11
+ | {"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"},"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"timestamp":1436713200100} |
10
12
  +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
@@ -1,7 +1,9 @@
1
- 2016-11-06 13:07:43.984 +0900: Embulk v0.8.6
2
- 2016-11-06 13:07:44.752 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 13:07:44.767 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_double.csv'
4
- 2016-11-06 13:07:44.771 +0900 [INFO] (0001:preview): Loading files [example/from_double.csv]
1
+ 2017-08-23 17:25:14.951 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:25:19.079 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:25:19.132 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_double.csv'
4
+ 2017-08-23 17:25:19.133 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:25:19.138 +0900 [INFO] (0001:preview): Loading files [example/from_double.csv]
6
+ 2017-08-23 17:25:19.151 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
5
7
  +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
6
8
  | double1:string | double2:long | double3:double | double4:timestamp | record:json |
7
9
  +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
@@ -1,7 +1,9 @@
1
- 2016-11-06 13:11:24.079 +0900: Embulk v0.8.6
2
- 2016-11-06 13:11:24.842 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 13:11:24.858 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_long.csv'
4
- 2016-11-06 13:11:24.862 +0900 [INFO] (0001:preview): Loading files [example/from_long.csv]
1
+ 2017-08-23 17:25:28.989 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:25:33.716 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:25:33.758 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_long.csv'
4
+ 2017-08-23 17:25:33.760 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:25:33.767 +0900 [INFO] (0001:preview): Loading files [example/from_long.csv]
6
+ 2017-08-23 17:25:33.780 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
5
7
  +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
6
8
  | long1:string | long2:long | long3:double | long4:timestamp | record:json |
7
9
  +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
@@ -1,21 +1,23 @@
1
- 2016-11-06 13:28:37.337 +0900: Embulk v0.8.6
2
- 2016-11-06 13:28:38.096 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 13:28:38.112 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
- 2016-11-06 13:28:38.116 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
5
- +-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
6
- | string1:string | string2:long | string3:double | string4:timestamp | record:json |
7
- +-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
8
- | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
9
- | 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
10
- | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
11
- | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
12
- | 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
13
- | 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
14
- | 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
15
- | 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
16
- | 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
17
- | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
18
- | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
19
- | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
20
- | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
21
- +-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
1
+ 2017-08-23 17:25:45.974 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:25:50.111 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:25:50.154 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
+ 2017-08-23 17:25:50.155 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:25:50.160 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
6
+ 2017-08-23 17:25:50.172 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
7
+ +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
8
+ | string1:string | string2:long | string3:double | string4:timestamp | record:json |
9
+ +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
10
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
11
+ | 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
12
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
13
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
14
+ | 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
15
+ | 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
16
+ | 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
17
+ | 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
18
+ | 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
19
+ | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
20
+ | 2015-07-13 00:00:00.123456700 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456700"} |
21
+ | 2015-07-13 00:00:00.123456780 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456780"} |
22
+ | 2015-07-13 00:00:00.123456789 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456789"} |
23
+ +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
@@ -1,7 +1,9 @@
1
- 2016-11-06 14:15:56.683 +0900: Embulk v0.8.6
2
- 2016-11-06 14:15:57.554 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 14:15:57.568 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
- 2016-11-06 14:15:57.573 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
1
+ 2017-08-23 17:26:34.305 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:26:38.614 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:26:38.702 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
+ 2017-08-23 17:26:38.704 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:26:38.711 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
6
+ 2017-08-23 17:26:38.728 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
5
7
  +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
6
8
  | string1:string | string2:long | string3:double | string4:timestamp | record:json |
7
9
  +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
@@ -1,7 +1,9 @@
1
- 2016-11-06 14:16:05.976 +0900: Embulk v0.8.6
2
- 2016-11-06 14:16:06.833 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 14:16:06.848 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
- 2016-11-06 14:16:06.852 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
1
+ 2017-08-23 17:26:56.132 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:27:00.403 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:27:00.454 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
+ 2017-08-23 17:27:00.455 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:27:00.460 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
6
+ 2017-08-23 17:27:00.474 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
5
7
  +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
6
8
  | string1:string | string2:long | string3:double | string4:timestamp | record:json |
7
9
  +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
@@ -1,7 +1,9 @@
1
- 2016-11-06 13:32:15.784 +0900: Embulk v0.8.6
2
- 2016-11-06 13:32:16.556 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
- 2016-11-06 13:32:16.571 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_timestamp.csv'
4
- 2016-11-06 13:32:16.576 +0900 [INFO] (0001:preview): Loading files [example/from_timestamp.csv]
1
+ 2017-08-23 17:27:14.804 +0900: Embulk v0.8.30
2
+ 2017-08-23 17:27:19.493 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2017-08-23 17:27:19.591 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_timestamp.csv'
4
+ 2017-08-23 17:27:19.592 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped.
5
+ 2017-08-23 17:27:19.600 +0900 [INFO] (0001:preview): Loading files [example/from_timestamp.csv]
6
+ 2017-08-23 17:27:19.623 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source
5
7
  +-------------------------------+-------------------+-------------------+-----------------------------+
6
8
  | timestamp1:string | timestamp2:long | timestamp3:double | timestamp4:timestamp |
7
9
  +-------------------------------+-------------------+-------------------+-----------------------------+
@@ -88,7 +88,7 @@ public class ColumnCaster
88
88
  }
89
89
  }
90
90
  }
91
- return new TimestampParser(task.getJRuby(), newFormatList, timezone);
91
+ return new TimestampParser(newFormatList, timezone);
92
92
  }
93
93
 
94
94
  private void buildTimestampFormatterMap()
@@ -106,7 +106,7 @@ public class ColumnCaster
106
106
  {
107
107
  String format = columnConfig.getToFormat().or(task.getDefaultToTimestampFormat());
108
108
  DateTimeZone timezone = columnConfig.getToTimeZone().or(task.getDefaultToTimeZone());
109
- return new TimestampFormatter(task.getJRuby(), format, timezone);
109
+ return new TimestampFormatter(format, timezone);
110
110
  }
111
111
 
112
112
  private void buildFromTimestampUnitMap()
@@ -24,7 +24,6 @@ import org.embulk.spi.type.BooleanType;
24
24
  import org.embulk.spi.type.JsonType;
25
25
  import org.embulk.spi.type.TimestampType;
26
26
  import org.embulk.spi.type.Type;
27
- import org.jruby.embed.ScriptingContainer;
28
27
  import org.slf4j.Logger;
29
28
 
30
29
  import java.util.List;
@@ -77,9 +76,6 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
77
76
  @Config("default_to_timestamp_unit")
78
77
  @ConfigDefault("\"second\"")
79
78
  TimestampUnit getDefaultToTimestampUnit();
80
-
81
- @ConfigInject
82
- ScriptingContainer getJRuby();
83
79
  }
84
80
 
85
81
  @Override
@@ -12,7 +12,6 @@ import org.embulk.spi.util.LineEncoder;
12
12
 
13
13
  import org.joda.time.DateTime;
14
14
  import org.joda.time.DateTimeZone;
15
- import org.jruby.embed.ScriptingContainer;
16
15
  import org.jruby.util.RubyDateFormat;
17
16
 
18
17
  import java.util.Locale;
@@ -52,16 +51,15 @@ public class TimestampFormatter
52
51
 
53
52
  public TimestampFormatter(PluginTask task, Optional<? extends TimestampColumnOption> columnOption)
54
53
  {
55
- this(task.getJRuby(),
56
- columnOption.isPresent() ?
57
- columnOption.get().getToFormat().or(task.getDefaultToTimestampFormat())
58
- : task.getDefaultToTimestampFormat(),
59
- columnOption.isPresent() ?
60
- columnOption.get().getToTimeZone().or(task.getDefaultToTimeZone())
61
- : task.getDefaultToTimeZone());
54
+ this(columnOption.isPresent() ?
55
+ columnOption.get().getToFormat().or(task.getDefaultToTimestampFormat())
56
+ : task.getDefaultToTimestampFormat(),
57
+ columnOption.isPresent() ?
58
+ columnOption.get().getToTimeZone().or(task.getDefaultToTimeZone())
59
+ : task.getDefaultToTimeZone());
62
60
  }
63
61
 
64
- public TimestampFormatter(ScriptingContainer jruby, String format, DateTimeZone toTimeZone)
62
+ public TimestampFormatter(String format, DateTimeZone toTimeZone)
65
63
  {
66
64
  this.toTimeZone = toTimeZone;
67
65
  if (format.contains("%")) {
@@ -17,7 +17,6 @@ import org.embulk.spi.time.TimestampParseException;
17
17
  import org.joda.time.DateTime;
18
18
  import org.joda.time.DateTimeZone;
19
19
  import org.joda.time.format.DateTimeFormatter;
20
- import org.jruby.embed.ScriptingContainer;
21
20
 
22
21
  import java.util.ArrayList;
23
22
  import java.util.List;
@@ -48,30 +47,27 @@ public class TimestampParser {
48
47
  Optional<List<String>> getFromFormat();
49
48
  }
50
49
 
51
- private final List<JRubyTimeParserHelper> jrubyParserList = new ArrayList<>();
50
+ private final List<org.embulk.spi.time.TimestampParser> jrubyParserList = new ArrayList<>();
52
51
  private final List<DateTimeFormatter> javaParserList = new ArrayList<>();
53
52
  private final List<Boolean> handleNanoResolutionList = new ArrayList<>();
54
53
  private final DateTimeZone defaultFromTimeZone;
55
54
  private final Pattern nanoSecPattern = Pattern.compile("\\.(\\d+)");
56
55
 
57
56
  TimestampParser(PluginTask task) {
58
- this(task.getJRuby(), task.getDefaultFromTimestampFormat(), task.getDefaultFromTimeZone());
57
+ this(task.getDefaultFromTimestampFormat(), task.getDefaultFromTimeZone());
59
58
  }
60
59
 
61
60
  public TimestampParser(PluginTask task, TimestampColumnOption columnOption) {
62
- this(task.getJRuby(),
63
- columnOption.getFromFormat().or(task.getDefaultFromTimestampFormat()),
64
- columnOption.getFromTimeZone().or(task.getDefaultFromTimeZone()));
61
+ this(columnOption.getFromFormat().or(task.getDefaultFromTimestampFormat()),
62
+ columnOption.getFromTimeZone().or(task.getDefaultFromTimeZone()));
65
63
  }
66
64
 
67
- public TimestampParser(ScriptingContainer jruby, List<String> formatList, DateTimeZone defaultFromTimeZone) {
68
- JRubyTimeParserHelperFactory helperFactory = (JRubyTimeParserHelperFactory) jruby.runScriptlet("Embulk::Java::TimeParserHelper::Factory.new");
69
-
65
+ public TimestampParser(List<String> formatList, DateTimeZone defaultFromTimeZone) {
70
66
  // TODO get default current time from ExecTask.getExecTimestamp
71
67
  for (String format : formatList) {
72
68
  if (format.contains("%")) {
73
- JRubyTimeParserHelper helper = (JRubyTimeParserHelper) helperFactory.newInstance(format, 1970, 1, 1, 0, 0, 0, 0); // TODO default time zone
74
- this.jrubyParserList.add(helper);
69
+ org.embulk.spi.time.TimestampParser parser = new org.embulk.spi.time.TimestampParser(format, defaultFromTimeZone);
70
+ this.jrubyParserList.add(parser);
75
71
  } else {
76
72
  // special treatment for nano resolution. n is not originally supported by Joda-Time
77
73
  if (format.contains("nnnnnnnnn")) {
@@ -106,38 +102,25 @@ public class TimestampParser {
106
102
  }
107
103
 
108
104
  private Timestamp jrubyParse(String text) throws TimestampParseException {
109
- long localUsec = -1;
105
+ Timestamp timestamp = null;
110
106
  TimestampParseException exception = null;
111
107
 
112
- JRubyTimeParserHelper helper = null;
113
- for (JRubyTimeParserHelper h : jrubyParserList) {
114
- helper = h;
108
+ org.embulk.spi.time.TimestampParser parser = null;
109
+ for (org.embulk.spi.time.TimestampParser p : jrubyParserList) {
110
+ parser = p;
115
111
  try {
116
- localUsec = helper.strptimeUsec(text); // NOTE: micro second resolution
112
+ // NOTE: embulk >= 0.8.27 uses new faster jruby timestamp parser, and it supports nano second
113
+ // NOTE: embulk < 0.8.27 uses old slower jruby timestamp parser, and it supports micro second
114
+ timestamp = parser.parse(text);
117
115
  break;
118
116
  } catch (TimestampParseException ex) {
119
117
  exception = ex;
120
118
  }
121
119
  }
122
- if (localUsec == -1) {
120
+ if (timestamp == null) {
123
121
  throw exception;
124
122
  }
125
- DateTimeZone timeZone = defaultFromTimeZone;
126
- String zone = helper.getZone();
127
-
128
- if (zone != null) {
129
- // TODO cache parsed zone?
130
- timeZone = parseDateTimeZone(zone);
131
- if (timeZone == null) {
132
- throw new TimestampParseException("Invalid time zone name '" + text + "'");
133
- }
134
- }
135
-
136
- long localSec = localUsec / 1000000;
137
- long usec = localUsec % 1000000;
138
- long sec = timeZone.convertLocalToUTC(localSec * 1000, false) / 1000;
139
-
140
- return Timestamp.ofEpochSecond(sec, usec * 1000);
123
+ return timestamp;
141
124
  }
142
125
 
143
126
  private Timestamp javaParse(String text) throws IllegalArgumentException {
@@ -4,7 +4,6 @@ import org.embulk.EmbulkTestRuntime;
4
4
 
5
5
  import org.embulk.spi.time.Timestamp;
6
6
  import org.joda.time.DateTimeZone;
7
- import org.jruby.embed.ScriptingContainer;
8
7
 
9
8
  import org.junit.Before;
10
9
  import org.junit.Rule;
@@ -20,14 +19,12 @@ public class TestTimestampFormatConverter
20
19
  {
21
20
  @Rule
22
21
  public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
23
- public ScriptingContainer jruby;
24
22
  public DateTimeZone zone;
25
23
  public Timestamp timestamp;
26
24
 
27
25
  @Before
28
26
  public void createResource()
29
27
  {
30
- jruby = new ScriptingContainer();
31
28
  zone = DateTimeZone.UTC;
32
29
  timestamp = Timestamp.ofEpochSecond(1463130159, 123456789);
33
30
  }
@@ -39,14 +36,14 @@ public class TestTimestampFormatConverter
39
36
  String rubyFormat = "%" + entry.getKey();
40
37
  String javaFormat = entry.getValue();
41
38
 
42
- TimestampFormatter rubyFormatter = new TimestampFormatter(jruby, rubyFormat, zone);
43
- TimestampFormatter javaFormatter = new TimestampFormatter(jruby, javaFormat, zone);
39
+ TimestampFormatter rubyFormatter = new TimestampFormatter(rubyFormat, zone);
40
+ TimestampFormatter javaFormatter = new TimestampFormatter(javaFormat, zone);
44
41
  String rubyFormatted = rubyFormatter.format(timestamp);
45
42
  String javaFormatted = javaFormatter.format(timestamp);
46
43
  // System.out.println(String.format("%s<%s> %s<%s>", rubyFormat, rubyFormatted, javaFormat, javaFormatted));
47
44
 
48
- TimestampParser rubyParser = new TimestampParser(jruby, Arrays.asList("." + rubyFormat), zone);
49
- TimestampParser javaParser = new TimestampParser(jruby, Arrays.asList("." + javaFormat), zone);
45
+ TimestampParser rubyParser = new TimestampParser(Arrays.asList("." + rubyFormat), zone);
46
+ TimestampParser javaParser = new TimestampParser(Arrays.asList("." + javaFormat), zone);
50
47
  Timestamp rubyParsed = rubyParser.parse("." + rubyFormatted);
51
48
  try {
52
49
  Timestamp javaParsed = javaParser.parse("." + rubyFormatted);
@@ -75,7 +72,7 @@ public class TestTimestampFormatConverter
75
72
  String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z";
76
73
  assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
77
74
 
78
- TimestampParser parser = new TimestampParser(jruby, Arrays.asList(javaFormat), zone);
75
+ TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
79
76
  try {
80
77
  parser.parse("2016-05-12 20:14:13.123456789 +09:00");
81
78
  }
@@ -88,7 +85,7 @@ public class TestTimestampFormatConverter
88
85
  String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn 'UTC'";
89
86
  assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
90
87
 
91
- TimestampParser parser = new TimestampParser(jruby, Arrays.asList(javaFormat), zone);
88
+ TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
92
89
  try {
93
90
  parser.parse("2016-05-12 20:14:13.123456789 UTC");
94
91
  }
@@ -101,7 +98,7 @@ public class TestTimestampFormatConverter
101
98
  String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn '+00:00'";
102
99
  assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
103
100
 
104
- TimestampParser parser = new TimestampParser(jruby, Arrays.asList(javaFormat), zone);
101
+ TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
105
102
  try {
106
103
  parser.parse("2016-05-12 20:14:13.123456789 +00:00");
107
104
  }
@@ -0,0 +1,62 @@
1
+ package org.embulk.filter.timestamp_format;
2
+
3
+ import org.embulk.EmbulkTestRuntime;
4
+
5
+ import org.embulk.spi.time.Timestamp;
6
+ import org.joda.time.DateTimeZone;
7
+
8
+ import org.junit.Before;
9
+ import org.junit.Rule;
10
+ import org.junit.Test;
11
+
12
+ import java.util.Arrays;
13
+
14
+ import static org.junit.Assert.assertEquals;
15
+ import static org.junit.Assert.fail;
16
+
17
+ public class TestTimestampParser
18
+ {
19
+ @Rule
20
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
21
+ public DateTimeZone zone;
22
+ public Timestamp expected;
23
+
24
+ @Before
25
+ public void createResource()
26
+ {
27
+ zone = DateTimeZone.UTC;
28
+ expected = Timestamp.ofEpochSecond(1463065359, 123456789);
29
+ }
30
+
31
+ @Test
32
+ public void testJRubyParser()
33
+ {
34
+ String rubyFormat = "%Y-%m-%d %H:%M:%S.%N %:z";
35
+
36
+ TimestampParser parser = new TimestampParser(Arrays.asList(rubyFormat), zone);
37
+ try {
38
+ Timestamp actual = parser.parse("2016-05-13 00:02:39.123456789 +09:00");
39
+ // embulk >= 0.8.27 uses the new, faster JRuby Timestamp parser, which supports nanoseconds
40
+ // embulk < 0.8.27 uses the old, slow JRuby Timestamp parser, which does not support nanoseconds
41
+ //assertEquals(expected, actual);
42
+ }
43
+ catch (IllegalArgumentException ex) {
44
+ fail();
45
+ }
46
+ }
47
+
48
+ @Test
49
+ public void testJavaParser()
50
+ {
51
+ String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z";
52
+
53
+ TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone);
54
+ try {
55
+ Timestamp actual = parser.parse("2016-05-13 00:02:39.123456789 +09:00");
56
+ assertEquals(expected, actual);
57
+ }
58
+ catch (IllegalArgumentException ex) {
59
+ fail();
60
+ }
61
+ }
62
+ }
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-timestamp_format
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-06 00:00:00.000000000 Z
11
+ date: 2017-08-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- requirement: !ruby/object:Gem::Requirement
14
+ name: bundler
15
+ version_requirements: !ruby/object:Gem::Requirement
15
16
  requirements:
16
17
  - - ~>
17
18
  - !ruby/object:Gem::Version
18
19
  version: '1.0'
19
- name: bundler
20
- prerelease: false
21
- type: :development
22
- version_requirements: !ruby/object:Gem::Requirement
20
+ requirement: !ruby/object:Gem::Requirement
23
21
  requirements:
24
22
  - - ~>
25
23
  - !ruby/object:Gem::Version
26
24
  version: '1.0'
25
+ prerelease: false
26
+ type: :development
27
27
  - !ruby/object:Gem::Dependency
28
- requirement: !ruby/object:Gem::Requirement
28
+ name: rake
29
+ version_requirements: !ruby/object:Gem::Requirement
29
30
  requirements:
30
31
  - - '>='
31
32
  - !ruby/object:Gem::Version
32
33
  version: '10.0'
33
- name: rake
34
- prerelease: false
35
- type: :development
36
- version_requirements: !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
37
35
  requirements:
38
36
  - - '>='
39
37
  - !ruby/object:Gem::Version
40
38
  version: '10.0'
39
+ prerelease: false
40
+ type: :development
41
41
  description: A filter plugin for Embulk to change timestamp format.
42
42
  email:
43
43
  - sonots@gmail.com
@@ -106,13 +106,13 @@ files:
106
106
  - src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java
107
107
  - src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
108
108
  - src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java
109
+ - src/test/java/org/embulk/filter/timestamp_format/TestTimestampParser.java
109
110
  - src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
110
111
  - classpath/accessors-smart-1.1.jar
111
112
  - classpath/asm-5.0.3.jar
112
- - classpath/commons-lang3-3.4.jar
113
- - classpath/embulk-filter-timestamp_format-0.2.4.jar
113
+ - classpath/embulk-filter-timestamp_format-0.3.0.jar
114
114
  - classpath/json-smart-2.2.1.jar
115
- - classpath/JsonPathCompiler-0.1.1.jar
115
+ - classpath/JsonPathCompiler-0.1.2.jar
116
116
  - classpath/slf4j-api-1.7.21.jar
117
117
  homepage: https://github.com/sonots/embulk-filter-timestamp_format
118
118
  licenses: