embulk-filter-timestamp_format 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +11 -5
- data/build.gradle +3 -2
- data/example/bracket_notation.txt +10 -0
- data/example/bracket_notation.yml +14 -0
- data/example/example.yml +18 -5
- data/example/from_double.csv +1 -0
- data/example/from_double.txt +9 -0
- data/example/{double.yml → from_double.yml} +8 -4
- data/example/from_long.csv +1 -0
- data/example/from_long.txt +9 -0
- data/example/{long.yml → from_long.yml} +8 -4
- data/example/from_string.csv +14 -0
- data/example/from_string.txt +21 -0
- data/example/{string.yml → from_string.yml} +9 -5
- data/example/from_string_auto_java.txt +21 -0
- data/example/{string_auto_java.yml → from_string_auto_java.yml} +8 -4
- data/example/from_string_java.txt +21 -0
- data/example/{string_nano.yml → from_string_java.yml} +8 -4
- data/example/{timestamp.csv → from_timestamp.csv} +0 -0
- data/example/from_timestamp.txt +9 -0
- data/example/{timestamp.yml → from_timestamp.yml} +4 -4
- data/example/{example.jsonl → nested.jsonl} +0 -0
- data/example/nested.txt +10 -0
- data/example/nested.yml +14 -0
- data/example/{example2.csv → timezone.csv} +0 -0
- data/example/timezone.txt +10 -0
- data/example/timezone.yml +16 -0
- data/src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java +3 -1
- data/src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java +4 -4
- data/src/main/java/org/embulk/filter/timestamp_format/JsonPathUtil.java +78 -0
- data/src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java +31 -21
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java +5 -5
- metadata +34 -23
- data/example/double.csv +0 -2
- data/example/example2.yml +0 -14
- data/example/json_double.jsonl +0 -1
- data/example/json_double.yml +0 -14
- data/example/json_long.jsonl +0 -1
- data/example/json_long.yml +0 -14
- data/example/json_string.jsonl +0 -2
- data/example/json_string.yml +0 -14
- data/example/long.csv +0 -1
- data/example/string.csv +0 -14
- data/example/string_java.yml +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b154ff10f65055de61c4bc6849cf97b64a280e38
|
4
|
+
data.tar.gz: a452a5091c1128268b22cdb74b462b18ab15457a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 381c6bf3590dd48476d6b30aedc0211896bd057740e9f9a0d5f1032af3d12f545046129580ff62efd4f1f671d71f3a37a04183c37fe2083801babb8b0394b567
|
7
|
+
data.tar.gz: 73e0ab777b21d4f07e640a8f5cb06607b4f1ee6bfdaa20e47896901aeb2bd68009095b749b754981a2a2d4af62e08f68fc5f5e941b630c761376c9045b7add95
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -63,17 +63,23 @@ Output will be as:
|
|
63
63
|
|
64
64
|
See [./example](./example) for more examples.
|
65
65
|
|
66
|
-
## JSONPath
|
66
|
+
## JSONPath
|
67
67
|
|
68
68
|
For a `type: json` column, you can specify a [JSONPath](http://goessner.net/articles/JsonPath/) as the column's name:
|
69
69
|
|
70
70
|
```
|
71
|
-
$.payload.key1
|
72
|
-
$.payload.array[0]
|
73
|
-
$.payload.array[*]
|
71
|
+
name: $.payload.key1
|
72
|
+
name: "$.payload.array[0]"
|
73
|
+
name: "$.payload.array[*]"
|
74
|
+
name: $['payload']['key1.key2']
|
74
75
|
```
|
75
76
|
|
76
|
-
|
77
|
+
The following JSONPath operators are not supported:
|
78
|
+
|
79
|
+
* Multiple properties such as `['name','name']`
|
80
|
+
* Multiple array indexes such as `[1,2]`
|
81
|
+
* Array slice such as `[1:2]`
|
82
|
+
* Filter expression such as `[?(<expression>)]`
|
77
83
|
|
78
84
|
## JRuby Timestamp Parser Performance Issue
|
79
85
|
|
data/build.gradle
CHANGED
@@ -13,14 +13,15 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.2.3"
|
16
|
+
version = "0.2.4"
|
17
17
|
sourceCompatibility = 1.7
|
18
18
|
targetCompatibility = 1.7
|
19
19
|
|
20
20
|
dependencies {
|
21
21
|
compile "org.embulk:embulk-core:0.8.+"
|
22
22
|
provided "org.embulk:embulk-core:0.8.+"
|
23
|
-
|
23
|
+
compile "io.github.medjed:JsonPathCompiler:0.1.+"
|
24
|
+
|
24
25
|
testCompile "junit:junit:4.+"
|
25
26
|
testCompile "org.embulk:embulk-core:0.7.+:tests"
|
26
27
|
}
|
@@ -0,0 +1,10 @@
|
|
1
|
+
2016-11-06 14:37:03.501 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 14:37:04.349 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 14:37:04.365 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
|
4
|
+
2016-11-06 14:37:04.371 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
|
5
|
+
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
6
|
+
| record:json |
|
7
|
+
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
8
|
+
| {"timestamp":1436713200000,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} |
|
9
|
+
| {"timestamp":1436713200100,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} |
|
10
|
+
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/nested.jsonl
|
4
|
+
parser:
|
5
|
+
type: json
|
6
|
+
filters:
|
7
|
+
- type: timestamp_format
|
8
|
+
default_to_timezone: "Asia/Tokyo"
|
9
|
+
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
10
|
+
columns:
|
11
|
+
- {name: "$['record']['timestamp']", type: long, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_unit: ms}
|
12
|
+
- {name: "$['record']['nested']['nested'][0]['timestamp']", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
|
13
|
+
out:
|
14
|
+
type: "null"
|
data/example/example.yml
CHANGED
@@ -1,14 +1,27 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_string.csv
|
4
4
|
parser:
|
5
|
-
type:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: string1, type: string}
|
8
|
+
- {name: string2, type: string}
|
9
|
+
- {name: string3, type: string}
|
10
|
+
- {name: string4, type: string}
|
11
|
+
- {name: record, type: json}
|
6
12
|
filters:
|
7
13
|
- type: timestamp_format
|
8
|
-
|
14
|
+
default_from_timezone: "Asia/Taipei"
|
15
|
+
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %z", "%Y-%m-%d"]
|
16
|
+
default_to_timezone: "Asia/Taipei"
|
9
17
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
10
18
|
columns:
|
11
|
-
- {name:
|
12
|
-
- {name:
|
19
|
+
- {name: string1, type: string}
|
20
|
+
- {name: string2, type: long, to_unit: ms}
|
21
|
+
- {name: string3, type: double, to_unit: ms}
|
22
|
+
- {name: string4, type: timestamp}
|
23
|
+
- {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
24
|
+
- {name: $.record.string2, type: long, to_unit: ms}
|
25
|
+
- {name: $.record.string3, type: double, to_unit: ms}
|
13
26
|
out:
|
14
27
|
type: "null"
|
@@ -0,0 +1 @@
|
|
1
|
+
1436713200100.2,1436713200100.2,1436713200100.2,1436713200100.2,"{""double1"":1436713200100.2,""double2"":1436713200100.2,""double3"":1436713200100.2}"
|
@@ -0,0 +1,9 @@
|
|
1
|
+
2016-11-06 13:07:43.984 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 13:07:44.752 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 13:07:44.767 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_double.csv'
|
4
|
+
2016-11-06 13:07:44.771 +0900 [INFO] (0001:preview): Loading files [example/from_double.csv]
|
5
|
+
+-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
|
6
|
+
| double1:string | double2:long | double3:double | double4:timestamp | record:json |
|
7
|
+
+-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
|
8
|
+
| 2015-07-13 00:00:00.100199936 | 1,436,713,200 | 1.4367132001002E9 | 2015-07-12 15:00:00.100199936 UTC | {"double2":1436713200,"double3":1.4367132001002E9,"double1":"2015-07-13 00:00:00.100199936"} |
|
9
|
+
+-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_double.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
columns:
|
@@ -8,13 +8,17 @@ in:
|
|
8
8
|
- {name: double2, type: double}
|
9
9
|
- {name: double3, type: double}
|
10
10
|
- {name: double4, type: double}
|
11
|
+
- {name: record, type: json}
|
11
12
|
filters:
|
12
13
|
- type: timestamp_format
|
13
14
|
default_from_timestamp_unit: ms
|
14
15
|
columns:
|
15
16
|
- {name: double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
16
|
-
- {name: double2, type:
|
17
|
-
- {name: double3, type:
|
18
|
-
- {name: double4, type:
|
17
|
+
- {name: double2, type: long}
|
18
|
+
- {name: double3, type: double}
|
19
|
+
- {name: double4, type: timestamp}
|
20
|
+
- {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
21
|
+
- {name: $.record.double2, type: long}
|
22
|
+
- {name: $.record.double3, type: double}
|
19
23
|
out:
|
20
24
|
type: "null"
|
@@ -0,0 +1 @@
|
|
1
|
+
1436713200100,1436713200100,1436713200100,1436713200100,"{""long1"":1436713200100,""long2"":1436713200100,""long3"":1436713200100}"
|
@@ -0,0 +1,9 @@
|
|
1
|
+
2016-11-06 13:11:24.079 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 13:11:24.842 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 13:11:24.858 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_long.csv'
|
4
|
+
2016-11-06 13:11:24.862 +0900 [INFO] (0001:preview): Loading files [example/from_long.csv]
|
5
|
+
+-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
|
6
|
+
| long1:string | long2:long | long3:double | long4:timestamp | record:json |
|
7
|
+
+-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
|
8
|
+
| 2015-07-13 00:00:00.100000000 | 1,436,713,200 | 1.4367132E9 | 2015-07-12 15:00:00.100 UTC | {"long3":1.4367132E9,"long2":1436713200,"long1":"2015-07-13 00:00:00.100000000"} |
|
9
|
+
+-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_long.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
columns:
|
@@ -8,13 +8,17 @@ in:
|
|
8
8
|
- {name: long2, type: long}
|
9
9
|
- {name: long3, type: long}
|
10
10
|
- {name: long4, type: long}
|
11
|
+
- {name: record, type: json}
|
11
12
|
filters:
|
12
13
|
- type: timestamp_format
|
13
14
|
default_from_timestamp_unit: ms
|
14
15
|
columns:
|
15
16
|
- {name: long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
16
|
-
- {name: long2, type:
|
17
|
-
- {name: long3, type:
|
18
|
-
- {name: long4, type:
|
17
|
+
- {name: long2, type: long}
|
18
|
+
- {name: long3, type: double}
|
19
|
+
- {name: long4, type: timestamp}
|
20
|
+
- {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
21
|
+
- {name: $.record.long2, type: long}
|
22
|
+
- {name: $.record.long3, type: double}
|
19
23
|
out:
|
20
24
|
type: "null"
|
@@ -0,0 +1,14 @@
|
|
1
|
+
2015-07-13,2015-07-13,2015-07-13,2015-07-13,"{""string1"":""2015-07-13"" ,""string2"":""2015-07-13"" ,""string3"":""2015-07-13"" }"
|
2
|
+
2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,"{""string1"":""2015-07-13 UTC"" ,""string2"":""2015-07-13 UTC"" ,""string3"":""2015-07-13 UTC"" }"
|
3
|
+
2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,"{""string1"":""2015-07-13 00:00:00"" ,""string2"":""2015-07-13 00:00:00"" ,""string3"":""2015-07-13 00:00:00"" }"
|
4
|
+
2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,"{""string1"":""2015-07-12 16:00:00 UTC"" ,""string2"":""2015-07-12 16:00:00 UTC"" ,""string3"":""2015-07-12 16:00:00 UTC"" }"
|
5
|
+
2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,"{""string1"":""2015-07-12 16:00:00.1 UTC"" ,""string2"":""2015-07-12 16:00:00.1 UTC"" ,""string3"":""2015-07-12 16:00:00.1 UTC"" }"
|
6
|
+
2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,"{""string1"":""2015-07-12 16:00:00.12 UTC"" ,""string2"":""2015-07-12 16:00:00.12 UTC"" ,""string3"":""2015-07-12 16:00:00.12 UTC"" }"
|
7
|
+
2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,"{""string1"":""2015-07-12 16:00:00.123 UTC"" ,""string2"":""2015-07-12 16:00:00.123 UTC"" ,""string3"":""2015-07-12 16:00:00.123 UTC"" }"
|
8
|
+
2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,"{""string1"":""2015-07-12 16:00:00.1234 UTC"" ,""string2"":""2015-07-12 16:00:00.1234 UTC"" ,""string3"":""2015-07-12 16:00:00.1234 UTC"" }"
|
9
|
+
2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,"{""string1"":""2015-07-12 16:00:00.12345 UTC"" ,""string2"":""2015-07-12 16:00:00.12345 UTC"" ,""string3"":""2015-07-12 16:00:00.12345 UTC"" }"
|
10
|
+
2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,"{""string1"":""2015-07-12 16:00:00.123456 UTC"" ,""string2"":""2015-07-12 16:00:00.123456 UTC"" ,""string3"":""2015-07-12 16:00:00.123456 UTC"" }"
|
11
|
+
2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,"{""string1"":""2015-07-12 16:00:00.1234567 UTC"" ,""string2"":""2015-07-12 16:00:00.1234567 UTC"" ,""string3"":""2015-07-12 16:00:00.1234567 UTC"" }"
|
12
|
+
2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,"{""string1"":""2015-07-12 16:00:00.12345678 UTC"" ,""string2"":""2015-07-12 16:00:00.12345678 UTC"" ,""string3"":""2015-07-12 16:00:00.12345678 UTC"" }"
|
13
|
+
2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,"{""string1"":""2015-07-12 16:00:00.123456789 UTC"",""string2"":""2015-07-12 16:00:00.123456789 UTC"",""string3"":""2015-07-12 16:00:00.123456789 UTC""}"
|
14
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
2016-11-06 13:28:37.337 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 13:28:38.096 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 13:28:38.112 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
|
4
|
+
2016-11-06 13:28:38.116 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
|
5
|
+
+-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
|
6
|
+
| string1:string | string2:long | string3:double | string4:timestamp | record:json |
|
7
|
+
+-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
|
8
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
9
|
+
| 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
|
10
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
11
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
12
|
+
| 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
|
13
|
+
| 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
|
14
|
+
| 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
|
15
|
+
| 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
|
16
|
+
| 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
|
17
|
+
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
|
18
|
+
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
|
19
|
+
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
|
20
|
+
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
|
21
|
+
+-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_string.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
columns:
|
@@ -8,6 +8,7 @@ in:
|
|
8
8
|
- {name: string2, type: string}
|
9
9
|
- {name: string3, type: string}
|
10
10
|
- {name: string4, type: string}
|
11
|
+
- {name: record, type: json}
|
11
12
|
filters:
|
12
13
|
- type: timestamp_format
|
13
14
|
default_from_timezone: "Asia/Taipei"
|
@@ -15,9 +16,12 @@ filters:
|
|
15
16
|
default_to_timezone: "Asia/Taipei"
|
16
17
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
17
18
|
columns:
|
18
|
-
- {name: string1}
|
19
|
-
- {name: string2, type:
|
20
|
-
- {name: string3, type:
|
21
|
-
- {name: string4, type:
|
19
|
+
- {name: string1, type: string}
|
20
|
+
- {name: string2, type: long, to_unit: ms}
|
21
|
+
- {name: string3, type: double, to_unit: ms}
|
22
|
+
- {name: string4, type: timestamp}
|
23
|
+
- {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
24
|
+
- {name: $.record.string2, type: long, to_unit: ms}
|
25
|
+
- {name: $.record.string3, type: double, to_unit: ms}
|
22
26
|
out:
|
23
27
|
type: "null"
|
@@ -0,0 +1,21 @@
|
|
1
|
+
2016-11-06 14:15:56.683 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 14:15:57.554 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 14:15:57.568 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
|
4
|
+
2016-11-06 14:15:57.573 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
|
5
|
+
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
6
|
+
| string1:string | string2:long | string3:double | string4:timestamp | record:json |
|
7
|
+
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
8
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
9
|
+
| 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
|
10
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
11
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
12
|
+
| 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
|
13
|
+
| 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
|
14
|
+
| 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
|
15
|
+
| 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
|
16
|
+
| 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
|
17
|
+
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
|
18
|
+
| 2015-07-13 00:00:00.123456700 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456700"} |
|
19
|
+
| 2015-07-13 00:00:00.123456780 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456780"} |
|
20
|
+
| 2015-07-13 00:00:00.123456789 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456789"} |
|
21
|
+
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_string.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
columns:
|
@@ -8,6 +8,7 @@ in:
|
|
8
8
|
- {name: string2, type: string}
|
9
9
|
- {name: string3, type: string}
|
10
10
|
- {name: string4, type: string}
|
11
|
+
- {name: record, type: json}
|
11
12
|
filters:
|
12
13
|
- type: timestamp_format
|
13
14
|
default_from_timezone: "Asia/Taipei"
|
@@ -17,8 +18,11 @@ filters:
|
|
17
18
|
timestamp_parser: auto_java
|
18
19
|
columns:
|
19
20
|
- {name: string1}
|
20
|
-
- {name: string2, type:
|
21
|
-
- {name: string3, type:
|
22
|
-
- {name: string4, type:
|
21
|
+
- {name: string2, type: long, to_unit: ms}
|
22
|
+
- {name: string3, type: double, to_unit: ms}
|
23
|
+
- {name: string4, type: timestamp}
|
24
|
+
- {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
25
|
+
- {name: $.record.string2, type: long, to_unit: ms}
|
26
|
+
- {name: $.record.string3, type: double, to_unit: ms}
|
23
27
|
out:
|
24
28
|
type: "null"
|
@@ -0,0 +1,21 @@
|
|
1
|
+
2016-11-06 14:16:05.976 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 14:16:06.833 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 14:16:06.848 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
|
4
|
+
2016-11-06 14:16:06.852 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
|
5
|
+
+----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
|
6
|
+
| string1:string | string2:long | string3:double | string4:timestamp | record:json |
|
7
|
+
+----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
|
8
|
+
| 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} |
|
9
|
+
| 2015-07-13 08:00:00.000000 +0800 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000 +0800"} |
|
10
|
+
| 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} |
|
11
|
+
| 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} |
|
12
|
+
| 2015-07-13 00:00:00.100000 +0800 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000 +0800"} |
|
13
|
+
| 2015-07-13 00:00:00.120000 +0800 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000 +0800"} |
|
14
|
+
| 2015-07-13 00:00:00.123000 +0800 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000 +0800"} |
|
15
|
+
| 2015-07-13 00:00:00.123400 +0800 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400 +0800"} |
|
16
|
+
| 2015-07-13 00:00:00.123450 +0800 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450 +0800"} |
|
17
|
+
| 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
|
18
|
+
| 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
|
19
|
+
| 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
|
20
|
+
| 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
|
21
|
+
+----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_string.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
columns:
|
@@ -8,6 +8,7 @@ in:
|
|
8
8
|
- {name: string2, type: string}
|
9
9
|
- {name: string3, type: string}
|
10
10
|
- {name: string4, type: string}
|
11
|
+
- {name: record, type: json}
|
11
12
|
filters:
|
12
13
|
- type: timestamp_format
|
13
14
|
default_from_timezone: "Asia/Taipei"
|
@@ -16,8 +17,11 @@ filters:
|
|
16
17
|
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.nnnnnn Z"
|
17
18
|
columns:
|
18
19
|
- {name: string1}
|
19
|
-
- {name: string2, type:
|
20
|
-
- {name: string3, type:
|
21
|
-
- {name: string4, type:
|
20
|
+
- {name: string2, type: long, to_unit: ms}
|
21
|
+
- {name: string3, type: double, to_unit: ms}
|
22
|
+
- {name: string4, type: timestamp}
|
23
|
+
- {name: $.record.string1}
|
24
|
+
- {name: $.record.string2, type: long, to_unit: ms}
|
25
|
+
- {name: $.record.string3, type: double, to_unit: ms}
|
22
26
|
out:
|
23
27
|
type: "null"
|
File without changes
|
@@ -0,0 +1,9 @@
|
|
1
|
+
2016-11-06 13:32:15.784 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 13:32:16.556 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 13:32:16.571 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_timestamp.csv'
|
4
|
+
2016-11-06 13:32:16.576 +0900 [INFO] (0001:preview): Loading files [example/from_timestamp.csv]
|
5
|
+
+-------------------------------+-------------------+-------------------+-----------------------------+
|
6
|
+
| timestamp1:string | timestamp2:long | timestamp3:double | timestamp4:timestamp |
|
7
|
+
+-------------------------------+-------------------+-------------------+-----------------------------+
|
8
|
+
| 2015-07-13 00:00:00.100000000 | 1,436,713,200,100 | 1.4367132001E12 | 2015-07-12 15:00:00.100 UTC |
|
9
|
+
+-------------------------------+-------------------+-------------------+-----------------------------+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_timestamp.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
default_timestamp_format: "%Y-%m-%d %H:%M:%S.%N %z"
|
@@ -15,8 +15,8 @@ filters:
|
|
15
15
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
16
16
|
columns:
|
17
17
|
- {name: timestamp1, to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
18
|
-
- {name: timestamp2, type:
|
19
|
-
- {name: timestamp3, type:
|
20
|
-
- {name: timestamp4, type:
|
18
|
+
- {name: timestamp2, type: long, to_unit: ms}
|
19
|
+
- {name: timestamp3, type: double, to_unit: ms}
|
20
|
+
- {name: timestamp4, type: timestamp}
|
21
21
|
out:
|
22
22
|
type: "null"
|
File without changes
|
data/example/nested.txt
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
2016-11-06 14:25:21.964 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 14:25:22.829 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 14:25:22.844 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
|
4
|
+
2016-11-06 14:25:22.850 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
|
5
|
+
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
6
|
+
| record:json |
|
7
|
+
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
8
|
+
| {"timestamp":1436713200000,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} |
|
9
|
+
| {"timestamp":1436713200100,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} |
|
10
|
+
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
data/example/nested.yml
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/nested.jsonl
|
4
|
+
parser:
|
5
|
+
type: json
|
6
|
+
filters:
|
7
|
+
- type: timestamp_format
|
8
|
+
default_to_timezone: "Asia/Tokyo"
|
9
|
+
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
10
|
+
columns:
|
11
|
+
- {name: "$.record.timestamp", type: long, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_unit: ms}
|
12
|
+
- {name: "$.record.nested.nested[0].timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
|
13
|
+
out:
|
14
|
+
type: "null"
|
File without changes
|
@@ -0,0 +1,10 @@
|
|
1
|
+
2016-11-06 14:25:02.170 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 14:25:03.024 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 14:25:03.039 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'timezone.csv'
|
4
|
+
2016-11-06 14:25:03.043 +0900 [INFO] (0001:preview): Loading files [example/timezone.csv]
|
5
|
+
+----------------+-------------------------------------+
|
6
|
+
| string1:string | string2:string |
|
7
|
+
+----------------+-------------------------------------+
|
8
|
+
| 2015-07-13 | 2015-07-13 00:00:00.000000000 +0900 |
|
9
|
+
| 2015-07-13 | 2015-07-13 00:00:00.100000000 +0900 |
|
10
|
+
+----------------+-------------------------------------+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/timezone.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: string1, type: string}
|
8
|
+
- {name: string2, type: string}
|
9
|
+
filters:
|
10
|
+
- type: timestamp_format
|
11
|
+
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]
|
12
|
+
columns:
|
13
|
+
- {name: string1, to_format: "%Y-%m-%d", to_timezone: "Asia/Tokyo"}
|
14
|
+
- {name: string2, to_format: "%Y-%m-%d %H:%M:%S.%N %z", to_timezone: "Asia/Tokyo"}
|
15
|
+
out:
|
16
|
+
type: "null"
|
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.filter.timestamp_format;
|
2
2
|
|
3
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
|
3
4
|
import org.embulk.filter.timestamp_format.cast.DoubleCast;
|
4
5
|
import org.embulk.filter.timestamp_format.cast.LongCast;
|
5
6
|
import org.embulk.filter.timestamp_format.cast.StringCast;
|
@@ -237,7 +238,8 @@ public class ColumnCaster
|
|
237
238
|
|
238
239
|
public void setFromJson(Column outputColumn, Value value)
|
239
240
|
{
|
240
|
-
String
|
241
|
+
String pathFragment = PropertyPathToken.getPathFragment(outputColumn.getName());
|
242
|
+
String jsonPath = new StringBuilder("$").append(pathFragment).toString();
|
241
243
|
pageBuilder.setJson(outputColumn, jsonVisitor.visit(jsonPath, value));
|
242
244
|
}
|
243
245
|
}
|
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.filter.timestamp_format;
|
2
2
|
|
3
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
3
4
|
import org.embulk.spi.DataException;
|
4
5
|
import org.embulk.spi.PageReader;
|
5
6
|
import org.embulk.spi.Schema;
|
@@ -48,10 +49,9 @@ public class ColumnVisitorImpl
|
|
48
49
|
// columnName => Boolean to avoid unnecessary cast
|
49
50
|
for (ColumnConfig columnConfig : task.getColumns()) {
|
50
51
|
String name = columnConfig.getName();
|
51
|
-
if (
|
52
|
-
String
|
53
|
-
|
54
|
-
shouldCastSet.add(firstPartName);
|
52
|
+
if (PathCompiler.isProbablyJsonPath(name)) {
|
53
|
+
String columnName = JsonPathUtil.getColumnName(name);
|
54
|
+
shouldCastSet.add(columnName);
|
55
55
|
continue;
|
56
56
|
}
|
57
57
|
shouldCastSet.add(name);
|
@@ -0,0 +1,78 @@
|
|
1
|
+
package org.embulk.filter.timestamp_format;
|
2
|
+
|
3
|
+
import io.github.medjed.jsonpathcompiler.InvalidPathException;
|
4
|
+
import io.github.medjed.jsonpathcompiler.expressions.Path;
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
|
6
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
|
7
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.FunctionPathToken;
|
8
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
9
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
|
10
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PredicatePathToken;
|
11
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
|
12
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ScanPathToken;
|
13
|
+
import org.embulk.config.ConfigException;
|
14
|
+
|
15
|
+
public class JsonPathUtil
|
16
|
+
{
|
17
|
+
private JsonPathUtil() {}
|
18
|
+
|
19
|
+
public static String getColumnName(String jsonPath)
|
20
|
+
{
|
21
|
+
Path compiledPath;
|
22
|
+
try {
|
23
|
+
compiledPath = PathCompiler.compile(jsonPath);
|
24
|
+
}
|
25
|
+
catch (InvalidPathException e) {
|
26
|
+
throw new ConfigException(String.format("jsonpath %s, %s", jsonPath, e.getMessage()));
|
27
|
+
}
|
28
|
+
PathToken pathToken = compiledPath.getRoot();
|
29
|
+
pathToken = pathToken.next(); // skip $
|
30
|
+
return ((PropertyPathToken) pathToken).getProperties().get(0);
|
31
|
+
}
|
32
|
+
|
33
|
+
public static void assertJsonPathFormat(String path)
|
34
|
+
{
|
35
|
+
Path compiledPath;
|
36
|
+
try {
|
37
|
+
compiledPath = PathCompiler.compile(path);
|
38
|
+
}
|
39
|
+
catch (InvalidPathException e) {
|
40
|
+
throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
|
41
|
+
}
|
42
|
+
PathToken pathToken = compiledPath.getRoot();
|
43
|
+
while (true) {
|
44
|
+
assertSupportedPathToken(pathToken, path);
|
45
|
+
if (pathToken.isLeaf()) {
|
46
|
+
break;
|
47
|
+
}
|
48
|
+
pathToken = pathToken.next();
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
protected static void assertSupportedPathToken(PathToken pathToken, String path)
|
53
|
+
{
|
54
|
+
if (pathToken instanceof ArrayPathToken) {
|
55
|
+
ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
|
56
|
+
assertSupportedArrayPathToken(arrayIndexOperation, path);
|
57
|
+
}
|
58
|
+
else if (pathToken instanceof ScanPathToken) {
|
59
|
+
throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
|
60
|
+
}
|
61
|
+
else if (pathToken instanceof FunctionPathToken) {
|
62
|
+
throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
|
63
|
+
}
|
64
|
+
else if (pathToken instanceof PredicatePathToken) {
|
65
|
+
throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
protected static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
|
70
|
+
{
|
71
|
+
if (arrayIndexOperation == null) {
|
72
|
+
throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
|
73
|
+
}
|
74
|
+
else if (!arrayIndexOperation.isSingleIndexOperation()) {
|
75
|
+
throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
|
76
|
+
}
|
77
|
+
}
|
78
|
+
}
|
@@ -1,5 +1,10 @@
|
|
1
1
|
package org.embulk.filter.timestamp_format;
|
2
2
|
|
3
|
+
import io.github.medjed.jsonpathcompiler.expressions.Path;
|
4
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
6
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
|
7
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
|
3
8
|
import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.ColumnConfig;
|
4
9
|
import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.PluginTask;
|
5
10
|
|
@@ -28,19 +33,32 @@ public class JsonVisitor
|
|
28
33
|
this.task = task;
|
29
34
|
this.jsonCaster = jsonCaster;
|
30
35
|
|
36
|
+
assertJsonPathFormat();
|
31
37
|
buildJsonPathColumnConfigMap();
|
32
38
|
buildShouldVisitSet();
|
33
39
|
}
|
34
40
|
|
41
|
+
private void assertJsonPathFormat()
|
42
|
+
{
|
43
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
44
|
+
String name = columnConfig.getName();
|
45
|
+
if (!PathCompiler.isProbablyJsonPath(name)) {
|
46
|
+
continue;
|
47
|
+
}
|
48
|
+
JsonPathUtil.assertJsonPathFormat(name);
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
35
52
|
private void buildJsonPathColumnConfigMap()
|
36
53
|
{
|
37
54
|
// json path => Type
|
38
55
|
for (ColumnConfig columnConfig : task.getColumns()) {
|
39
56
|
String name = columnConfig.getName();
|
40
|
-
if (!
|
57
|
+
if (!PathCompiler.isProbablyJsonPath(name)) {
|
41
58
|
continue;
|
42
59
|
}
|
43
|
-
|
60
|
+
Path compiledPath = PathCompiler.compile(name);
|
61
|
+
this.jsonPathColumnConfigMap.put(compiledPath.toString(), columnConfig);
|
44
62
|
}
|
45
63
|
}
|
46
64
|
|
@@ -49,26 +67,16 @@ public class JsonVisitor
|
|
49
67
|
// json partial path => Boolean to avoid unnecessary type: json visit
|
50
68
|
for (ColumnConfig columnConfig : task.getColumns()) {
|
51
69
|
String name = columnConfig.getName();
|
52
|
-
if (!
|
70
|
+
if (! PathCompiler.isProbablyJsonPath(name)) {
|
53
71
|
continue;
|
54
72
|
}
|
55
|
-
|
73
|
+
Path compiledPath = PathCompiler.compile(name);
|
74
|
+
PathToken parts = compiledPath.getRoot();
|
56
75
|
StringBuilder partialPath = new StringBuilder("$");
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
this.shouldVisitSet.add(partialPath.toString());
|
62
|
-
for (int j = 1; j < arrayParts.length; j++) {
|
63
|
-
// Support both [0] and [*]
|
64
|
-
partialPath.append("[").append(arrayParts[j]);
|
65
|
-
this.shouldVisitSet.add(partialPath.toString());
|
66
|
-
}
|
67
|
-
}
|
68
|
-
else {
|
69
|
-
partialPath.append(".").append(parts[i]);
|
70
|
-
this.shouldVisitSet.add(partialPath.toString());
|
71
|
-
}
|
76
|
+
while (! parts.isLeaf()) {
|
77
|
+
parts = parts.next(); // first next() skips "$"
|
78
|
+
partialPath.append(parts.getPathFragment());
|
79
|
+
this.shouldVisitSet.add(partialPath.toString());
|
72
80
|
}
|
73
81
|
}
|
74
82
|
}
|
@@ -88,7 +96,8 @@ public class JsonVisitor
|
|
88
96
|
int size = arrayValue.size();
|
89
97
|
Value[] newValue = new Value[size];
|
90
98
|
for (int i = 0; i < size; i++) {
|
91
|
-
String
|
99
|
+
String pathFragment = ArrayPathToken.getPathFragment(i);
|
100
|
+
String k = new StringBuilder(rootPath).append(pathFragment).toString();
|
92
101
|
if (!shouldVisit(k)) {
|
93
102
|
k = new StringBuilder(rootPath).append("[*]").toString(); // try [*] too
|
94
103
|
}
|
@@ -105,7 +114,8 @@ public class JsonVisitor
|
|
105
114
|
for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
|
106
115
|
Value k = entry.getKey();
|
107
116
|
Value v = entry.getValue();
|
108
|
-
String
|
117
|
+
String pathFragment = PropertyPathToken.getPathFragment(k.asStringValue().asString());
|
118
|
+
String newPath = new StringBuilder(rootPath).append(pathFragment).toString();
|
109
119
|
Value r = visit(newPath, v);
|
110
120
|
newValue[i++] = k;
|
111
121
|
newValue[i++] = r;
|
@@ -2,6 +2,7 @@ package org.embulk.filter.timestamp_format;
|
|
2
2
|
|
3
3
|
import com.google.common.base.Optional;
|
4
4
|
import com.google.common.collect.ImmutableList;
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
5
6
|
import org.embulk.config.Config;
|
6
7
|
import org.embulk.config.ConfigDefault;
|
7
8
|
import org.embulk.config.ConfigException;
|
@@ -99,10 +100,9 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
99
100
|
// throw if column does not exist
|
100
101
|
for (ColumnConfig columnConfig : columns) {
|
101
102
|
String name = columnConfig.getName();
|
102
|
-
if (
|
103
|
-
String
|
104
|
-
|
105
|
-
inputSchema.lookupColumn(firstNameWithoutArray);
|
103
|
+
if (PathCompiler.isProbablyJsonPath(name)) {
|
104
|
+
String columnName = JsonPathUtil.getColumnName(name);
|
105
|
+
inputSchema.lookupColumn(columnName);
|
106
106
|
}
|
107
107
|
else {
|
108
108
|
inputSchema.lookupColumn(name);
|
@@ -119,7 +119,7 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
119
119
|
if (type instanceof JsonType) {
|
120
120
|
throw new ConfigException(String.format("casting to json is not available: \"%s\"", name));
|
121
121
|
}
|
122
|
-
if (
|
122
|
+
if (PathCompiler.isProbablyJsonPath(name) && type instanceof TimestampType) {
|
123
123
|
throw new ConfigException(String.format("casting a json path into timestamp is not available: \"%s\"", name));
|
124
124
|
}
|
125
125
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-timestamp_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-11-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,28 +58,32 @@ files:
|
|
58
58
|
- bench/gen_dummy.rb
|
59
59
|
- build.gradle
|
60
60
|
- config/checkstyle/checkstyle.xml
|
61
|
-
- example/
|
62
|
-
- example/
|
61
|
+
- example/bracket_notation.txt
|
62
|
+
- example/bracket_notation.yml
|
63
63
|
- example/empty.yml
|
64
|
-
- example/example.jsonl
|
65
64
|
- example/example.yml
|
66
|
-
- example/
|
67
|
-
- example/
|
68
|
-
- example/
|
69
|
-
- example/
|
70
|
-
- example/
|
71
|
-
- example/
|
72
|
-
- example/
|
73
|
-
- example/
|
74
|
-
- example/
|
75
|
-
- example/
|
76
|
-
- example/
|
77
|
-
- example/
|
78
|
-
- example/
|
79
|
-
- example/
|
80
|
-
- example/
|
81
|
-
- example/
|
82
|
-
- example/
|
65
|
+
- example/from_double.csv
|
66
|
+
- example/from_double.txt
|
67
|
+
- example/from_double.yml
|
68
|
+
- example/from_long.csv
|
69
|
+
- example/from_long.txt
|
70
|
+
- example/from_long.yml
|
71
|
+
- example/from_string.csv
|
72
|
+
- example/from_string.txt
|
73
|
+
- example/from_string.yml
|
74
|
+
- example/from_string_auto_java.txt
|
75
|
+
- example/from_string_auto_java.yml
|
76
|
+
- example/from_string_java.txt
|
77
|
+
- example/from_string_java.yml
|
78
|
+
- example/from_timestamp.csv
|
79
|
+
- example/from_timestamp.txt
|
80
|
+
- example/from_timestamp.yml
|
81
|
+
- example/nested.jsonl
|
82
|
+
- example/nested.txt
|
83
|
+
- example/nested.yml
|
84
|
+
- example/timezone.csv
|
85
|
+
- example/timezone.txt
|
86
|
+
- example/timezone.yml
|
83
87
|
- gradle/wrapper/gradle-wrapper.jar
|
84
88
|
- gradle/wrapper/gradle-wrapper.properties
|
85
89
|
- gradlew
|
@@ -89,6 +93,7 @@ files:
|
|
89
93
|
- src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java
|
90
94
|
- src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java
|
91
95
|
- src/main/java/org/embulk/filter/timestamp_format/JsonCaster.java
|
96
|
+
- src/main/java/org/embulk/filter/timestamp_format/JsonPathUtil.java
|
92
97
|
- src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java
|
93
98
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampFormatConverter.java
|
94
99
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java
|
@@ -102,7 +107,13 @@ files:
|
|
102
107
|
- src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
|
103
108
|
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java
|
104
109
|
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
|
105
|
-
- classpath/
|
110
|
+
- classpath/accessors-smart-1.1.jar
|
111
|
+
- classpath/asm-5.0.3.jar
|
112
|
+
- classpath/commons-lang3-3.4.jar
|
113
|
+
- classpath/embulk-filter-timestamp_format-0.2.4.jar
|
114
|
+
- classpath/json-smart-2.2.1.jar
|
115
|
+
- classpath/JsonPathCompiler-0.1.1.jar
|
116
|
+
- classpath/slf4j-api-1.7.21.jar
|
106
117
|
homepage: https://github.com/sonots/embulk-filter-timestamp_format
|
107
118
|
licenses:
|
108
119
|
- MIT
|
data/example/double.csv
DELETED
data/example/example2.yml
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example2.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
columns:
|
7
|
-
- {name: string1, type: string}
|
8
|
-
- {name: string2, type: string}
|
9
|
-
filters:
|
10
|
-
- type: timestamp_format
|
11
|
-
columns:
|
12
|
-
- {name: string1, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_format: "%Y-%m-%m", to_timezone: "Asia/Tokyo"}
|
13
|
-
out:
|
14
|
-
type: "null"
|
data/example/json_double.jsonl
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
{"double1":1436713200100.2,"double2":1436713200100.2,"double3":1436713200100.2}
|
data/example/json_double.yml
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/json_double.jsonl
|
4
|
-
parser:
|
5
|
-
type: json
|
6
|
-
filters:
|
7
|
-
- type: timestamp_format
|
8
|
-
default_from_timestamp_unit: ms
|
9
|
-
columns:
|
10
|
-
- {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
11
|
-
- {name: $.record.double2, type: long}
|
12
|
-
- {name: $.record.double3, type: double}
|
13
|
-
out:
|
14
|
-
type: "null"
|
data/example/json_long.jsonl
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
{"long1":1436713200100,"long2":1436713200100,"long3":1436713200100}
|
data/example/json_long.yml
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/json_long.jsonl
|
4
|
-
parser:
|
5
|
-
type: json
|
6
|
-
filters:
|
7
|
-
- type: timestamp_format
|
8
|
-
default_from_timestamp_unit: ms
|
9
|
-
columns:
|
10
|
-
- {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
11
|
-
- {name: $.record.long2, type: long}
|
12
|
-
- {name: $.record.long3, type: double}
|
13
|
-
out:
|
14
|
-
type: "null"
|
data/example/json_string.jsonl
DELETED
data/example/json_string.yml
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/json_string.jsonl
|
4
|
-
parser:
|
5
|
-
type: json
|
6
|
-
filters:
|
7
|
-
- type: timestamp_format
|
8
|
-
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S.%N %z"]
|
9
|
-
columns:
|
10
|
-
- {name: $.record.string1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
11
|
-
- {name: $.record.string2, type: long, to_unit: ms}
|
12
|
-
- {name: $.record.string3, type: double, to_unit: ms}
|
13
|
-
out:
|
14
|
-
type: "null"
|
data/example/long.csv
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
1436713200100,1436713200100,1436713200100,1436713200100
|
data/example/string.csv
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
2015-07-13,2015-07-13,2015-07-13,2015-07-13
|
2
|
-
2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC
|
3
|
-
2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00
|
4
|
-
2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC
|
5
|
-
2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC
|
6
|
-
2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC
|
7
|
-
2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC
|
8
|
-
2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC
|
9
|
-
2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC
|
10
|
-
2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC
|
11
|
-
2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC
|
12
|
-
2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC
|
13
|
-
2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC
|
14
|
-
|
data/example/string_java.yml
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/string.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
columns:
|
7
|
-
- {name: string1, type: string}
|
8
|
-
- {name: string2, type: string}
|
9
|
-
- {name: string3, type: string}
|
10
|
-
- {name: string4, type: string}
|
11
|
-
filters:
|
12
|
-
- type: timestamp_format
|
13
|
-
default_from_timezone: "Asia/Taipei"
|
14
|
-
default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.SSSSSSSSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
|
15
|
-
default_to_timezone: "Asia/Taipei"
|
16
|
-
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
|
17
|
-
columns:
|
18
|
-
- {name: string1}
|
19
|
-
- {name: string2, type: timestamp}
|
20
|
-
- {name: string3, type: long, to_unit: ms}
|
21
|
-
- {name: string4, type: double, to_unit: ms}
|
22
|
-
out:
|
23
|
-
type: "null"
|