embulk-filter-timestamp_format 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +11 -5
- data/build.gradle +3 -2
- data/example/bracket_notation.txt +10 -0
- data/example/bracket_notation.yml +14 -0
- data/example/example.yml +18 -5
- data/example/from_double.csv +1 -0
- data/example/from_double.txt +9 -0
- data/example/{double.yml → from_double.yml} +8 -4
- data/example/from_long.csv +1 -0
- data/example/from_long.txt +9 -0
- data/example/{long.yml → from_long.yml} +8 -4
- data/example/from_string.csv +14 -0
- data/example/from_string.txt +21 -0
- data/example/{string.yml → from_string.yml} +9 -5
- data/example/from_string_auto_java.txt +21 -0
- data/example/{string_auto_java.yml → from_string_auto_java.yml} +8 -4
- data/example/from_string_java.txt +21 -0
- data/example/{string_nano.yml → from_string_java.yml} +8 -4
- data/example/{timestamp.csv → from_timestamp.csv} +0 -0
- data/example/from_timestamp.txt +9 -0
- data/example/{timestamp.yml → from_timestamp.yml} +4 -4
- data/example/{example.jsonl → nested.jsonl} +0 -0
- data/example/nested.txt +10 -0
- data/example/nested.yml +14 -0
- data/example/{example2.csv → timezone.csv} +0 -0
- data/example/timezone.txt +10 -0
- data/example/timezone.yml +16 -0
- data/src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java +3 -1
- data/src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java +4 -4
- data/src/main/java/org/embulk/filter/timestamp_format/JsonPathUtil.java +78 -0
- data/src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java +31 -21
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java +5 -5
- metadata +34 -23
- data/example/double.csv +0 -2
- data/example/example2.yml +0 -14
- data/example/json_double.jsonl +0 -1
- data/example/json_double.yml +0 -14
- data/example/json_long.jsonl +0 -1
- data/example/json_long.yml +0 -14
- data/example/json_string.jsonl +0 -2
- data/example/json_string.yml +0 -14
- data/example/long.csv +0 -1
- data/example/string.csv +0 -14
- data/example/string_java.yml +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b154ff10f65055de61c4bc6849cf97b64a280e38
|
4
|
+
data.tar.gz: a452a5091c1128268b22cdb74b462b18ab15457a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 381c6bf3590dd48476d6b30aedc0211896bd057740e9f9a0d5f1032af3d12f545046129580ff62efd4f1f671d71f3a37a04183c37fe2083801babb8b0394b567
|
7
|
+
data.tar.gz: 73e0ab777b21d4f07e640a8f5cb06607b4f1ee6bfdaa20e47896901aeb2bd68009095b749b754981a2a2d4af62e08f68fc5f5e941b630c761376c9045b7add95
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -63,17 +63,23 @@ Output will be as:
|
|
63
63
|
|
64
64
|
See [./example](./example) for more examples.
|
65
65
|
|
66
|
-
## JSONPath
|
66
|
+
## JSONPath
|
67
67
|
|
68
68
|
For `type: json` column, you can specify [JSONPath](http://goessner.net/articles/JsonPath/) for column's name as:
|
69
69
|
|
70
70
|
```
|
71
|
-
$.payload.key1
|
72
|
-
$.payload.array[0]
|
73
|
-
$.payload.array[*]
|
71
|
+
name: $.payload.key1
|
72
|
+
name: "$.payload.array[0]"
|
73
|
+
name: "$.payload.array[*]"
|
74
|
+
name: $['payload']['key1.key2']
|
74
75
|
```
|
75
76
|
|
76
|
-
|
77
|
+
Following operators of JSONPath are not supported:
|
78
|
+
|
79
|
+
* Multiple properties such as `['name','name']`
|
80
|
+
* Multiple array indexes such as `[1,2]`
|
81
|
+
* Array slice such as `[1:2]`
|
82
|
+
* Filter expression such as `[?(<expression>)]`
|
77
83
|
|
78
84
|
## JRuby Timestamp Parser Performance Issue
|
79
85
|
|
data/build.gradle
CHANGED
@@ -13,14 +13,15 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.2.3"
|
16
|
+
version = "0.2.4"
|
17
17
|
sourceCompatibility = 1.7
|
18
18
|
targetCompatibility = 1.7
|
19
19
|
|
20
20
|
dependencies {
|
21
21
|
compile "org.embulk:embulk-core:0.8.+"
|
22
22
|
provided "org.embulk:embulk-core:0.8.+"
|
23
|
-
|
23
|
+
compile "io.github.medjed:JsonPathCompiler:0.1.+"
|
24
|
+
|
24
25
|
testCompile "junit:junit:4.+"
|
25
26
|
testCompile "org.embulk:embulk-core:0.7.+:tests"
|
26
27
|
}
|
@@ -0,0 +1,10 @@
|
|
1
|
+
2016-11-06 14:37:03.501 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 14:37:04.349 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 14:37:04.365 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
|
4
|
+
2016-11-06 14:37:04.371 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
|
5
|
+
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
6
|
+
| record:json |
|
7
|
+
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
8
|
+
| {"timestamp":1436713200000,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} |
|
9
|
+
| {"timestamp":1436713200100,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} |
|
10
|
+
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/nested.jsonl
|
4
|
+
parser:
|
5
|
+
type: json
|
6
|
+
filters:
|
7
|
+
- type: timestamp_format
|
8
|
+
default_to_timezone: "Asia/Tokyo"
|
9
|
+
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
10
|
+
columns:
|
11
|
+
- {name: "$['record']['timestamp']", type: long, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_unit: ms}
|
12
|
+
- {name: "$['record']['nested']['nested'][0]['timestamp']", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
|
13
|
+
out:
|
14
|
+
type: "null"
|
data/example/example.yml
CHANGED
@@ -1,14 +1,27 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_string.csv
|
4
4
|
parser:
|
5
|
-
type:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: string1, type: string}
|
8
|
+
- {name: string2, type: string}
|
9
|
+
- {name: string3, type: string}
|
10
|
+
- {name: string4, type: string}
|
11
|
+
- {name: record, type: json}
|
6
12
|
filters:
|
7
13
|
- type: timestamp_format
|
8
|
-
|
14
|
+
default_from_timezone: "Asia/Taipei"
|
15
|
+
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %z", "%Y-%m-%d"]
|
16
|
+
default_to_timezone: "Asia/Taipei"
|
9
17
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
10
18
|
columns:
|
11
|
-
- {name:
|
12
|
-
- {name:
|
19
|
+
- {name: string1, type: string}
|
20
|
+
- {name: string2, type: long, to_unit: ms}
|
21
|
+
- {name: string3, type: double, to_unit: ms}
|
22
|
+
- {name: string4, type: timestamp}
|
23
|
+
- {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
24
|
+
- {name: $.record.string2, type: long, to_unit: ms}
|
25
|
+
- {name: $.record.string3, type: double, to_unit: ms}
|
13
26
|
out:
|
14
27
|
type: "null"
|
@@ -0,0 +1 @@
|
|
1
|
+
1436713200100.2,1436713200100.2,1436713200100.2,1436713200100.2,"{""double1"":1436713200100.2,""double2"":1436713200100.2,""double3"":1436713200100.2}"
|
@@ -0,0 +1,9 @@
|
|
1
|
+
2016-11-06 13:07:43.984 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 13:07:44.752 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 13:07:44.767 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_double.csv'
|
4
|
+
2016-11-06 13:07:44.771 +0900 [INFO] (0001:preview): Loading files [example/from_double.csv]
|
5
|
+
+-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
|
6
|
+
| double1:string | double2:long | double3:double | double4:timestamp | record:json |
|
7
|
+
+-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
|
8
|
+
| 2015-07-13 00:00:00.100199936 | 1,436,713,200 | 1.4367132001002E9 | 2015-07-12 15:00:00.100199936 UTC | {"double2":1436713200,"double3":1.4367132001002E9,"double1":"2015-07-13 00:00:00.100199936"} |
|
9
|
+
+-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_double.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
columns:
|
@@ -8,13 +8,17 @@ in:
|
|
8
8
|
- {name: double2, type: double}
|
9
9
|
- {name: double3, type: double}
|
10
10
|
- {name: double4, type: double}
|
11
|
+
- {name: record, type: json}
|
11
12
|
filters:
|
12
13
|
- type: timestamp_format
|
13
14
|
default_from_timestamp_unit: ms
|
14
15
|
columns:
|
15
16
|
- {name: double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
16
|
-
- {name: double2, type:
|
17
|
-
- {name: double3, type:
|
18
|
-
- {name: double4, type:
|
17
|
+
- {name: double2, type: long}
|
18
|
+
- {name: double3, type: double}
|
19
|
+
- {name: double4, type: timestamp}
|
20
|
+
- {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
21
|
+
- {name: $.record.double2, type: long}
|
22
|
+
- {name: $.record.double3, type: double}
|
19
23
|
out:
|
20
24
|
type: "null"
|
@@ -0,0 +1 @@
|
|
1
|
+
1436713200100,1436713200100,1436713200100,1436713200100,"{""long1"":1436713200100,""long2"":1436713200100,""long3"":1436713200100}"
|
@@ -0,0 +1,9 @@
|
|
1
|
+
2016-11-06 13:11:24.079 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 13:11:24.842 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 13:11:24.858 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_long.csv'
|
4
|
+
2016-11-06 13:11:24.862 +0900 [INFO] (0001:preview): Loading files [example/from_long.csv]
|
5
|
+
+-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
|
6
|
+
| long1:string | long2:long | long3:double | long4:timestamp | record:json |
|
7
|
+
+-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
|
8
|
+
| 2015-07-13 00:00:00.100000000 | 1,436,713,200 | 1.4367132E9 | 2015-07-12 15:00:00.100 UTC | {"long3":1.4367132E9,"long2":1436713200,"long1":"2015-07-13 00:00:00.100000000"} |
|
9
|
+
+-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_long.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
columns:
|
@@ -8,13 +8,17 @@ in:
|
|
8
8
|
- {name: long2, type: long}
|
9
9
|
- {name: long3, type: long}
|
10
10
|
- {name: long4, type: long}
|
11
|
+
- {name: record, type: json}
|
11
12
|
filters:
|
12
13
|
- type: timestamp_format
|
13
14
|
default_from_timestamp_unit: ms
|
14
15
|
columns:
|
15
16
|
- {name: long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
16
|
-
- {name: long2, type:
|
17
|
-
- {name: long3, type:
|
18
|
-
- {name: long4, type:
|
17
|
+
- {name: long2, type: long}
|
18
|
+
- {name: long3, type: double}
|
19
|
+
- {name: long4, type: timestamp}
|
20
|
+
- {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
21
|
+
- {name: $.record.long2, type: long}
|
22
|
+
- {name: $.record.long3, type: double}
|
19
23
|
out:
|
20
24
|
type: "null"
|
@@ -0,0 +1,14 @@
|
|
1
|
+
2015-07-13,2015-07-13,2015-07-13,2015-07-13,"{""string1"":""2015-07-13"" ,""string2"":""2015-07-13"" ,""string3"":""2015-07-13"" }"
|
2
|
+
2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,"{""string1"":""2015-07-13 UTC"" ,""string2"":""2015-07-13 UTC"" ,""string3"":""2015-07-13 UTC"" }"
|
3
|
+
2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,"{""string1"":""2015-07-13 00:00:00"" ,""string2"":""2015-07-13 00:00:00"" ,""string3"":""2015-07-13 00:00:00"" }"
|
4
|
+
2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,"{""string1"":""2015-07-12 16:00:00 UTC"" ,""string2"":""2015-07-12 16:00:00 UTC"" ,""string3"":""2015-07-12 16:00:00 UTC"" }"
|
5
|
+
2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,"{""string1"":""2015-07-12 16:00:00.1 UTC"" ,""string2"":""2015-07-12 16:00:00.1 UTC"" ,""string3"":""2015-07-12 16:00:00.1 UTC"" }"
|
6
|
+
2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,"{""string1"":""2015-07-12 16:00:00.12 UTC"" ,""string2"":""2015-07-12 16:00:00.12 UTC"" ,""string3"":""2015-07-12 16:00:00.12 UTC"" }"
|
7
|
+
2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,"{""string1"":""2015-07-12 16:00:00.123 UTC"" ,""string2"":""2015-07-12 16:00:00.123 UTC"" ,""string3"":""2015-07-12 16:00:00.123 UTC"" }"
|
8
|
+
2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,"{""string1"":""2015-07-12 16:00:00.1234 UTC"" ,""string2"":""2015-07-12 16:00:00.1234 UTC"" ,""string3"":""2015-07-12 16:00:00.1234 UTC"" }"
|
9
|
+
2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,"{""string1"":""2015-07-12 16:00:00.12345 UTC"" ,""string2"":""2015-07-12 16:00:00.12345 UTC"" ,""string3"":""2015-07-12 16:00:00.12345 UTC"" }"
|
10
|
+
2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,"{""string1"":""2015-07-12 16:00:00.123456 UTC"" ,""string2"":""2015-07-12 16:00:00.123456 UTC"" ,""string3"":""2015-07-12 16:00:00.123456 UTC"" }"
|
11
|
+
2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,"{""string1"":""2015-07-12 16:00:00.1234567 UTC"" ,""string2"":""2015-07-12 16:00:00.1234567 UTC"" ,""string3"":""2015-07-12 16:00:00.1234567 UTC"" }"
|
12
|
+
2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,"{""string1"":""2015-07-12 16:00:00.12345678 UTC"" ,""string2"":""2015-07-12 16:00:00.12345678 UTC"" ,""string3"":""2015-07-12 16:00:00.12345678 UTC"" }"
|
13
|
+
2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,"{""string1"":""2015-07-12 16:00:00.123456789 UTC"",""string2"":""2015-07-12 16:00:00.123456789 UTC"",""string3"":""2015-07-12 16:00:00.123456789 UTC""}"
|
14
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
2016-11-06 13:28:37.337 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 13:28:38.096 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 13:28:38.112 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
|
4
|
+
2016-11-06 13:28:38.116 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
|
5
|
+
+-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
|
6
|
+
| string1:string | string2:long | string3:double | string4:timestamp | record:json |
|
7
|
+
+-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
|
8
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
9
|
+
| 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
|
10
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
11
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
12
|
+
| 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
|
13
|
+
| 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
|
14
|
+
| 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
|
15
|
+
| 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
|
16
|
+
| 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
|
17
|
+
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
|
18
|
+
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
|
19
|
+
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
|
20
|
+
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
|
21
|
+
+-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_string.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
columns:
|
@@ -8,6 +8,7 @@ in:
|
|
8
8
|
- {name: string2, type: string}
|
9
9
|
- {name: string3, type: string}
|
10
10
|
- {name: string4, type: string}
|
11
|
+
- {name: record, type: json}
|
11
12
|
filters:
|
12
13
|
- type: timestamp_format
|
13
14
|
default_from_timezone: "Asia/Taipei"
|
@@ -15,9 +16,12 @@ filters:
|
|
15
16
|
default_to_timezone: "Asia/Taipei"
|
16
17
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
17
18
|
columns:
|
18
|
-
- {name: string1}
|
19
|
-
- {name: string2, type:
|
20
|
-
- {name: string3, type:
|
21
|
-
- {name: string4, type:
|
19
|
+
- {name: string1, type: string}
|
20
|
+
- {name: string2, type: long, to_unit: ms}
|
21
|
+
- {name: string3, type: double, to_unit: ms}
|
22
|
+
- {name: string4, type: timestamp}
|
23
|
+
- {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
24
|
+
- {name: $.record.string2, type: long, to_unit: ms}
|
25
|
+
- {name: $.record.string3, type: double, to_unit: ms}
|
22
26
|
out:
|
23
27
|
type: "null"
|
@@ -0,0 +1,21 @@
|
|
1
|
+
2016-11-06 14:15:56.683 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 14:15:57.554 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 14:15:57.568 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
|
4
|
+
2016-11-06 14:15:57.573 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
|
5
|
+
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
6
|
+
| string1:string | string2:long | string3:double | string4:timestamp | record:json |
|
7
|
+
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
8
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
9
|
+
| 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
|
10
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
11
|
+
| 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
|
12
|
+
| 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
|
13
|
+
| 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
|
14
|
+
| 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
|
15
|
+
| 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
|
16
|
+
| 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
|
17
|
+
| 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
|
18
|
+
| 2015-07-13 00:00:00.123456700 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456700"} |
|
19
|
+
| 2015-07-13 00:00:00.123456780 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456780"} |
|
20
|
+
| 2015-07-13 00:00:00.123456789 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456789"} |
|
21
|
+
+-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_string.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
columns:
|
@@ -8,6 +8,7 @@ in:
|
|
8
8
|
- {name: string2, type: string}
|
9
9
|
- {name: string3, type: string}
|
10
10
|
- {name: string4, type: string}
|
11
|
+
- {name: record, type: json}
|
11
12
|
filters:
|
12
13
|
- type: timestamp_format
|
13
14
|
default_from_timezone: "Asia/Taipei"
|
@@ -17,8 +18,11 @@ filters:
|
|
17
18
|
timestamp_parser: auto_java
|
18
19
|
columns:
|
19
20
|
- {name: string1}
|
20
|
-
- {name: string2, type:
|
21
|
-
- {name: string3, type:
|
22
|
-
- {name: string4, type:
|
21
|
+
- {name: string2, type: long, to_unit: ms}
|
22
|
+
- {name: string3, type: double, to_unit: ms}
|
23
|
+
- {name: string4, type: timestamp}
|
24
|
+
- {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
25
|
+
- {name: $.record.string2, type: long, to_unit: ms}
|
26
|
+
- {name: $.record.string3, type: double, to_unit: ms}
|
23
27
|
out:
|
24
28
|
type: "null"
|
@@ -0,0 +1,21 @@
|
|
1
|
+
2016-11-06 14:16:05.976 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 14:16:06.833 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 14:16:06.848 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
|
4
|
+
2016-11-06 14:16:06.852 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
|
5
|
+
+----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
|
6
|
+
| string1:string | string2:long | string3:double | string4:timestamp | record:json |
|
7
|
+
+----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
|
8
|
+
| 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} |
|
9
|
+
| 2015-07-13 08:00:00.000000 +0800 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000 +0800"} |
|
10
|
+
| 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} |
|
11
|
+
| 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} |
|
12
|
+
| 2015-07-13 00:00:00.100000 +0800 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000 +0800"} |
|
13
|
+
| 2015-07-13 00:00:00.120000 +0800 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000 +0800"} |
|
14
|
+
| 2015-07-13 00:00:00.123000 +0800 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000 +0800"} |
|
15
|
+
| 2015-07-13 00:00:00.123400 +0800 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400 +0800"} |
|
16
|
+
| 2015-07-13 00:00:00.123450 +0800 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450 +0800"} |
|
17
|
+
| 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
|
18
|
+
| 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
|
19
|
+
| 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
|
20
|
+
| 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
|
21
|
+
+----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_string.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
columns:
|
@@ -8,6 +8,7 @@ in:
|
|
8
8
|
- {name: string2, type: string}
|
9
9
|
- {name: string3, type: string}
|
10
10
|
- {name: string4, type: string}
|
11
|
+
- {name: record, type: json}
|
11
12
|
filters:
|
12
13
|
- type: timestamp_format
|
13
14
|
default_from_timezone: "Asia/Taipei"
|
@@ -16,8 +17,11 @@ filters:
|
|
16
17
|
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.nnnnnn Z"
|
17
18
|
columns:
|
18
19
|
- {name: string1}
|
19
|
-
- {name: string2, type:
|
20
|
-
- {name: string3, type:
|
21
|
-
- {name: string4, type:
|
20
|
+
- {name: string2, type: long, to_unit: ms}
|
21
|
+
- {name: string3, type: double, to_unit: ms}
|
22
|
+
- {name: string4, type: timestamp}
|
23
|
+
- {name: $.record.string1}
|
24
|
+
- {name: $.record.string2, type: long, to_unit: ms}
|
25
|
+
- {name: $.record.string3, type: double, to_unit: ms}
|
22
26
|
out:
|
23
27
|
type: "null"
|
File without changes
|
@@ -0,0 +1,9 @@
|
|
1
|
+
2016-11-06 13:32:15.784 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 13:32:16.556 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 13:32:16.571 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_timestamp.csv'
|
4
|
+
2016-11-06 13:32:16.576 +0900 [INFO] (0001:preview): Loading files [example/from_timestamp.csv]
|
5
|
+
+-------------------------------+-------------------+-------------------+-----------------------------+
|
6
|
+
| timestamp1:string | timestamp2:long | timestamp3:double | timestamp4:timestamp |
|
7
|
+
+-------------------------------+-------------------+-------------------+-----------------------------+
|
8
|
+
| 2015-07-13 00:00:00.100000000 | 1,436,713,200,100 | 1.4367132001E12 | 2015-07-12 15:00:00.100 UTC |
|
9
|
+
+-------------------------------+-------------------+-------------------+-----------------------------+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/from_timestamp.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
default_timestamp_format: "%Y-%m-%d %H:%M:%S.%N %z"
|
@@ -15,8 +15,8 @@ filters:
|
|
15
15
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
16
16
|
columns:
|
17
17
|
- {name: timestamp1, to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
18
|
-
- {name: timestamp2, type:
|
19
|
-
- {name: timestamp3, type:
|
20
|
-
- {name: timestamp4, type:
|
18
|
+
- {name: timestamp2, type: long, to_unit: ms}
|
19
|
+
- {name: timestamp3, type: double, to_unit: ms}
|
20
|
+
- {name: timestamp4, type: timestamp}
|
21
21
|
out:
|
22
22
|
type: "null"
|
File without changes
|
data/example/nested.txt
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
2016-11-06 14:25:21.964 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 14:25:22.829 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 14:25:22.844 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
|
4
|
+
2016-11-06 14:25:22.850 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
|
5
|
+
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
6
|
+
| record:json |
|
7
|
+
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
8
|
+
| {"timestamp":1436713200000,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} |
|
9
|
+
| {"timestamp":1436713200100,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} |
|
10
|
+
+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|
data/example/nested.yml
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/nested.jsonl
|
4
|
+
parser:
|
5
|
+
type: json
|
6
|
+
filters:
|
7
|
+
- type: timestamp_format
|
8
|
+
default_to_timezone: "Asia/Tokyo"
|
9
|
+
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
10
|
+
columns:
|
11
|
+
- {name: "$.record.timestamp", type: long, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_unit: ms}
|
12
|
+
- {name: "$.record.nested.nested[0].timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
|
13
|
+
out:
|
14
|
+
type: "null"
|
File without changes
|
@@ -0,0 +1,10 @@
|
|
1
|
+
2016-11-06 14:25:02.170 +0900: Embulk v0.8.6
|
2
|
+
2016-11-06 14:25:03.024 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
|
3
|
+
2016-11-06 14:25:03.039 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'timezone.csv'
|
4
|
+
2016-11-06 14:25:03.043 +0900 [INFO] (0001:preview): Loading files [example/timezone.csv]
|
5
|
+
+----------------+-------------------------------------+
|
6
|
+
| string1:string | string2:string |
|
7
|
+
+----------------+-------------------------------------+
|
8
|
+
| 2015-07-13 | 2015-07-13 00:00:00.000000000 +0900 |
|
9
|
+
| 2015-07-13 | 2015-07-13 00:00:00.100000000 +0900 |
|
10
|
+
+----------------+-------------------------------------+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/timezone.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: string1, type: string}
|
8
|
+
- {name: string2, type: string}
|
9
|
+
filters:
|
10
|
+
- type: timestamp_format
|
11
|
+
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]
|
12
|
+
columns:
|
13
|
+
- {name: string1, to_format: "%Y-%m-%d", to_timezone: "Asia/Tokyo"}
|
14
|
+
- {name: string2, to_format: "%Y-%m-%d %H:%M:%S.%N %z", to_timezone: "Asia/Tokyo"}
|
15
|
+
out:
|
16
|
+
type: "null"
|
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.filter.timestamp_format;
|
2
2
|
|
3
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
|
3
4
|
import org.embulk.filter.timestamp_format.cast.DoubleCast;
|
4
5
|
import org.embulk.filter.timestamp_format.cast.LongCast;
|
5
6
|
import org.embulk.filter.timestamp_format.cast.StringCast;
|
@@ -237,7 +238,8 @@ public class ColumnCaster
|
|
237
238
|
|
238
239
|
public void setFromJson(Column outputColumn, Value value)
|
239
240
|
{
|
240
|
-
String
|
241
|
+
String pathFragment = PropertyPathToken.getPathFragment(outputColumn.getName());
|
242
|
+
String jsonPath = new StringBuilder("$").append(pathFragment).toString();
|
241
243
|
pageBuilder.setJson(outputColumn, jsonVisitor.visit(jsonPath, value));
|
242
244
|
}
|
243
245
|
}
|
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.filter.timestamp_format;
|
2
2
|
|
3
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
3
4
|
import org.embulk.spi.DataException;
|
4
5
|
import org.embulk.spi.PageReader;
|
5
6
|
import org.embulk.spi.Schema;
|
@@ -48,10 +49,9 @@ public class ColumnVisitorImpl
|
|
48
49
|
// columnName => Boolean to avoid unnecessary cast
|
49
50
|
for (ColumnConfig columnConfig : task.getColumns()) {
|
50
51
|
String name = columnConfig.getName();
|
51
|
-
if (
|
52
|
-
String
|
53
|
-
|
54
|
-
shouldCastSet.add(firstPartName);
|
52
|
+
if (PathCompiler.isProbablyJsonPath(name)) {
|
53
|
+
String columnName = JsonPathUtil.getColumnName(name);
|
54
|
+
shouldCastSet.add(columnName);
|
55
55
|
continue;
|
56
56
|
}
|
57
57
|
shouldCastSet.add(name);
|
@@ -0,0 +1,78 @@
|
|
1
|
+
package org.embulk.filter.timestamp_format;
|
2
|
+
|
3
|
+
import io.github.medjed.jsonpathcompiler.InvalidPathException;
|
4
|
+
import io.github.medjed.jsonpathcompiler.expressions.Path;
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
|
6
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
|
7
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.FunctionPathToken;
|
8
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
9
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
|
10
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PredicatePathToken;
|
11
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
|
12
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ScanPathToken;
|
13
|
+
import org.embulk.config.ConfigException;
|
14
|
+
|
15
|
+
public class JsonPathUtil
|
16
|
+
{
|
17
|
+
private JsonPathUtil() {}
|
18
|
+
|
19
|
+
public static String getColumnName(String jsonPath)
|
20
|
+
{
|
21
|
+
Path compiledPath;
|
22
|
+
try {
|
23
|
+
compiledPath = PathCompiler.compile(jsonPath);
|
24
|
+
}
|
25
|
+
catch (InvalidPathException e) {
|
26
|
+
throw new ConfigException(String.format("jsonpath %s, %s", jsonPath, e.getMessage()));
|
27
|
+
}
|
28
|
+
PathToken pathToken = compiledPath.getRoot();
|
29
|
+
pathToken = pathToken.next(); // skip $
|
30
|
+
return ((PropertyPathToken) pathToken).getProperties().get(0);
|
31
|
+
}
|
32
|
+
|
33
|
+
public static void assertJsonPathFormat(String path)
|
34
|
+
{
|
35
|
+
Path compiledPath;
|
36
|
+
try {
|
37
|
+
compiledPath = PathCompiler.compile(path);
|
38
|
+
}
|
39
|
+
catch (InvalidPathException e) {
|
40
|
+
throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
|
41
|
+
}
|
42
|
+
PathToken pathToken = compiledPath.getRoot();
|
43
|
+
while (true) {
|
44
|
+
assertSupportedPathToken(pathToken, path);
|
45
|
+
if (pathToken.isLeaf()) {
|
46
|
+
break;
|
47
|
+
}
|
48
|
+
pathToken = pathToken.next();
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
protected static void assertSupportedPathToken(PathToken pathToken, String path)
|
53
|
+
{
|
54
|
+
if (pathToken instanceof ArrayPathToken) {
|
55
|
+
ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
|
56
|
+
assertSupportedArrayPathToken(arrayIndexOperation, path);
|
57
|
+
}
|
58
|
+
else if (pathToken instanceof ScanPathToken) {
|
59
|
+
throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
|
60
|
+
}
|
61
|
+
else if (pathToken instanceof FunctionPathToken) {
|
62
|
+
throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
|
63
|
+
}
|
64
|
+
else if (pathToken instanceof PredicatePathToken) {
|
65
|
+
throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
protected static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
|
70
|
+
{
|
71
|
+
if (arrayIndexOperation == null) {
|
72
|
+
throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
|
73
|
+
}
|
74
|
+
else if (!arrayIndexOperation.isSingleIndexOperation()) {
|
75
|
+
throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
|
76
|
+
}
|
77
|
+
}
|
78
|
+
}
|
@@ -1,5 +1,10 @@
|
|
1
1
|
package org.embulk.filter.timestamp_format;
|
2
2
|
|
3
|
+
import io.github.medjed.jsonpathcompiler.expressions.Path;
|
4
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
6
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
|
7
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
|
3
8
|
import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.ColumnConfig;
|
4
9
|
import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.PluginTask;
|
5
10
|
|
@@ -28,19 +33,32 @@ public class JsonVisitor
|
|
28
33
|
this.task = task;
|
29
34
|
this.jsonCaster = jsonCaster;
|
30
35
|
|
36
|
+
assertJsonPathFormat();
|
31
37
|
buildJsonPathColumnConfigMap();
|
32
38
|
buildShouldVisitSet();
|
33
39
|
}
|
34
40
|
|
41
|
+
private void assertJsonPathFormat()
|
42
|
+
{
|
43
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
44
|
+
String name = columnConfig.getName();
|
45
|
+
if (!PathCompiler.isProbablyJsonPath(name)) {
|
46
|
+
continue;
|
47
|
+
}
|
48
|
+
JsonPathUtil.assertJsonPathFormat(name);
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
35
52
|
private void buildJsonPathColumnConfigMap()
|
36
53
|
{
|
37
54
|
// json path => Type
|
38
55
|
for (ColumnConfig columnConfig : task.getColumns()) {
|
39
56
|
String name = columnConfig.getName();
|
40
|
-
if (!
|
57
|
+
if (!PathCompiler.isProbablyJsonPath(name)) {
|
41
58
|
continue;
|
42
59
|
}
|
43
|
-
|
60
|
+
Path compiledPath = PathCompiler.compile(name);
|
61
|
+
this.jsonPathColumnConfigMap.put(compiledPath.toString(), columnConfig);
|
44
62
|
}
|
45
63
|
}
|
46
64
|
|
@@ -49,26 +67,16 @@ public class JsonVisitor
|
|
49
67
|
// json partial path => Boolean to avoid unnecessary type: json visit
|
50
68
|
for (ColumnConfig columnConfig : task.getColumns()) {
|
51
69
|
String name = columnConfig.getName();
|
52
|
-
if (!
|
70
|
+
if (! PathCompiler.isProbablyJsonPath(name)) {
|
53
71
|
continue;
|
54
72
|
}
|
55
|
-
|
73
|
+
Path compiledPath = PathCompiler.compile(name);
|
74
|
+
PathToken parts = compiledPath.getRoot();
|
56
75
|
StringBuilder partialPath = new StringBuilder("$");
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
this.shouldVisitSet.add(partialPath.toString());
|
62
|
-
for (int j = 1; j < arrayParts.length; j++) {
|
63
|
-
// Support both [0] and [*]
|
64
|
-
partialPath.append("[").append(arrayParts[j]);
|
65
|
-
this.shouldVisitSet.add(partialPath.toString());
|
66
|
-
}
|
67
|
-
}
|
68
|
-
else {
|
69
|
-
partialPath.append(".").append(parts[i]);
|
70
|
-
this.shouldVisitSet.add(partialPath.toString());
|
71
|
-
}
|
76
|
+
while (! parts.isLeaf()) {
|
77
|
+
parts = parts.next(); // first next() skips "$"
|
78
|
+
partialPath.append(parts.getPathFragment());
|
79
|
+
this.shouldVisitSet.add(partialPath.toString());
|
72
80
|
}
|
73
81
|
}
|
74
82
|
}
|
@@ -88,7 +96,8 @@ public class JsonVisitor
|
|
88
96
|
int size = arrayValue.size();
|
89
97
|
Value[] newValue = new Value[size];
|
90
98
|
for (int i = 0; i < size; i++) {
|
91
|
-
String
|
99
|
+
String pathFragment = ArrayPathToken.getPathFragment(i);
|
100
|
+
String k = new StringBuilder(rootPath).append(pathFragment).toString();
|
92
101
|
if (!shouldVisit(k)) {
|
93
102
|
k = new StringBuilder(rootPath).append("[*]").toString(); // try [*] too
|
94
103
|
}
|
@@ -105,7 +114,8 @@ public class JsonVisitor
|
|
105
114
|
for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
|
106
115
|
Value k = entry.getKey();
|
107
116
|
Value v = entry.getValue();
|
108
|
-
String
|
117
|
+
String pathFragment = PropertyPathToken.getPathFragment(k.asStringValue().asString());
|
118
|
+
String newPath = new StringBuilder(rootPath).append(pathFragment).toString();
|
109
119
|
Value r = visit(newPath, v);
|
110
120
|
newValue[i++] = k;
|
111
121
|
newValue[i++] = r;
|
@@ -2,6 +2,7 @@ package org.embulk.filter.timestamp_format;
|
|
2
2
|
|
3
3
|
import com.google.common.base.Optional;
|
4
4
|
import com.google.common.collect.ImmutableList;
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
5
6
|
import org.embulk.config.Config;
|
6
7
|
import org.embulk.config.ConfigDefault;
|
7
8
|
import org.embulk.config.ConfigException;
|
@@ -99,10 +100,9 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
99
100
|
// throw if column does not exist
|
100
101
|
for (ColumnConfig columnConfig : columns) {
|
101
102
|
String name = columnConfig.getName();
|
102
|
-
if (
|
103
|
-
String
|
104
|
-
|
105
|
-
inputSchema.lookupColumn(firstNameWithoutArray);
|
103
|
+
if (PathCompiler.isProbablyJsonPath(name)) {
|
104
|
+
String columnName = JsonPathUtil.getColumnName(name);
|
105
|
+
inputSchema.lookupColumn(columnName);
|
106
106
|
}
|
107
107
|
else {
|
108
108
|
inputSchema.lookupColumn(name);
|
@@ -119,7 +119,7 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
119
119
|
if (type instanceof JsonType) {
|
120
120
|
throw new ConfigException(String.format("casting to json is not available: \"%s\"", name));
|
121
121
|
}
|
122
|
-
if (
|
122
|
+
if (PathCompiler.isProbablyJsonPath(name) && type instanceof TimestampType) {
|
123
123
|
throw new ConfigException(String.format("casting a json path into timestamp is not available: \"%s\"", name));
|
124
124
|
}
|
125
125
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-timestamp_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-11-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,28 +58,32 @@ files:
|
|
58
58
|
- bench/gen_dummy.rb
|
59
59
|
- build.gradle
|
60
60
|
- config/checkstyle/checkstyle.xml
|
61
|
-
- example/
|
62
|
-
- example/
|
61
|
+
- example/bracket_notation.txt
|
62
|
+
- example/bracket_notation.yml
|
63
63
|
- example/empty.yml
|
64
|
-
- example/example.jsonl
|
65
64
|
- example/example.yml
|
66
|
-
- example/
|
67
|
-
- example/
|
68
|
-
- example/
|
69
|
-
- example/
|
70
|
-
- example/
|
71
|
-
- example/
|
72
|
-
- example/
|
73
|
-
- example/
|
74
|
-
- example/
|
75
|
-
- example/
|
76
|
-
- example/
|
77
|
-
- example/
|
78
|
-
- example/
|
79
|
-
- example/
|
80
|
-
- example/
|
81
|
-
- example/
|
82
|
-
- example/
|
65
|
+
- example/from_double.csv
|
66
|
+
- example/from_double.txt
|
67
|
+
- example/from_double.yml
|
68
|
+
- example/from_long.csv
|
69
|
+
- example/from_long.txt
|
70
|
+
- example/from_long.yml
|
71
|
+
- example/from_string.csv
|
72
|
+
- example/from_string.txt
|
73
|
+
- example/from_string.yml
|
74
|
+
- example/from_string_auto_java.txt
|
75
|
+
- example/from_string_auto_java.yml
|
76
|
+
- example/from_string_java.txt
|
77
|
+
- example/from_string_java.yml
|
78
|
+
- example/from_timestamp.csv
|
79
|
+
- example/from_timestamp.txt
|
80
|
+
- example/from_timestamp.yml
|
81
|
+
- example/nested.jsonl
|
82
|
+
- example/nested.txt
|
83
|
+
- example/nested.yml
|
84
|
+
- example/timezone.csv
|
85
|
+
- example/timezone.txt
|
86
|
+
- example/timezone.yml
|
83
87
|
- gradle/wrapper/gradle-wrapper.jar
|
84
88
|
- gradle/wrapper/gradle-wrapper.properties
|
85
89
|
- gradlew
|
@@ -89,6 +93,7 @@ files:
|
|
89
93
|
- src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java
|
90
94
|
- src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java
|
91
95
|
- src/main/java/org/embulk/filter/timestamp_format/JsonCaster.java
|
96
|
+
- src/main/java/org/embulk/filter/timestamp_format/JsonPathUtil.java
|
92
97
|
- src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java
|
93
98
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampFormatConverter.java
|
94
99
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java
|
@@ -102,7 +107,13 @@ files:
|
|
102
107
|
- src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
|
103
108
|
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java
|
104
109
|
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
|
105
|
-
- classpath/embulk-filter-timestamp_format-0.2.3.jar
|
110
|
+
- classpath/accessors-smart-1.1.jar
|
111
|
+
- classpath/asm-5.0.3.jar
|
112
|
+
- classpath/commons-lang3-3.4.jar
|
113
|
+
- classpath/embulk-filter-timestamp_format-0.2.4.jar
|
114
|
+
- classpath/json-smart-2.2.1.jar
|
115
|
+
- classpath/JsonPathCompiler-0.1.1.jar
|
116
|
+
- classpath/slf4j-api-1.7.21.jar
|
106
117
|
homepage: https://github.com/sonots/embulk-filter-timestamp_format
|
107
118
|
licenses:
|
108
119
|
- MIT
|
data/example/double.csv
DELETED
data/example/example2.yml
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example2.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
columns:
|
7
|
-
- {name: string1, type: string}
|
8
|
-
- {name: string2, type: string}
|
9
|
-
filters:
|
10
|
-
- type: timestamp_format
|
11
|
-
columns:
|
12
|
-
- {name: string1, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_format: "%Y-%m-%m", to_timezone: "Asia/Tokyo"}
|
13
|
-
out:
|
14
|
-
type: "null"
|
data/example/json_double.jsonl
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
{"double1":1436713200100.2,"double2":1436713200100.2,"double3":1436713200100.2}
|
data/example/json_double.yml
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/json_double.jsonl
|
4
|
-
parser:
|
5
|
-
type: json
|
6
|
-
filters:
|
7
|
-
- type: timestamp_format
|
8
|
-
default_from_timestamp_unit: ms
|
9
|
-
columns:
|
10
|
-
- {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
11
|
-
- {name: $.record.double2, type: long}
|
12
|
-
- {name: $.record.double3, type: double}
|
13
|
-
out:
|
14
|
-
type: "null"
|
data/example/json_long.jsonl
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
{"long1":1436713200100,"long2":1436713200100,"long3":1436713200100}
|
data/example/json_long.yml
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/json_long.jsonl
|
4
|
-
parser:
|
5
|
-
type: json
|
6
|
-
filters:
|
7
|
-
- type: timestamp_format
|
8
|
-
default_from_timestamp_unit: ms
|
9
|
-
columns:
|
10
|
-
- {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
11
|
-
- {name: $.record.long2, type: long}
|
12
|
-
- {name: $.record.long3, type: double}
|
13
|
-
out:
|
14
|
-
type: "null"
|
data/example/json_string.jsonl
DELETED
data/example/json_string.yml
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/json_string.jsonl
|
4
|
-
parser:
|
5
|
-
type: json
|
6
|
-
filters:
|
7
|
-
- type: timestamp_format
|
8
|
-
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S.%N %z"]
|
9
|
-
columns:
|
10
|
-
- {name: $.record.string1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
11
|
-
- {name: $.record.string2, type: long, to_unit: ms}
|
12
|
-
- {name: $.record.string3, type: double, to_unit: ms}
|
13
|
-
out:
|
14
|
-
type: "null"
|
data/example/long.csv
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
1436713200100,1436713200100,1436713200100,1436713200100
|
data/example/string.csv
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
2015-07-13,2015-07-13,2015-07-13,2015-07-13
|
2
|
-
2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC
|
3
|
-
2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00
|
4
|
-
2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC
|
5
|
-
2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC
|
6
|
-
2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC
|
7
|
-
2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC
|
8
|
-
2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC
|
9
|
-
2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC
|
10
|
-
2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC
|
11
|
-
2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC
|
12
|
-
2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC
|
13
|
-
2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC
|
14
|
-
|
data/example/string_java.yml
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/string.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
columns:
|
7
|
-
- {name: string1, type: string}
|
8
|
-
- {name: string2, type: string}
|
9
|
-
- {name: string3, type: string}
|
10
|
-
- {name: string4, type: string}
|
11
|
-
filters:
|
12
|
-
- type: timestamp_format
|
13
|
-
default_from_timezone: "Asia/Taipei"
|
14
|
-
default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.SSSSSSSSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
|
15
|
-
default_to_timezone: "Asia/Taipei"
|
16
|
-
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
|
17
|
-
columns:
|
18
|
-
- {name: string1}
|
19
|
-
- {name: string2, type: timestamp}
|
20
|
-
- {name: string3, type: long, to_unit: ms}
|
21
|
-
- {name: string4, type: double, to_unit: ms}
|
22
|
-
out:
|
23
|
-
type: "null"
|