embulk-filter-timestamp_format 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +11 -5
  4. data/build.gradle +3 -2
  5. data/example/bracket_notation.txt +10 -0
  6. data/example/bracket_notation.yml +14 -0
  7. data/example/example.yml +18 -5
  8. data/example/from_double.csv +1 -0
  9. data/example/from_double.txt +9 -0
  10. data/example/{double.yml → from_double.yml} +8 -4
  11. data/example/from_long.csv +1 -0
  12. data/example/from_long.txt +9 -0
  13. data/example/{long.yml → from_long.yml} +8 -4
  14. data/example/from_string.csv +14 -0
  15. data/example/from_string.txt +21 -0
  16. data/example/{string.yml → from_string.yml} +9 -5
  17. data/example/from_string_auto_java.txt +21 -0
  18. data/example/{string_auto_java.yml → from_string_auto_java.yml} +8 -4
  19. data/example/from_string_java.txt +21 -0
  20. data/example/{string_nano.yml → from_string_java.yml} +8 -4
  21. data/example/{timestamp.csv → from_timestamp.csv} +0 -0
  22. data/example/from_timestamp.txt +9 -0
  23. data/example/{timestamp.yml → from_timestamp.yml} +4 -4
  24. data/example/{example.jsonl → nested.jsonl} +0 -0
  25. data/example/nested.txt +10 -0
  26. data/example/nested.yml +14 -0
  27. data/example/{example2.csv → timezone.csv} +0 -0
  28. data/example/timezone.txt +10 -0
  29. data/example/timezone.yml +16 -0
  30. data/src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java +3 -1
  31. data/src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java +4 -4
  32. data/src/main/java/org/embulk/filter/timestamp_format/JsonPathUtil.java +78 -0
  33. data/src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java +31 -21
  34. data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java +5 -5
  35. metadata +34 -23
  36. data/example/double.csv +0 -2
  37. data/example/example2.yml +0 -14
  38. data/example/json_double.jsonl +0 -1
  39. data/example/json_double.yml +0 -14
  40. data/example/json_long.jsonl +0 -1
  41. data/example/json_long.yml +0 -14
  42. data/example/json_string.jsonl +0 -2
  43. data/example/json_string.yml +0 -14
  44. data/example/long.csv +0 -1
  45. data/example/string.csv +0 -14
  46. data/example/string_java.yml +0 -23
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7a2a594a84f03137480454f65b2ee9f5e3bbc58a
4
- data.tar.gz: 836fb23b2550507518b6f2de9ca77b3220e09457
3
+ metadata.gz: b154ff10f65055de61c4bc6849cf97b64a280e38
4
+ data.tar.gz: a452a5091c1128268b22cdb74b462b18ab15457a
5
5
  SHA512:
6
- metadata.gz: b7deb792bec505f3cd70e0e191d98473d383d4f58dddba199dad285c47f0cf2fb003fb2e8face50336bfb9577e769b7d673fa99e2ce4be2a21269a0a2b0cb831
7
- data.tar.gz: 0135e7bca5a55b62b31e6768e3b1a7e3e4ebfa1caa66bd289187d1a8fdd512bd270041d626205a10bad1d6abe33e36834a87aea3da3cca93e52cff6c2325ae1d
6
+ metadata.gz: 381c6bf3590dd48476d6b30aedc0211896bd057740e9f9a0d5f1032af3d12f545046129580ff62efd4f1f671d71f3a37a04183c37fe2083801babb8b0394b567
7
+ data.tar.gz: 73e0ab777b21d4f07e640a8f5cb06607b4f1ee6bfdaa20e47896901aeb2bd68009095b749b754981a2a2d4af62e08f68fc5f5e941b630c761376c9045b7add95
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.2.4 (2016-11-06)
2
+
3
+ Enhancements:
4
+
5
+ * Support jsonpath bracket notation
6
+
1
7
  # 0.2.3 (2016-10-25)
2
8
 
3
9
  Fixes:
data/README.md CHANGED
@@ -63,17 +63,23 @@ Output will be as:
63
63
 
64
64
  See [./example](./example) for more examples.
65
65
 
66
- ## JSONPath (like) name
66
+ ## JSONPath
67
67
 
68
68
  For a `type: json` column, you can specify a [JSONPath](http://goessner.net/articles/JsonPath/) expression as the column's name:
69
69
 
70
70
  ```
71
- $.payload.key1
72
- $.payload.array[0]
73
- $.payload.array[*]
71
+ name: $.payload.key1
72
+ name: "$.payload.array[0]"
73
+ name: "$.payload.array[*]"
74
+ name: $['payload']['key1.key2']
74
75
  ```
75
76
 
76
- NOTE: JSONPath syntax is not fully supported
77
+ The following JSONPath operators are not supported:
78
+
79
+ * Multiple properties such as `['name','name']`
80
+ * Multiple array indexes such as `[1,2]`
81
+ * Array slice such as `[1:2]`
82
+ * Filter expression such as `[?(<expression>)]`
77
83
 
78
84
  ## JRuby Timestamp Parser Performance Issue
79
85
 
data/build.gradle CHANGED
@@ -13,14 +13,15 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.2.3"
16
+ version = "0.2.4"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
20
  dependencies {
21
21
  compile "org.embulk:embulk-core:0.8.+"
22
22
  provided "org.embulk:embulk-core:0.8.+"
23
- // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
23
+ compile "io.github.medjed:JsonPathCompiler:0.1.+"
24
+
24
25
  testCompile "junit:junit:4.+"
25
26
  testCompile "org.embulk:embulk-core:0.7.+:tests"
26
27
  }
@@ -0,0 +1,10 @@
1
+ 2016-11-06 14:37:03.501 +0900: Embulk v0.8.6
2
+ 2016-11-06 14:37:04.349 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 14:37:04.365 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
4
+ 2016-11-06 14:37:04.371 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
5
+ +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
6
+ | record:json |
7
+ +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
8
+ | {"timestamp":1436713200000,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} |
9
+ | {"timestamp":1436713200100,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} |
10
+ +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/nested.jsonl
4
+ parser:
5
+ type: json
6
+ filters:
7
+ - type: timestamp_format
8
+ default_to_timezone: "Asia/Tokyo"
9
+ default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
10
+ columns:
11
+ - {name: "$['record']['timestamp']", type: long, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_unit: ms}
12
+ - {name: "$['record']['nested']['nested'][0]['timestamp']", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
13
+ out:
14
+ type: "null"
data/example/example.yml CHANGED
@@ -1,14 +1,27 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/example.jsonl
3
+ path_prefix: example/from_string.csv
4
4
  parser:
5
- type: json
5
+ type: csv
6
+ columns:
7
+ - {name: string1, type: string}
8
+ - {name: string2, type: string}
9
+ - {name: string3, type: string}
10
+ - {name: string4, type: string}
11
+ - {name: record, type: json}
6
12
  filters:
7
13
  - type: timestamp_format
8
- default_to_timezone: "Asia/Tokyo"
14
+ default_from_timezone: "Asia/Taipei"
15
+ default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %z", "%Y-%m-%d"]
16
+ default_to_timezone: "Asia/Taipei"
9
17
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
10
18
  columns:
11
- - {name: "$.record.timestamp", type: long, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_unit: ms}
12
- - {name: "$.record.nested.nested[0].timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
19
+ - {name: string1, type: string}
20
+ - {name: string2, type: long, to_unit: ms}
21
+ - {name: string3, type: double, to_unit: ms}
22
+ - {name: string4, type: timestamp}
23
+ - {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"}
24
+ - {name: $.record.string2, type: long, to_unit: ms}
25
+ - {name: $.record.string3, type: double, to_unit: ms}
13
26
  out:
14
27
  type: "null"
@@ -0,0 +1 @@
1
+ 1436713200100.2,1436713200100.2,1436713200100.2,1436713200100.2,"{""double1"":1436713200100.2,""double2"":1436713200100.2,""double3"":1436713200100.2}"
@@ -0,0 +1,9 @@
1
+ 2016-11-06 13:07:43.984 +0900: Embulk v0.8.6
2
+ 2016-11-06 13:07:44.752 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 13:07:44.767 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_double.csv'
4
+ 2016-11-06 13:07:44.771 +0900 [INFO] (0001:preview): Loading files [example/from_double.csv]
5
+ +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
6
+ | double1:string | double2:long | double3:double | double4:timestamp | record:json |
7
+ +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
8
+ | 2015-07-13 00:00:00.100199936 | 1,436,713,200 | 1.4367132001002E9 | 2015-07-12 15:00:00.100199936 UTC | {"double2":1436713200,"double3":1.4367132001002E9,"double1":"2015-07-13 00:00:00.100199936"} |
9
+ +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/double.csv
3
+ path_prefix: example/from_double.csv
4
4
  parser:
5
5
  type: csv
6
6
  columns:
@@ -8,13 +8,17 @@ in:
8
8
  - {name: double2, type: double}
9
9
  - {name: double3, type: double}
10
10
  - {name: double4, type: double}
11
+ - {name: record, type: json}
11
12
  filters:
12
13
  - type: timestamp_format
13
14
  default_from_timestamp_unit: ms
14
15
  columns:
15
16
  - {name: double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
16
- - {name: double2, type: timestamp}
17
- - {name: double3, type: long}
18
- - {name: double4, type: double}
17
+ - {name: double2, type: long}
18
+ - {name: double3, type: double}
19
+ - {name: double4, type: timestamp}
20
+ - {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
21
+ - {name: $.record.double2, type: long}
22
+ - {name: $.record.double3, type: double}
19
23
  out:
20
24
  type: "null"
@@ -0,0 +1 @@
1
+ 1436713200100,1436713200100,1436713200100,1436713200100,"{""long1"":1436713200100,""long2"":1436713200100,""long3"":1436713200100}"
@@ -0,0 +1,9 @@
1
+ 2016-11-06 13:11:24.079 +0900: Embulk v0.8.6
2
+ 2016-11-06 13:11:24.842 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 13:11:24.858 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_long.csv'
4
+ 2016-11-06 13:11:24.862 +0900 [INFO] (0001:preview): Loading files [example/from_long.csv]
5
+ +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
6
+ | long1:string | long2:long | long3:double | long4:timestamp | record:json |
7
+ +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
8
+ | 2015-07-13 00:00:00.100000000 | 1,436,713,200 | 1.4367132E9 | 2015-07-12 15:00:00.100 UTC | {"long3":1.4367132E9,"long2":1436713200,"long1":"2015-07-13 00:00:00.100000000"} |
9
+ +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/long.csv
3
+ path_prefix: example/from_long.csv
4
4
  parser:
5
5
  type: csv
6
6
  columns:
@@ -8,13 +8,17 @@ in:
8
8
  - {name: long2, type: long}
9
9
  - {name: long3, type: long}
10
10
  - {name: long4, type: long}
11
+ - {name: record, type: json}
11
12
  filters:
12
13
  - type: timestamp_format
13
14
  default_from_timestamp_unit: ms
14
15
  columns:
15
16
  - {name: long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
16
- - {name: long2, type: timestamp}
17
- - {name: long3, type: long}
18
- - {name: long4, type: double}
17
+ - {name: long2, type: long}
18
+ - {name: long3, type: double}
19
+ - {name: long4, type: timestamp}
20
+ - {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
21
+ - {name: $.record.long2, type: long}
22
+ - {name: $.record.long3, type: double}
19
23
  out:
20
24
  type: "null"
@@ -0,0 +1,14 @@
1
+ 2015-07-13,2015-07-13,2015-07-13,2015-07-13,"{""string1"":""2015-07-13"" ,""string2"":""2015-07-13"" ,""string3"":""2015-07-13"" }"
2
+ 2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,"{""string1"":""2015-07-13 UTC"" ,""string2"":""2015-07-13 UTC"" ,""string3"":""2015-07-13 UTC"" }"
3
+ 2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,"{""string1"":""2015-07-13 00:00:00"" ,""string2"":""2015-07-13 00:00:00"" ,""string3"":""2015-07-13 00:00:00"" }"
4
+ 2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,"{""string1"":""2015-07-12 16:00:00 UTC"" ,""string2"":""2015-07-12 16:00:00 UTC"" ,""string3"":""2015-07-12 16:00:00 UTC"" }"
5
+ 2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,"{""string1"":""2015-07-12 16:00:00.1 UTC"" ,""string2"":""2015-07-12 16:00:00.1 UTC"" ,""string3"":""2015-07-12 16:00:00.1 UTC"" }"
6
+ 2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,"{""string1"":""2015-07-12 16:00:00.12 UTC"" ,""string2"":""2015-07-12 16:00:00.12 UTC"" ,""string3"":""2015-07-12 16:00:00.12 UTC"" }"
7
+ 2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,"{""string1"":""2015-07-12 16:00:00.123 UTC"" ,""string2"":""2015-07-12 16:00:00.123 UTC"" ,""string3"":""2015-07-12 16:00:00.123 UTC"" }"
8
+ 2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,"{""string1"":""2015-07-12 16:00:00.1234 UTC"" ,""string2"":""2015-07-12 16:00:00.1234 UTC"" ,""string3"":""2015-07-12 16:00:00.1234 UTC"" }"
9
+ 2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,"{""string1"":""2015-07-12 16:00:00.12345 UTC"" ,""string2"":""2015-07-12 16:00:00.12345 UTC"" ,""string3"":""2015-07-12 16:00:00.12345 UTC"" }"
10
+ 2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,"{""string1"":""2015-07-12 16:00:00.123456 UTC"" ,""string2"":""2015-07-12 16:00:00.123456 UTC"" ,""string3"":""2015-07-12 16:00:00.123456 UTC"" }"
11
+ 2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,"{""string1"":""2015-07-12 16:00:00.1234567 UTC"" ,""string2"":""2015-07-12 16:00:00.1234567 UTC"" ,""string3"":""2015-07-12 16:00:00.1234567 UTC"" }"
12
+ 2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,"{""string1"":""2015-07-12 16:00:00.12345678 UTC"" ,""string2"":""2015-07-12 16:00:00.12345678 UTC"" ,""string3"":""2015-07-12 16:00:00.12345678 UTC"" }"
13
+ 2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,"{""string1"":""2015-07-12 16:00:00.123456789 UTC"",""string2"":""2015-07-12 16:00:00.123456789 UTC"",""string3"":""2015-07-12 16:00:00.123456789 UTC""}"
14
+
@@ -0,0 +1,21 @@
1
+ 2016-11-06 13:28:37.337 +0900: Embulk v0.8.6
2
+ 2016-11-06 13:28:38.096 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 13:28:38.112 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
+ 2016-11-06 13:28:38.116 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
5
+ +-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
6
+ | string1:string | string2:long | string3:double | string4:timestamp | record:json |
7
+ +-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
8
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
9
+ | 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
10
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
11
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
12
+ | 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
13
+ | 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
14
+ | 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
15
+ | 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
16
+ | 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
17
+ | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
18
+ | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
19
+ | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
20
+ | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
21
+ +-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/string.csv
3
+ path_prefix: example/from_string.csv
4
4
  parser:
5
5
  type: csv
6
6
  columns:
@@ -8,6 +8,7 @@ in:
8
8
  - {name: string2, type: string}
9
9
  - {name: string3, type: string}
10
10
  - {name: string4, type: string}
11
+ - {name: record, type: json}
11
12
  filters:
12
13
  - type: timestamp_format
13
14
  default_from_timezone: "Asia/Taipei"
@@ -15,9 +16,12 @@ filters:
15
16
  default_to_timezone: "Asia/Taipei"
16
17
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
17
18
  columns:
18
- - {name: string1}
19
- - {name: string2, type: timestamp}
20
- - {name: string3, type: long, to_unit: ms}
21
- - {name: string4, type: double, to_unit: ms}
19
+ - {name: string1, type: string}
20
+ - {name: string2, type: long, to_unit: ms}
21
+ - {name: string3, type: double, to_unit: ms}
22
+ - {name: string4, type: timestamp}
23
+ - {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"}
24
+ - {name: $.record.string2, type: long, to_unit: ms}
25
+ - {name: $.record.string3, type: double, to_unit: ms}
22
26
  out:
23
27
  type: "null"
@@ -0,0 +1,21 @@
1
+ 2016-11-06 14:15:56.683 +0900: Embulk v0.8.6
2
+ 2016-11-06 14:15:57.554 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 14:15:57.568 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
+ 2016-11-06 14:15:57.573 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
5
+ +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
6
+ | string1:string | string2:long | string3:double | string4:timestamp | record:json |
7
+ +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
8
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
9
+ | 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
10
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
11
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
12
+ | 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
13
+ | 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
14
+ | 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
15
+ | 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
16
+ | 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
17
+ | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
18
+ | 2015-07-13 00:00:00.123456700 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456700"} |
19
+ | 2015-07-13 00:00:00.123456780 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456780"} |
20
+ | 2015-07-13 00:00:00.123456789 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456789"} |
21
+ +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/string.csv
3
+ path_prefix: example/from_string.csv
4
4
  parser:
5
5
  type: csv
6
6
  columns:
@@ -8,6 +8,7 @@ in:
8
8
  - {name: string2, type: string}
9
9
  - {name: string3, type: string}
10
10
  - {name: string4, type: string}
11
+ - {name: record, type: json}
11
12
  filters:
12
13
  - type: timestamp_format
13
14
  default_from_timezone: "Asia/Taipei"
@@ -17,8 +18,11 @@ filters:
17
18
  timestamp_parser: auto_java
18
19
  columns:
19
20
  - {name: string1}
20
- - {name: string2, type: timestamp}
21
- - {name: string3, type: long, to_unit: ms}
22
- - {name: string4, type: double, to_unit: ms}
21
+ - {name: string2, type: long, to_unit: ms}
22
+ - {name: string3, type: double, to_unit: ms}
23
+ - {name: string4, type: timestamp}
24
+ - {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"}
25
+ - {name: $.record.string2, type: long, to_unit: ms}
26
+ - {name: $.record.string3, type: double, to_unit: ms}
23
27
  out:
24
28
  type: "null"
@@ -0,0 +1,21 @@
1
+ 2016-11-06 14:16:05.976 +0900: Embulk v0.8.6
2
+ 2016-11-06 14:16:06.833 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 14:16:06.848 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
+ 2016-11-06 14:16:06.852 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
5
+ +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
6
+ | string1:string | string2:long | string3:double | string4:timestamp | record:json |
7
+ +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
8
+ | 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} |
9
+ | 2015-07-13 08:00:00.000000 +0800 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000 +0800"} |
10
+ | 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} |
11
+ | 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} |
12
+ | 2015-07-13 00:00:00.100000 +0800 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000 +0800"} |
13
+ | 2015-07-13 00:00:00.120000 +0800 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000 +0800"} |
14
+ | 2015-07-13 00:00:00.123000 +0800 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000 +0800"} |
15
+ | 2015-07-13 00:00:00.123400 +0800 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400 +0800"} |
16
+ | 2015-07-13 00:00:00.123450 +0800 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450 +0800"} |
17
+ | 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
18
+ | 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
19
+ | 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
20
+ | 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
21
+ +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/string.csv
3
+ path_prefix: example/from_string.csv
4
4
  parser:
5
5
  type: csv
6
6
  columns:
@@ -8,6 +8,7 @@ in:
8
8
  - {name: string2, type: string}
9
9
  - {name: string3, type: string}
10
10
  - {name: string4, type: string}
11
+ - {name: record, type: json}
11
12
  filters:
12
13
  - type: timestamp_format
13
14
  default_from_timezone: "Asia/Taipei"
@@ -16,8 +17,11 @@ filters:
16
17
  default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.nnnnnn Z"
17
18
  columns:
18
19
  - {name: string1}
19
- - {name: string2, type: timestamp}
20
- - {name: string3, type: long, to_unit: ms}
21
- - {name: string4, type: double, to_unit: ms}
20
+ - {name: string2, type: long, to_unit: ms}
21
+ - {name: string3, type: double, to_unit: ms}
22
+ - {name: string4, type: timestamp}
23
+ - {name: $.record.string1}
24
+ - {name: $.record.string2, type: long, to_unit: ms}
25
+ - {name: $.record.string3, type: double, to_unit: ms}
22
26
  out:
23
27
  type: "null"
File without changes
@@ -0,0 +1,9 @@
1
+ 2016-11-06 13:32:15.784 +0900: Embulk v0.8.6
2
+ 2016-11-06 13:32:16.556 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 13:32:16.571 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_timestamp.csv'
4
+ 2016-11-06 13:32:16.576 +0900 [INFO] (0001:preview): Loading files [example/from_timestamp.csv]
5
+ +-------------------------------+-------------------+-------------------+-----------------------------+
6
+ | timestamp1:string | timestamp2:long | timestamp3:double | timestamp4:timestamp |
7
+ +-------------------------------+-------------------+-------------------+-----------------------------+
8
+ | 2015-07-13 00:00:00.100000000 | 1,436,713,200,100 | 1.4367132001E12 | 2015-07-12 15:00:00.100 UTC |
9
+ +-------------------------------+-------------------+-------------------+-----------------------------+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/timestamp.csv
3
+ path_prefix: example/from_timestamp.csv
4
4
  parser:
5
5
  type: csv
6
6
  default_timestamp_format: "%Y-%m-%d %H:%M:%S.%N %z"
@@ -15,8 +15,8 @@ filters:
15
15
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
16
16
  columns:
17
17
  - {name: timestamp1, to_format: "%Y-%m-%d %H:%M:%S.%N"}
18
- - {name: timestamp2, type: timestamp}
19
- - {name: timestamp3, type: long, to_unit: ms}
20
- - {name: timestamp4, type: double, to_unit: ms}
18
+ - {name: timestamp2, type: long, to_unit: ms}
19
+ - {name: timestamp3, type: double, to_unit: ms}
20
+ - {name: timestamp4, type: timestamp}
21
21
  out:
22
22
  type: "null"
File without changes
@@ -0,0 +1,10 @@
1
+ 2016-11-06 14:25:21.964 +0900: Embulk v0.8.6
2
+ 2016-11-06 14:25:22.829 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 14:25:22.844 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
4
+ 2016-11-06 14:25:22.850 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
5
+ +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
6
+ | record:json |
7
+ +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
8
+ | {"timestamp":1436713200000,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} |
9
+ | {"timestamp":1436713200100,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} |
10
+ +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/nested.jsonl
4
+ parser:
5
+ type: json
6
+ filters:
7
+ - type: timestamp_format
8
+ default_to_timezone: "Asia/Tokyo"
9
+ default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
10
+ columns:
11
+ - {name: "$.record.timestamp", type: long, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_unit: ms}
12
+ - {name: "$.record.nested.nested[0].timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
13
+ out:
14
+ type: "null"
File without changes
@@ -0,0 +1,10 @@
1
+ 2016-11-06 14:25:02.170 +0900: Embulk v0.8.6
2
+ 2016-11-06 14:25:03.024 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 14:25:03.039 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'timezone.csv'
4
+ 2016-11-06 14:25:03.043 +0900 [INFO] (0001:preview): Loading files [example/timezone.csv]
5
+ +----------------+-------------------------------------+
6
+ | string1:string | string2:string |
7
+ +----------------+-------------------------------------+
8
+ | 2015-07-13 | 2015-07-13 00:00:00.000000000 +0900 |
9
+ | 2015-07-13 | 2015-07-13 00:00:00.100000000 +0900 |
10
+ +----------------+-------------------------------------+
@@ -0,0 +1,16 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/timezone.csv
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: string1, type: string}
8
+ - {name: string2, type: string}
9
+ filters:
10
+ - type: timestamp_format
11
+ default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]
12
+ columns:
13
+ - {name: string1, to_format: "%Y-%m-%d", to_timezone: "Asia/Tokyo"}
14
+ - {name: string2, to_format: "%Y-%m-%d %H:%M:%S.%N %z", to_timezone: "Asia/Tokyo"}
15
+ out:
16
+ type: "null"
@@ -1,5 +1,6 @@
1
1
  package org.embulk.filter.timestamp_format;
2
2
 
3
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
3
4
  import org.embulk.filter.timestamp_format.cast.DoubleCast;
4
5
  import org.embulk.filter.timestamp_format.cast.LongCast;
5
6
  import org.embulk.filter.timestamp_format.cast.StringCast;
@@ -237,7 +238,8 @@ public class ColumnCaster
237
238
 
238
239
  public void setFromJson(Column outputColumn, Value value)
239
240
  {
240
- String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
241
+ String pathFragment = PropertyPathToken.getPathFragment(outputColumn.getName());
242
+ String jsonPath = new StringBuilder("$").append(pathFragment).toString();
241
243
  pageBuilder.setJson(outputColumn, jsonVisitor.visit(jsonPath, value));
242
244
  }
243
245
  }
@@ -1,5 +1,6 @@
1
1
  package org.embulk.filter.timestamp_format;
2
2
 
3
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
3
4
  import org.embulk.spi.DataException;
4
5
  import org.embulk.spi.PageReader;
5
6
  import org.embulk.spi.Schema;
@@ -48,10 +49,9 @@ public class ColumnVisitorImpl
48
49
  // columnName => Boolean to avoid unnecessary cast
49
50
  for (ColumnConfig columnConfig : task.getColumns()) {
50
51
  String name = columnConfig.getName();
51
- if (name.startsWith("$.")) {
52
- String firstName = name.split("\\.", 3)[1]; // check only top level column name
53
- String firstPartName = firstName.split("\\[")[0];
54
- shouldCastSet.add(firstPartName);
52
+ if (PathCompiler.isProbablyJsonPath(name)) {
53
+ String columnName = JsonPathUtil.getColumnName(name);
54
+ shouldCastSet.add(columnName);
55
55
  continue;
56
56
  }
57
57
  shouldCastSet.add(name);
@@ -0,0 +1,78 @@
1
+ package org.embulk.filter.timestamp_format;
2
+
3
+ import io.github.medjed.jsonpathcompiler.InvalidPathException;
4
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.FunctionPathToken;
8
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
9
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
10
+ import io.github.medjed.jsonpathcompiler.expressions.path.PredicatePathToken;
11
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
12
+ import io.github.medjed.jsonpathcompiler.expressions.path.ScanPathToken;
13
+ import org.embulk.config.ConfigException;
14
+
15
+ public class JsonPathUtil
16
+ {
17
+ private JsonPathUtil() {}
18
+
19
+ public static String getColumnName(String jsonPath)
20
+ {
21
+ Path compiledPath;
22
+ try {
23
+ compiledPath = PathCompiler.compile(jsonPath);
24
+ }
25
+ catch (InvalidPathException e) {
26
+ throw new ConfigException(String.format("jsonpath %s, %s", jsonPath, e.getMessage()));
27
+ }
28
+ PathToken pathToken = compiledPath.getRoot();
29
+ pathToken = pathToken.next(); // skip $
30
+ return ((PropertyPathToken) pathToken).getProperties().get(0);
31
+ }
32
+
33
+ public static void assertJsonPathFormat(String path)
34
+ {
35
+ Path compiledPath;
36
+ try {
37
+ compiledPath = PathCompiler.compile(path);
38
+ }
39
+ catch (InvalidPathException e) {
40
+ throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
41
+ }
42
+ PathToken pathToken = compiledPath.getRoot();
43
+ while (true) {
44
+ assertSupportedPathToken(pathToken, path);
45
+ if (pathToken.isLeaf()) {
46
+ break;
47
+ }
48
+ pathToken = pathToken.next();
49
+ }
50
+ }
51
+
52
+ protected static void assertSupportedPathToken(PathToken pathToken, String path)
53
+ {
54
+ if (pathToken instanceof ArrayPathToken) {
55
+ ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
56
+ assertSupportedArrayPathToken(arrayIndexOperation, path);
57
+ }
58
+ else if (pathToken instanceof ScanPathToken) {
59
+ throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
60
+ }
61
+ else if (pathToken instanceof FunctionPathToken) {
62
+ throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
63
+ }
64
+ else if (pathToken instanceof PredicatePathToken) {
65
+ throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
66
+ }
67
+ }
68
+
69
+ protected static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
70
+ {
71
+ if (arrayIndexOperation == null) {
72
+ throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
73
+ }
74
+ else if (!arrayIndexOperation.isSingleIndexOperation()) {
75
+ throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
76
+ }
77
+ }
78
+ }
@@ -1,5 +1,10 @@
1
1
  package org.embulk.filter.timestamp_format;
2
2
 
3
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
4
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
3
8
  import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.ColumnConfig;
4
9
  import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.PluginTask;
5
10
 
@@ -28,19 +33,32 @@ public class JsonVisitor
28
33
  this.task = task;
29
34
  this.jsonCaster = jsonCaster;
30
35
 
36
+ assertJsonPathFormat();
31
37
  buildJsonPathColumnConfigMap();
32
38
  buildShouldVisitSet();
33
39
  }
34
40
 
41
+ private void assertJsonPathFormat()
42
+ {
43
+ for (ColumnConfig columnConfig : task.getColumns()) {
44
+ String name = columnConfig.getName();
45
+ if (!PathCompiler.isProbablyJsonPath(name)) {
46
+ continue;
47
+ }
48
+ JsonPathUtil.assertJsonPathFormat(name);
49
+ }
50
+ }
51
+
35
52
  private void buildJsonPathColumnConfigMap()
36
53
  {
37
54
  // json path => Type
38
55
  for (ColumnConfig columnConfig : task.getColumns()) {
39
56
  String name = columnConfig.getName();
40
- if (!name.startsWith("$.")) {
57
+ if (!PathCompiler.isProbablyJsonPath(name)) {
41
58
  continue;
42
59
  }
43
- this.jsonPathColumnConfigMap.put(name, columnConfig);
60
+ Path compiledPath = PathCompiler.compile(name);
61
+ this.jsonPathColumnConfigMap.put(compiledPath.toString(), columnConfig);
44
62
  }
45
63
  }
46
64
 
@@ -49,26 +67,16 @@ public class JsonVisitor
49
67
  // json partial path => Boolean to avoid unnecessary type: json visit
50
68
  for (ColumnConfig columnConfig : task.getColumns()) {
51
69
  String name = columnConfig.getName();
52
- if (!name.startsWith("$.")) {
70
+ if (! PathCompiler.isProbablyJsonPath(name)) {
53
71
  continue;
54
72
  }
55
- String[] parts = name.split("\\.");
73
+ Path compiledPath = PathCompiler.compile(name);
74
+ PathToken parts = compiledPath.getRoot();
56
75
  StringBuilder partialPath = new StringBuilder("$");
57
- for (int i = 1; i < parts.length; i++) {
58
- if (parts[i].contains("[")) {
59
- String[] arrayParts = parts[i].split("\\[");
60
- partialPath.append(".").append(arrayParts[0]);
61
- this.shouldVisitSet.add(partialPath.toString());
62
- for (int j = 1; j < arrayParts.length; j++) {
63
- // Support both [0] and [*]
64
- partialPath.append("[").append(arrayParts[j]);
65
- this.shouldVisitSet.add(partialPath.toString());
66
- }
67
- }
68
- else {
69
- partialPath.append(".").append(parts[i]);
70
- this.shouldVisitSet.add(partialPath.toString());
71
- }
76
+ while (! parts.isLeaf()) {
77
+ parts = parts.next(); // first next() skips "$"
78
+ partialPath.append(parts.getPathFragment());
79
+ this.shouldVisitSet.add(partialPath.toString());
72
80
  }
73
81
  }
74
82
  }
@@ -88,7 +96,8 @@ public class JsonVisitor
88
96
  int size = arrayValue.size();
89
97
  Value[] newValue = new Value[size];
90
98
  for (int i = 0; i < size; i++) {
91
- String k = new StringBuilder(rootPath).append("[").append(Integer.toString(i)).append("]").toString();
99
+ String pathFragment = ArrayPathToken.getPathFragment(i);
100
+ String k = new StringBuilder(rootPath).append(pathFragment).toString();
92
101
  if (!shouldVisit(k)) {
93
102
  k = new StringBuilder(rootPath).append("[*]").toString(); // try [*] too
94
103
  }
@@ -105,7 +114,8 @@ public class JsonVisitor
105
114
  for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
106
115
  Value k = entry.getKey();
107
116
  Value v = entry.getValue();
108
- String newPath = new StringBuilder(rootPath).append(".").append(k.asStringValue().asString()).toString();
117
+ String pathFragment = PropertyPathToken.getPathFragment(k.asStringValue().asString());
118
+ String newPath = new StringBuilder(rootPath).append(pathFragment).toString();
109
119
  Value r = visit(newPath, v);
110
120
  newValue[i++] = k;
111
121
  newValue[i++] = r;
@@ -2,6 +2,7 @@ package org.embulk.filter.timestamp_format;
2
2
 
3
3
  import com.google.common.base.Optional;
4
4
  import com.google.common.collect.ImmutableList;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
5
6
  import org.embulk.config.Config;
6
7
  import org.embulk.config.ConfigDefault;
7
8
  import org.embulk.config.ConfigException;
@@ -99,10 +100,9 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
99
100
  // throw if column does not exist
100
101
  for (ColumnConfig columnConfig : columns) {
101
102
  String name = columnConfig.getName();
102
- if (name.startsWith("$.")) {
103
- String firstName = name.split("\\.", 3)[1]; // check only top level column name
104
- String firstNameWithoutArray = firstName.split("\\[")[0];
105
- inputSchema.lookupColumn(firstNameWithoutArray);
103
+ if (PathCompiler.isProbablyJsonPath(name)) {
104
+ String columnName = JsonPathUtil.getColumnName(name);
105
+ inputSchema.lookupColumn(columnName);
106
106
  }
107
107
  else {
108
108
  inputSchema.lookupColumn(name);
@@ -119,7 +119,7 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
119
119
  if (type instanceof JsonType) {
120
120
  throw new ConfigException(String.format("casting to json is not available: \"%s\"", name));
121
121
  }
122
- if (name.startsWith("$.") && type instanceof TimestampType) {
122
+ if (PathCompiler.isProbablyJsonPath(name) && type instanceof TimestampType) {
123
123
  throw new ConfigException(String.format("casting a json path into timestamp is not available: \"%s\"", name));
124
124
  }
125
125
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-timestamp_format
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-25 00:00:00.000000000 Z
11
+ date: 2016-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -58,28 +58,32 @@ files:
58
58
  - bench/gen_dummy.rb
59
59
  - build.gradle
60
60
  - config/checkstyle/checkstyle.xml
61
- - example/double.csv
62
- - example/double.yml
61
+ - example/bracket_notation.txt
62
+ - example/bracket_notation.yml
63
63
  - example/empty.yml
64
- - example/example.jsonl
65
64
  - example/example.yml
66
- - example/example2.csv
67
- - example/example2.yml
68
- - example/json_double.jsonl
69
- - example/json_double.yml
70
- - example/json_long.jsonl
71
- - example/json_long.yml
72
- - example/json_string.jsonl
73
- - example/json_string.yml
74
- - example/long.csv
75
- - example/long.yml
76
- - example/string.csv
77
- - example/string.yml
78
- - example/string_auto_java.yml
79
- - example/string_java.yml
80
- - example/string_nano.yml
81
- - example/timestamp.csv
82
- - example/timestamp.yml
65
+ - example/from_double.csv
66
+ - example/from_double.txt
67
+ - example/from_double.yml
68
+ - example/from_long.csv
69
+ - example/from_long.txt
70
+ - example/from_long.yml
71
+ - example/from_string.csv
72
+ - example/from_string.txt
73
+ - example/from_string.yml
74
+ - example/from_string_auto_java.txt
75
+ - example/from_string_auto_java.yml
76
+ - example/from_string_java.txt
77
+ - example/from_string_java.yml
78
+ - example/from_timestamp.csv
79
+ - example/from_timestamp.txt
80
+ - example/from_timestamp.yml
81
+ - example/nested.jsonl
82
+ - example/nested.txt
83
+ - example/nested.yml
84
+ - example/timezone.csv
85
+ - example/timezone.txt
86
+ - example/timezone.yml
83
87
  - gradle/wrapper/gradle-wrapper.jar
84
88
  - gradle/wrapper/gradle-wrapper.properties
85
89
  - gradlew
@@ -89,6 +93,7 @@ files:
89
93
  - src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java
90
94
  - src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java
91
95
  - src/main/java/org/embulk/filter/timestamp_format/JsonCaster.java
96
+ - src/main/java/org/embulk/filter/timestamp_format/JsonPathUtil.java
92
97
  - src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java
93
98
  - src/main/java/org/embulk/filter/timestamp_format/TimestampFormatConverter.java
94
99
  - src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java
@@ -102,7 +107,13 @@ files:
102
107
  - src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
103
108
  - src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java
104
109
  - src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
105
- - classpath/embulk-filter-timestamp_format-0.2.3.jar
110
+ - classpath/accessors-smart-1.1.jar
111
+ - classpath/asm-5.0.3.jar
112
+ - classpath/commons-lang3-3.4.jar
113
+ - classpath/embulk-filter-timestamp_format-0.2.4.jar
114
+ - classpath/json-smart-2.2.1.jar
115
+ - classpath/JsonPathCompiler-0.1.1.jar
116
+ - classpath/slf4j-api-1.7.21.jar
106
117
  homepage: https://github.com/sonots/embulk-filter-timestamp_format
107
118
  licenses:
108
119
  - MIT
data/example/double.csv DELETED
@@ -1,2 +0,0 @@
1
- 1436713200100.2,1436713200100.2,1436713200100.2,1436713200100.2
2
-
data/example/example2.yml DELETED
@@ -1,14 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example2.csv
4
- parser:
5
- type: csv
6
- columns:
7
- - {name: string1, type: string}
8
- - {name: string2, type: string}
9
- filters:
10
- - type: timestamp_format
11
- columns:
12
- - {name: string1, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_format: "%Y-%m-%m", to_timezone: "Asia/Tokyo"}
13
- out:
14
- type: "null"
@@ -1 +0,0 @@
1
- {"double1":1436713200100.2,"double2":1436713200100.2,"double3":1436713200100.2}
@@ -1,14 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/json_double.jsonl
4
- parser:
5
- type: json
6
- filters:
7
- - type: timestamp_format
8
- default_from_timestamp_unit: ms
9
- columns:
10
- - {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
11
- - {name: $.record.double2, type: long}
12
- - {name: $.record.double3, type: double}
13
- out:
14
- type: "null"
@@ -1 +0,0 @@
1
- {"long1":1436713200100,"long2":1436713200100,"long3":1436713200100}
@@ -1,14 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/json_long.jsonl
4
- parser:
5
- type: json
6
- filters:
7
- - type: timestamp_format
8
- default_from_timestamp_unit: ms
9
- columns:
10
- - {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
11
- - {name: $.record.long2, type: long}
12
- - {name: $.record.long3, type: double}
13
- out:
14
- type: "null"
@@ -1,2 +0,0 @@
1
- {"string1":"2015-07-12 15:00:00 UTC","string2":"2015-07-12 15:00:00 UTC","string3":"2015-07-12 15:00:00 UTC"}
2
- {"string1":"2015-07-12 15:00:00.1 UTC","string2":"2015-07-12 15:00:00.1 UTC","string3":"2015-07-12 15:00:00.1 UTC"}
@@ -1,14 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/json_string.jsonl
4
- parser:
5
- type: json
6
- filters:
7
- - type: timestamp_format
8
- default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S.%N %z"]
9
- columns:
10
- - {name: $.record.string1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
11
- - {name: $.record.string2, type: long, to_unit: ms}
12
- - {name: $.record.string3, type: double, to_unit: ms}
13
- out:
14
- type: "null"
data/example/long.csv DELETED
@@ -1 +0,0 @@
1
- 1436713200100,1436713200100,1436713200100,1436713200100
data/example/string.csv DELETED
@@ -1,14 +0,0 @@
1
- 2015-07-13,2015-07-13,2015-07-13,2015-07-13
2
- 2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC
3
- 2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00
4
- 2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC
5
- 2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC
6
- 2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC
7
- 2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC
8
- 2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC
9
- 2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC
10
- 2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC
11
- 2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC
12
- 2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC
13
- 2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC
14
-
@@ -1,23 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/string.csv
4
- parser:
5
- type: csv
6
- columns:
7
- - {name: string1, type: string}
8
- - {name: string2, type: string}
9
- - {name: string3, type: string}
10
- - {name: string4, type: string}
11
- filters:
12
- - type: timestamp_format
13
- default_from_timezone: "Asia/Taipei"
14
- default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.SSSSSSSSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
15
- default_to_timezone: "Asia/Taipei"
16
- default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
17
- columns:
18
- - {name: string1}
19
- - {name: string2, type: timestamp}
20
- - {name: string3, type: long, to_unit: ms}
21
- - {name: string4, type: double, to_unit: ms}
22
- out:
23
- type: "null"