embulk-filter-timestamp_format 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +11 -5
  4. data/build.gradle +3 -2
  5. data/example/bracket_notation.txt +10 -0
  6. data/example/bracket_notation.yml +14 -0
  7. data/example/example.yml +18 -5
  8. data/example/from_double.csv +1 -0
  9. data/example/from_double.txt +9 -0
  10. data/example/{double.yml → from_double.yml} +8 -4
  11. data/example/from_long.csv +1 -0
  12. data/example/from_long.txt +9 -0
  13. data/example/{long.yml → from_long.yml} +8 -4
  14. data/example/from_string.csv +14 -0
  15. data/example/from_string.txt +21 -0
  16. data/example/{string.yml → from_string.yml} +9 -5
  17. data/example/from_string_auto_java.txt +21 -0
  18. data/example/{string_auto_java.yml → from_string_auto_java.yml} +8 -4
  19. data/example/from_string_java.txt +21 -0
  20. data/example/{string_nano.yml → from_string_java.yml} +8 -4
  21. data/example/{timestamp.csv → from_timestamp.csv} +0 -0
  22. data/example/from_timestamp.txt +9 -0
  23. data/example/{timestamp.yml → from_timestamp.yml} +4 -4
  24. data/example/{example.jsonl → nested.jsonl} +0 -0
  25. data/example/nested.txt +10 -0
  26. data/example/nested.yml +14 -0
  27. data/example/{example2.csv → timezone.csv} +0 -0
  28. data/example/timezone.txt +10 -0
  29. data/example/timezone.yml +16 -0
  30. data/src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java +3 -1
  31. data/src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java +4 -4
  32. data/src/main/java/org/embulk/filter/timestamp_format/JsonPathUtil.java +78 -0
  33. data/src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java +31 -21
  34. data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java +5 -5
  35. metadata +34 -23
  36. data/example/double.csv +0 -2
  37. data/example/example2.yml +0 -14
  38. data/example/json_double.jsonl +0 -1
  39. data/example/json_double.yml +0 -14
  40. data/example/json_long.jsonl +0 -1
  41. data/example/json_long.yml +0 -14
  42. data/example/json_string.jsonl +0 -2
  43. data/example/json_string.yml +0 -14
  44. data/example/long.csv +0 -1
  45. data/example/string.csv +0 -14
  46. data/example/string_java.yml +0 -23
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7a2a594a84f03137480454f65b2ee9f5e3bbc58a
4
- data.tar.gz: 836fb23b2550507518b6f2de9ca77b3220e09457
3
+ metadata.gz: b154ff10f65055de61c4bc6849cf97b64a280e38
4
+ data.tar.gz: a452a5091c1128268b22cdb74b462b18ab15457a
5
5
  SHA512:
6
- metadata.gz: b7deb792bec505f3cd70e0e191d98473d383d4f58dddba199dad285c47f0cf2fb003fb2e8face50336bfb9577e769b7d673fa99e2ce4be2a21269a0a2b0cb831
7
- data.tar.gz: 0135e7bca5a55b62b31e6768e3b1a7e3e4ebfa1caa66bd289187d1a8fdd512bd270041d626205a10bad1d6abe33e36834a87aea3da3cca93e52cff6c2325ae1d
6
+ metadata.gz: 381c6bf3590dd48476d6b30aedc0211896bd057740e9f9a0d5f1032af3d12f545046129580ff62efd4f1f671d71f3a37a04183c37fe2083801babb8b0394b567
7
+ data.tar.gz: 73e0ab777b21d4f07e640a8f5cb06607b4f1ee6bfdaa20e47896901aeb2bd68009095b749b754981a2a2d4af62e08f68fc5f5e941b630c761376c9045b7add95
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.2.4 (2016-11-06)
2
+
3
+ Enhancements:
4
+
5
+ * Support jsonpath bracket notation
6
+
1
7
  # 0.2.3 (2016-10-25)
2
8
 
3
9
  Fixes:
data/README.md CHANGED
@@ -63,17 +63,23 @@ Output will be as:
63
63
 
64
64
  See [./example](./example) for more examples.
65
65
 
66
- ## JSONPath (like) name
66
+ ## JSONPath
67
67
 
68
68
  For `type: json` column, you can specify [JSONPath](http://goessner.net/articles/JsonPath/) for column's name as:
69
69
 
70
70
  ```
71
- $.payload.key1
72
- $.payload.array[0]
73
- $.payload.array[*]
71
+ name: $.payload.key1
72
+ name: "$.payload.array[0]"
73
+ name: "$.payload.array[*]"
74
+ name: $['payload']['key1.key2']
74
75
  ```
75
76
 
76
- NOTE: JSONPath syntax is not fully supported
77
+ Following operators of JSONPath are not supported:
78
+
79
+ * Multiple properties such as `['name','name']`
80
+ * Multiple array indexes such as `[1,2]`
81
+ * Array slice such as `[1:2]`
82
+ * Filter expression such as `[?(<expression>)]`
77
83
 
78
84
  ## JRuby Timestamp Parser Performance Issue
79
85
 
data/build.gradle CHANGED
@@ -13,14 +13,15 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.2.3"
16
+ version = "0.2.4"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
20
  dependencies {
21
21
  compile "org.embulk:embulk-core:0.8.+"
22
22
  provided "org.embulk:embulk-core:0.8.+"
23
- // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
23
+ compile "io.github.medjed:JsonPathCompiler:0.1.+"
24
+
24
25
  testCompile "junit:junit:4.+"
25
26
  testCompile "org.embulk:embulk-core:0.7.+:tests"
26
27
  }
@@ -0,0 +1,10 @@
1
+ 2016-11-06 14:37:03.501 +0900: Embulk v0.8.6
2
+ 2016-11-06 14:37:04.349 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 14:37:04.365 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
4
+ 2016-11-06 14:37:04.371 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
5
+ +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
6
+ | record:json |
7
+ +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
8
+ | {"timestamp":1436713200000,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} |
9
+ | {"timestamp":1436713200100,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} |
10
+ +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/nested.jsonl
4
+ parser:
5
+ type: json
6
+ filters:
7
+ - type: timestamp_format
8
+ default_to_timezone: "Asia/Tokyo"
9
+ default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
10
+ columns:
11
+ - {name: "$['record']['timestamp']", type: long, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_unit: ms}
12
+ - {name: "$['record']['nested']['nested'][0]['timestamp']", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
13
+ out:
14
+ type: "null"
data/example/example.yml CHANGED
@@ -1,14 +1,27 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/example.jsonl
3
+ path_prefix: example/from_string.csv
4
4
  parser:
5
- type: json
5
+ type: csv
6
+ columns:
7
+ - {name: string1, type: string}
8
+ - {name: string2, type: string}
9
+ - {name: string3, type: string}
10
+ - {name: string4, type: string}
11
+ - {name: record, type: json}
6
12
  filters:
7
13
  - type: timestamp_format
8
- default_to_timezone: "Asia/Tokyo"
14
+ default_from_timezone: "Asia/Taipei"
15
+ default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %z", "%Y-%m-%d"]
16
+ default_to_timezone: "Asia/Taipei"
9
17
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
10
18
  columns:
11
- - {name: "$.record.timestamp", type: long, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_unit: ms}
12
- - {name: "$.record.nested.nested[0].timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
19
+ - {name: string1, type: string}
20
+ - {name: string2, type: long, to_unit: ms}
21
+ - {name: string3, type: double, to_unit: ms}
22
+ - {name: string4, type: timestamp}
23
+ - {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"}
24
+ - {name: $.record.string2, type: long, to_unit: ms}
25
+ - {name: $.record.string3, type: double, to_unit: ms}
13
26
  out:
14
27
  type: "null"
@@ -0,0 +1 @@
1
+ 1436713200100.2,1436713200100.2,1436713200100.2,1436713200100.2,"{""double1"":1436713200100.2,""double2"":1436713200100.2,""double3"":1436713200100.2}"
@@ -0,0 +1,9 @@
1
+ 2016-11-06 13:07:43.984 +0900: Embulk v0.8.6
2
+ 2016-11-06 13:07:44.752 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 13:07:44.767 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_double.csv'
4
+ 2016-11-06 13:07:44.771 +0900 [INFO] (0001:preview): Loading files [example/from_double.csv]
5
+ +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
6
+ | double1:string | double2:long | double3:double | double4:timestamp | record:json |
7
+ +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
8
+ | 2015-07-13 00:00:00.100199936 | 1,436,713,200 | 1.4367132001002E9 | 2015-07-12 15:00:00.100199936 UTC | {"double2":1436713200,"double3":1.4367132001002E9,"double1":"2015-07-13 00:00:00.100199936"} |
9
+ +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/double.csv
3
+ path_prefix: example/from_double.csv
4
4
  parser:
5
5
  type: csv
6
6
  columns:
@@ -8,13 +8,17 @@ in:
8
8
  - {name: double2, type: double}
9
9
  - {name: double3, type: double}
10
10
  - {name: double4, type: double}
11
+ - {name: record, type: json}
11
12
  filters:
12
13
  - type: timestamp_format
13
14
  default_from_timestamp_unit: ms
14
15
  columns:
15
16
  - {name: double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
16
- - {name: double2, type: timestamp}
17
- - {name: double3, type: long}
18
- - {name: double4, type: double}
17
+ - {name: double2, type: long}
18
+ - {name: double3, type: double}
19
+ - {name: double4, type: timestamp}
20
+ - {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
21
+ - {name: $.record.double2, type: long}
22
+ - {name: $.record.double3, type: double}
19
23
  out:
20
24
  type: "null"
@@ -0,0 +1 @@
1
+ 1436713200100,1436713200100,1436713200100,1436713200100,"{""long1"":1436713200100,""long2"":1436713200100,""long3"":1436713200100}"
@@ -0,0 +1,9 @@
1
+ 2016-11-06 13:11:24.079 +0900: Embulk v0.8.6
2
+ 2016-11-06 13:11:24.842 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 13:11:24.858 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_long.csv'
4
+ 2016-11-06 13:11:24.862 +0900 [INFO] (0001:preview): Loading files [example/from_long.csv]
5
+ +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
6
+ | long1:string | long2:long | long3:double | long4:timestamp | record:json |
7
+ +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
8
+ | 2015-07-13 00:00:00.100000000 | 1,436,713,200 | 1.4367132E9 | 2015-07-12 15:00:00.100 UTC | {"long3":1.4367132E9,"long2":1436713200,"long1":"2015-07-13 00:00:00.100000000"} |
9
+ +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/long.csv
3
+ path_prefix: example/from_long.csv
4
4
  parser:
5
5
  type: csv
6
6
  columns:
@@ -8,13 +8,17 @@ in:
8
8
  - {name: long2, type: long}
9
9
  - {name: long3, type: long}
10
10
  - {name: long4, type: long}
11
+ - {name: record, type: json}
11
12
  filters:
12
13
  - type: timestamp_format
13
14
  default_from_timestamp_unit: ms
14
15
  columns:
15
16
  - {name: long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
16
- - {name: long2, type: timestamp}
17
- - {name: long3, type: long}
18
- - {name: long4, type: double}
17
+ - {name: long2, type: long}
18
+ - {name: long3, type: double}
19
+ - {name: long4, type: timestamp}
20
+ - {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
21
+ - {name: $.record.long2, type: long}
22
+ - {name: $.record.long3, type: double}
19
23
  out:
20
24
  type: "null"
@@ -0,0 +1,14 @@
1
+ 2015-07-13,2015-07-13,2015-07-13,2015-07-13,"{""string1"":""2015-07-13"" ,""string2"":""2015-07-13"" ,""string3"":""2015-07-13"" }"
2
+ 2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,"{""string1"":""2015-07-13 UTC"" ,""string2"":""2015-07-13 UTC"" ,""string3"":""2015-07-13 UTC"" }"
3
+ 2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,"{""string1"":""2015-07-13 00:00:00"" ,""string2"":""2015-07-13 00:00:00"" ,""string3"":""2015-07-13 00:00:00"" }"
4
+ 2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,"{""string1"":""2015-07-12 16:00:00 UTC"" ,""string2"":""2015-07-12 16:00:00 UTC"" ,""string3"":""2015-07-12 16:00:00 UTC"" }"
5
+ 2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,"{""string1"":""2015-07-12 16:00:00.1 UTC"" ,""string2"":""2015-07-12 16:00:00.1 UTC"" ,""string3"":""2015-07-12 16:00:00.1 UTC"" }"
6
+ 2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,"{""string1"":""2015-07-12 16:00:00.12 UTC"" ,""string2"":""2015-07-12 16:00:00.12 UTC"" ,""string3"":""2015-07-12 16:00:00.12 UTC"" }"
7
+ 2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,"{""string1"":""2015-07-12 16:00:00.123 UTC"" ,""string2"":""2015-07-12 16:00:00.123 UTC"" ,""string3"":""2015-07-12 16:00:00.123 UTC"" }"
8
+ 2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,"{""string1"":""2015-07-12 16:00:00.1234 UTC"" ,""string2"":""2015-07-12 16:00:00.1234 UTC"" ,""string3"":""2015-07-12 16:00:00.1234 UTC"" }"
9
+ 2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,"{""string1"":""2015-07-12 16:00:00.12345 UTC"" ,""string2"":""2015-07-12 16:00:00.12345 UTC"" ,""string3"":""2015-07-12 16:00:00.12345 UTC"" }"
10
+ 2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,"{""string1"":""2015-07-12 16:00:00.123456 UTC"" ,""string2"":""2015-07-12 16:00:00.123456 UTC"" ,""string3"":""2015-07-12 16:00:00.123456 UTC"" }"
11
+ 2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,"{""string1"":""2015-07-12 16:00:00.1234567 UTC"" ,""string2"":""2015-07-12 16:00:00.1234567 UTC"" ,""string3"":""2015-07-12 16:00:00.1234567 UTC"" }"
12
+ 2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,"{""string1"":""2015-07-12 16:00:00.12345678 UTC"" ,""string2"":""2015-07-12 16:00:00.12345678 UTC"" ,""string3"":""2015-07-12 16:00:00.12345678 UTC"" }"
13
+ 2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,"{""string1"":""2015-07-12 16:00:00.123456789 UTC"",""string2"":""2015-07-12 16:00:00.123456789 UTC"",""string3"":""2015-07-12 16:00:00.123456789 UTC""}"
14
+
@@ -0,0 +1,21 @@
1
+ 2016-11-06 13:28:37.337 +0900: Embulk v0.8.6
2
+ 2016-11-06 13:28:38.096 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 13:28:38.112 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
+ 2016-11-06 13:28:38.116 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
5
+ +-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
6
+ | string1:string | string2:long | string3:double | string4:timestamp | record:json |
7
+ +-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
8
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
9
+ | 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
10
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
11
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
12
+ | 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
13
+ | 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
14
+ | 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
15
+ | 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
16
+ | 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
17
+ | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
18
+ | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
19
+ | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
20
+ | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
21
+ +-------------------------------+-------------------+----------------------+--------------------------------+----------------------------------------------------------------------------------------------------+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/string.csv
3
+ path_prefix: example/from_string.csv
4
4
  parser:
5
5
  type: csv
6
6
  columns:
@@ -8,6 +8,7 @@ in:
8
8
  - {name: string2, type: string}
9
9
  - {name: string3, type: string}
10
10
  - {name: string4, type: string}
11
+ - {name: record, type: json}
11
12
  filters:
12
13
  - type: timestamp_format
13
14
  default_from_timezone: "Asia/Taipei"
@@ -15,9 +16,12 @@ filters:
15
16
  default_to_timezone: "Asia/Taipei"
16
17
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
17
18
  columns:
18
- - {name: string1}
19
- - {name: string2, type: timestamp}
20
- - {name: string3, type: long, to_unit: ms}
21
- - {name: string4, type: double, to_unit: ms}
19
+ - {name: string1, type: string}
20
+ - {name: string2, type: long, to_unit: ms}
21
+ - {name: string3, type: double, to_unit: ms}
22
+ - {name: string4, type: timestamp}
23
+ - {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"}
24
+ - {name: $.record.string2, type: long, to_unit: ms}
25
+ - {name: $.record.string3, type: double, to_unit: ms}
22
26
  out:
23
27
  type: "null"
@@ -0,0 +1,21 @@
1
+ 2016-11-06 14:15:56.683 +0900: Embulk v0.8.6
2
+ 2016-11-06 14:15:57.554 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 14:15:57.568 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
+ 2016-11-06 14:15:57.573 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
5
+ +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
6
+ | string1:string | string2:long | string3:double | string4:timestamp | record:json |
7
+ +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
8
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
9
+ | 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} |
10
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
11
+ | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} |
12
+ | 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} |
13
+ | 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} |
14
+ | 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} |
15
+ | 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} |
16
+ | 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} |
17
+ | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} |
18
+ | 2015-07-13 00:00:00.123456700 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456700"} |
19
+ | 2015-07-13 00:00:00.123456780 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456780"} |
20
+ | 2015-07-13 00:00:00.123456789 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456789"} |
21
+ +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/string.csv
3
+ path_prefix: example/from_string.csv
4
4
  parser:
5
5
  type: csv
6
6
  columns:
@@ -8,6 +8,7 @@ in:
8
8
  - {name: string2, type: string}
9
9
  - {name: string3, type: string}
10
10
  - {name: string4, type: string}
11
+ - {name: record, type: json}
11
12
  filters:
12
13
  - type: timestamp_format
13
14
  default_from_timezone: "Asia/Taipei"
@@ -17,8 +18,11 @@ filters:
17
18
  timestamp_parser: auto_java
18
19
  columns:
19
20
  - {name: string1}
20
- - {name: string2, type: timestamp}
21
- - {name: string3, type: long, to_unit: ms}
22
- - {name: string4, type: double, to_unit: ms}
21
+ - {name: string2, type: long, to_unit: ms}
22
+ - {name: string3, type: double, to_unit: ms}
23
+ - {name: string4, type: timestamp}
24
+ - {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"}
25
+ - {name: $.record.string2, type: long, to_unit: ms}
26
+ - {name: $.record.string3, type: double, to_unit: ms}
23
27
  out:
24
28
  type: "null"
@@ -0,0 +1,21 @@
1
+ 2016-11-06 14:16:05.976 +0900: Embulk v0.8.6
2
+ 2016-11-06 14:16:06.833 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 14:16:06.848 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv'
4
+ 2016-11-06 14:16:06.852 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv]
5
+ +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
6
+ | string1:string | string2:long | string3:double | string4:timestamp | record:json |
7
+ +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
8
+ | 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} |
9
+ | 2015-07-13 08:00:00.000000 +0800 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000 +0800"} |
10
+ | 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} |
11
+ | 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} |
12
+ | 2015-07-13 00:00:00.100000 +0800 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000 +0800"} |
13
+ | 2015-07-13 00:00:00.120000 +0800 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000 +0800"} |
14
+ | 2015-07-13 00:00:00.123000 +0800 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000 +0800"} |
15
+ | 2015-07-13 00:00:00.123400 +0800 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400 +0800"} |
16
+ | 2015-07-13 00:00:00.123450 +0800 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450 +0800"} |
17
+ | 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
18
+ | 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
19
+ | 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
20
+ | 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} |
21
+ +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/string.csv
3
+ path_prefix: example/from_string.csv
4
4
  parser:
5
5
  type: csv
6
6
  columns:
@@ -8,6 +8,7 @@ in:
8
8
  - {name: string2, type: string}
9
9
  - {name: string3, type: string}
10
10
  - {name: string4, type: string}
11
+ - {name: record, type: json}
11
12
  filters:
12
13
  - type: timestamp_format
13
14
  default_from_timezone: "Asia/Taipei"
@@ -16,8 +17,11 @@ filters:
16
17
  default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.nnnnnn Z"
17
18
  columns:
18
19
  - {name: string1}
19
- - {name: string2, type: timestamp}
20
- - {name: string3, type: long, to_unit: ms}
21
- - {name: string4, type: double, to_unit: ms}
20
+ - {name: string2, type: long, to_unit: ms}
21
+ - {name: string3, type: double, to_unit: ms}
22
+ - {name: string4, type: timestamp}
23
+ - {name: $.record.string1}
24
+ - {name: $.record.string2, type: long, to_unit: ms}
25
+ - {name: $.record.string3, type: double, to_unit: ms}
22
26
  out:
23
27
  type: "null"
File without changes
@@ -0,0 +1,9 @@
1
+ 2016-11-06 13:32:15.784 +0900: Embulk v0.8.6
2
+ 2016-11-06 13:32:16.556 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 13:32:16.571 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_timestamp.csv'
4
+ 2016-11-06 13:32:16.576 +0900 [INFO] (0001:preview): Loading files [example/from_timestamp.csv]
5
+ +-------------------------------+-------------------+-------------------+-----------------------------+
6
+ | timestamp1:string | timestamp2:long | timestamp3:double | timestamp4:timestamp |
7
+ +-------------------------------+-------------------+-------------------+-----------------------------+
8
+ | 2015-07-13 00:00:00.100000000 | 1,436,713,200,100 | 1.4367132001E12 | 2015-07-12 15:00:00.100 UTC |
9
+ +-------------------------------+-------------------+-------------------+-----------------------------+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/timestamp.csv
3
+ path_prefix: example/from_timestamp.csv
4
4
  parser:
5
5
  type: csv
6
6
  default_timestamp_format: "%Y-%m-%d %H:%M:%S.%N %z"
@@ -15,8 +15,8 @@ filters:
15
15
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
16
16
  columns:
17
17
  - {name: timestamp1, to_format: "%Y-%m-%d %H:%M:%S.%N"}
18
- - {name: timestamp2, type: timestamp}
19
- - {name: timestamp3, type: long, to_unit: ms}
20
- - {name: timestamp4, type: double, to_unit: ms}
18
+ - {name: timestamp2, type: long, to_unit: ms}
19
+ - {name: timestamp3, type: double, to_unit: ms}
20
+ - {name: timestamp4, type: timestamp}
21
21
  out:
22
22
  type: "null"
File without changes
@@ -0,0 +1,10 @@
1
+ 2016-11-06 14:25:21.964 +0900: Embulk v0.8.6
2
+ 2016-11-06 14:25:22.829 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 14:25:22.844 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl'
4
+ 2016-11-06 14:25:22.850 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl]
5
+ +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
6
+ | record:json |
7
+ +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
8
+ | {"timestamp":1436713200000,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} |
9
+ | {"timestamp":1436713200100,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} |
10
+ +-----------------------------------------------------------------------------------------------------------------------------------------------------------+
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/nested.jsonl
4
+ parser:
5
+ type: json
6
+ filters:
7
+ - type: timestamp_format
8
+ default_to_timezone: "Asia/Tokyo"
9
+ default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
10
+ columns:
11
+ - {name: "$.record.timestamp", type: long, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_unit: ms}
12
+ - {name: "$.record.nested.nested[0].timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
13
+ out:
14
+ type: "null"
File without changes
@@ -0,0 +1,10 @@
1
+ 2016-11-06 14:25:02.170 +0900: Embulk v0.8.6
2
+ 2016-11-06 14:25:03.024 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path
3
+ 2016-11-06 14:25:03.039 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'timezone.csv'
4
+ 2016-11-06 14:25:03.043 +0900 [INFO] (0001:preview): Loading files [example/timezone.csv]
5
+ +----------------+-------------------------------------+
6
+ | string1:string | string2:string |
7
+ +----------------+-------------------------------------+
8
+ | 2015-07-13 | 2015-07-13 00:00:00.000000000 +0900 |
9
+ | 2015-07-13 | 2015-07-13 00:00:00.100000000 +0900 |
10
+ +----------------+-------------------------------------+
@@ -0,0 +1,16 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/timezone.csv
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: string1, type: string}
8
+ - {name: string2, type: string}
9
+ filters:
10
+ - type: timestamp_format
11
+ default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]
12
+ columns:
13
+ - {name: string1, to_format: "%Y-%m-%d", to_timezone: "Asia/Tokyo"}
14
+ - {name: string2, to_format: "%Y-%m-%d %H:%M:%S.%N %z", to_timezone: "Asia/Tokyo"}
15
+ out:
16
+ type: "null"
@@ -1,5 +1,6 @@
1
1
  package org.embulk.filter.timestamp_format;
2
2
 
3
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
3
4
  import org.embulk.filter.timestamp_format.cast.DoubleCast;
4
5
  import org.embulk.filter.timestamp_format.cast.LongCast;
5
6
  import org.embulk.filter.timestamp_format.cast.StringCast;
@@ -237,7 +238,8 @@ public class ColumnCaster
237
238
 
238
239
  public void setFromJson(Column outputColumn, Value value)
239
240
  {
240
- String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
241
+ String pathFragment = PropertyPathToken.getPathFragment(outputColumn.getName());
242
+ String jsonPath = new StringBuilder("$").append(pathFragment).toString();
241
243
  pageBuilder.setJson(outputColumn, jsonVisitor.visit(jsonPath, value));
242
244
  }
243
245
  }
@@ -1,5 +1,6 @@
1
1
  package org.embulk.filter.timestamp_format;
2
2
 
3
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
3
4
  import org.embulk.spi.DataException;
4
5
  import org.embulk.spi.PageReader;
5
6
  import org.embulk.spi.Schema;
@@ -48,10 +49,9 @@ public class ColumnVisitorImpl
48
49
  // columnName => Boolean to avoid unnecessary cast
49
50
  for (ColumnConfig columnConfig : task.getColumns()) {
50
51
  String name = columnConfig.getName();
51
- if (name.startsWith("$.")) {
52
- String firstName = name.split("\\.", 3)[1]; // check only top level column name
53
- String firstPartName = firstName.split("\\[")[0];
54
- shouldCastSet.add(firstPartName);
52
+ if (PathCompiler.isProbablyJsonPath(name)) {
53
+ String columnName = JsonPathUtil.getColumnName(name);
54
+ shouldCastSet.add(columnName);
55
55
  continue;
56
56
  }
57
57
  shouldCastSet.add(name);
@@ -0,0 +1,78 @@
1
+ package org.embulk.filter.timestamp_format;
2
+
3
+ import io.github.medjed.jsonpathcompiler.InvalidPathException;
4
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.FunctionPathToken;
8
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
9
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
10
+ import io.github.medjed.jsonpathcompiler.expressions.path.PredicatePathToken;
11
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
12
+ import io.github.medjed.jsonpathcompiler.expressions.path.ScanPathToken;
13
+ import org.embulk.config.ConfigException;
14
+
15
+ public class JsonPathUtil
16
+ {
17
+ private JsonPathUtil() {}
18
+
19
+ public static String getColumnName(String jsonPath)
20
+ {
21
+ Path compiledPath;
22
+ try {
23
+ compiledPath = PathCompiler.compile(jsonPath);
24
+ }
25
+ catch (InvalidPathException e) {
26
+ throw new ConfigException(String.format("jsonpath %s, %s", jsonPath, e.getMessage()));
27
+ }
28
+ PathToken pathToken = compiledPath.getRoot();
29
+ pathToken = pathToken.next(); // skip $
30
+ return ((PropertyPathToken) pathToken).getProperties().get(0);
31
+ }
32
+
33
+ public static void assertJsonPathFormat(String path)
34
+ {
35
+ Path compiledPath;
36
+ try {
37
+ compiledPath = PathCompiler.compile(path);
38
+ }
39
+ catch (InvalidPathException e) {
40
+ throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
41
+ }
42
+ PathToken pathToken = compiledPath.getRoot();
43
+ while (true) {
44
+ assertSupportedPathToken(pathToken, path);
45
+ if (pathToken.isLeaf()) {
46
+ break;
47
+ }
48
+ pathToken = pathToken.next();
49
+ }
50
+ }
51
+
52
+ protected static void assertSupportedPathToken(PathToken pathToken, String path)
53
+ {
54
+ if (pathToken instanceof ArrayPathToken) {
55
+ ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
56
+ assertSupportedArrayPathToken(arrayIndexOperation, path);
57
+ }
58
+ else if (pathToken instanceof ScanPathToken) {
59
+ throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
60
+ }
61
+ else if (pathToken instanceof FunctionPathToken) {
62
+ throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
63
+ }
64
+ else if (pathToken instanceof PredicatePathToken) {
65
+ throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
66
+ }
67
+ }
68
+
69
+ protected static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
70
+ {
71
+ if (arrayIndexOperation == null) {
72
+ throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
73
+ }
74
+ else if (!arrayIndexOperation.isSingleIndexOperation()) {
75
+ throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
76
+ }
77
+ }
78
+ }
@@ -1,5 +1,10 @@
1
1
  package org.embulk.filter.timestamp_format;
2
2
 
3
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
4
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
3
8
  import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.ColumnConfig;
4
9
  import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.PluginTask;
5
10
 
@@ -28,19 +33,32 @@ public class JsonVisitor
28
33
  this.task = task;
29
34
  this.jsonCaster = jsonCaster;
30
35
 
36
+ assertJsonPathFormat();
31
37
  buildJsonPathColumnConfigMap();
32
38
  buildShouldVisitSet();
33
39
  }
34
40
 
41
+ private void assertJsonPathFormat()
42
+ {
43
+ for (ColumnConfig columnConfig : task.getColumns()) {
44
+ String name = columnConfig.getName();
45
+ if (!PathCompiler.isProbablyJsonPath(name)) {
46
+ continue;
47
+ }
48
+ JsonPathUtil.assertJsonPathFormat(name);
49
+ }
50
+ }
51
+
35
52
  private void buildJsonPathColumnConfigMap()
36
53
  {
37
54
  // json path => Type
38
55
  for (ColumnConfig columnConfig : task.getColumns()) {
39
56
  String name = columnConfig.getName();
40
- if (!name.startsWith("$.")) {
57
+ if (!PathCompiler.isProbablyJsonPath(name)) {
41
58
  continue;
42
59
  }
43
- this.jsonPathColumnConfigMap.put(name, columnConfig);
60
+ Path compiledPath = PathCompiler.compile(name);
61
+ this.jsonPathColumnConfigMap.put(compiledPath.toString(), columnConfig);
44
62
  }
45
63
  }
46
64
 
@@ -49,26 +67,16 @@ public class JsonVisitor
49
67
  // json partial path => Boolean to avoid unnecessary type: json visit
50
68
  for (ColumnConfig columnConfig : task.getColumns()) {
51
69
  String name = columnConfig.getName();
52
- if (!name.startsWith("$.")) {
70
+ if (! PathCompiler.isProbablyJsonPath(name)) {
53
71
  continue;
54
72
  }
55
- String[] parts = name.split("\\.");
73
+ Path compiledPath = PathCompiler.compile(name);
74
+ PathToken parts = compiledPath.getRoot();
56
75
  StringBuilder partialPath = new StringBuilder("$");
57
- for (int i = 1; i < parts.length; i++) {
58
- if (parts[i].contains("[")) {
59
- String[] arrayParts = parts[i].split("\\[");
60
- partialPath.append(".").append(arrayParts[0]);
61
- this.shouldVisitSet.add(partialPath.toString());
62
- for (int j = 1; j < arrayParts.length; j++) {
63
- // Support both [0] and [*]
64
- partialPath.append("[").append(arrayParts[j]);
65
- this.shouldVisitSet.add(partialPath.toString());
66
- }
67
- }
68
- else {
69
- partialPath.append(".").append(parts[i]);
70
- this.shouldVisitSet.add(partialPath.toString());
71
- }
76
+ while (! parts.isLeaf()) {
77
+ parts = parts.next(); // first next() skips "$"
78
+ partialPath.append(parts.getPathFragment());
79
+ this.shouldVisitSet.add(partialPath.toString());
72
80
  }
73
81
  }
74
82
  }
@@ -88,7 +96,8 @@ public class JsonVisitor
88
96
  int size = arrayValue.size();
89
97
  Value[] newValue = new Value[size];
90
98
  for (int i = 0; i < size; i++) {
91
- String k = new StringBuilder(rootPath).append("[").append(Integer.toString(i)).append("]").toString();
99
+ String pathFragment = ArrayPathToken.getPathFragment(i);
100
+ String k = new StringBuilder(rootPath).append(pathFragment).toString();
92
101
  if (!shouldVisit(k)) {
93
102
  k = new StringBuilder(rootPath).append("[*]").toString(); // try [*] too
94
103
  }
@@ -105,7 +114,8 @@ public class JsonVisitor
105
114
  for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
106
115
  Value k = entry.getKey();
107
116
  Value v = entry.getValue();
108
- String newPath = new StringBuilder(rootPath).append(".").append(k.asStringValue().asString()).toString();
117
+ String pathFragment = PropertyPathToken.getPathFragment(k.asStringValue().asString());
118
+ String newPath = new StringBuilder(rootPath).append(pathFragment).toString();
109
119
  Value r = visit(newPath, v);
110
120
  newValue[i++] = k;
111
121
  newValue[i++] = r;
@@ -2,6 +2,7 @@ package org.embulk.filter.timestamp_format;
2
2
 
3
3
  import com.google.common.base.Optional;
4
4
  import com.google.common.collect.ImmutableList;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
5
6
  import org.embulk.config.Config;
6
7
  import org.embulk.config.ConfigDefault;
7
8
  import org.embulk.config.ConfigException;
@@ -99,10 +100,9 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
99
100
  // throw if column does not exist
100
101
  for (ColumnConfig columnConfig : columns) {
101
102
  String name = columnConfig.getName();
102
- if (name.startsWith("$.")) {
103
- String firstName = name.split("\\.", 3)[1]; // check only top level column name
104
- String firstNameWithoutArray = firstName.split("\\[")[0];
105
- inputSchema.lookupColumn(firstNameWithoutArray);
103
+ if (PathCompiler.isProbablyJsonPath(name)) {
104
+ String columnName = JsonPathUtil.getColumnName(name);
105
+ inputSchema.lookupColumn(columnName);
106
106
  }
107
107
  else {
108
108
  inputSchema.lookupColumn(name);
@@ -119,7 +119,7 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
119
119
  if (type instanceof JsonType) {
120
120
  throw new ConfigException(String.format("casting to json is not available: \"%s\"", name));
121
121
  }
122
- if (name.startsWith("$.") && type instanceof TimestampType) {
122
+ if (PathCompiler.isProbablyJsonPath(name) && type instanceof TimestampType) {
123
123
  throw new ConfigException(String.format("casting a json path into timestamp is not available: \"%s\"", name));
124
124
  }
125
125
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-timestamp_format
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-25 00:00:00.000000000 Z
11
+ date: 2016-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -58,28 +58,32 @@ files:
58
58
  - bench/gen_dummy.rb
59
59
  - build.gradle
60
60
  - config/checkstyle/checkstyle.xml
61
- - example/double.csv
62
- - example/double.yml
61
+ - example/bracket_notation.txt
62
+ - example/bracket_notation.yml
63
63
  - example/empty.yml
64
- - example/example.jsonl
65
64
  - example/example.yml
66
- - example/example2.csv
67
- - example/example2.yml
68
- - example/json_double.jsonl
69
- - example/json_double.yml
70
- - example/json_long.jsonl
71
- - example/json_long.yml
72
- - example/json_string.jsonl
73
- - example/json_string.yml
74
- - example/long.csv
75
- - example/long.yml
76
- - example/string.csv
77
- - example/string.yml
78
- - example/string_auto_java.yml
79
- - example/string_java.yml
80
- - example/string_nano.yml
81
- - example/timestamp.csv
82
- - example/timestamp.yml
65
+ - example/from_double.csv
66
+ - example/from_double.txt
67
+ - example/from_double.yml
68
+ - example/from_long.csv
69
+ - example/from_long.txt
70
+ - example/from_long.yml
71
+ - example/from_string.csv
72
+ - example/from_string.txt
73
+ - example/from_string.yml
74
+ - example/from_string_auto_java.txt
75
+ - example/from_string_auto_java.yml
76
+ - example/from_string_java.txt
77
+ - example/from_string_java.yml
78
+ - example/from_timestamp.csv
79
+ - example/from_timestamp.txt
80
+ - example/from_timestamp.yml
81
+ - example/nested.jsonl
82
+ - example/nested.txt
83
+ - example/nested.yml
84
+ - example/timezone.csv
85
+ - example/timezone.txt
86
+ - example/timezone.yml
83
87
  - gradle/wrapper/gradle-wrapper.jar
84
88
  - gradle/wrapper/gradle-wrapper.properties
85
89
  - gradlew
@@ -89,6 +93,7 @@ files:
89
93
  - src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java
90
94
  - src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java
91
95
  - src/main/java/org/embulk/filter/timestamp_format/JsonCaster.java
96
+ - src/main/java/org/embulk/filter/timestamp_format/JsonPathUtil.java
92
97
  - src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java
93
98
  - src/main/java/org/embulk/filter/timestamp_format/TimestampFormatConverter.java
94
99
  - src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java
@@ -102,7 +107,13 @@ files:
102
107
  - src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
103
108
  - src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java
104
109
  - src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
105
- - classpath/embulk-filter-timestamp_format-0.2.3.jar
110
+ - classpath/accessors-smart-1.1.jar
111
+ - classpath/asm-5.0.3.jar
112
+ - classpath/commons-lang3-3.4.jar
113
+ - classpath/embulk-filter-timestamp_format-0.2.4.jar
114
+ - classpath/json-smart-2.2.1.jar
115
+ - classpath/JsonPathCompiler-0.1.1.jar
116
+ - classpath/slf4j-api-1.7.21.jar
106
117
  homepage: https://github.com/sonots/embulk-filter-timestamp_format
107
118
  licenses:
108
119
  - MIT
data/example/double.csv DELETED
@@ -1,2 +0,0 @@
1
- 1436713200100.2,1436713200100.2,1436713200100.2,1436713200100.2
2
-
data/example/example2.yml DELETED
@@ -1,14 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example2.csv
4
- parser:
5
- type: csv
6
- columns:
7
- - {name: string1, type: string}
8
- - {name: string2, type: string}
9
- filters:
10
- - type: timestamp_format
11
- columns:
12
- - {name: string1, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_format: "%Y-%m-%m", to_timezone: "Asia/Tokyo"}
13
- out:
14
- type: "null"
@@ -1 +0,0 @@
1
- {"double1":1436713200100.2,"double2":1436713200100.2,"double3":1436713200100.2}
@@ -1,14 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/json_double.jsonl
4
- parser:
5
- type: json
6
- filters:
7
- - type: timestamp_format
8
- default_from_timestamp_unit: ms
9
- columns:
10
- - {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
11
- - {name: $.record.double2, type: long}
12
- - {name: $.record.double3, type: double}
13
- out:
14
- type: "null"
@@ -1 +0,0 @@
1
- {"long1":1436713200100,"long2":1436713200100,"long3":1436713200100}
@@ -1,14 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/json_long.jsonl
4
- parser:
5
- type: json
6
- filters:
7
- - type: timestamp_format
8
- default_from_timestamp_unit: ms
9
- columns:
10
- - {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
11
- - {name: $.record.long2, type: long}
12
- - {name: $.record.long3, type: double}
13
- out:
14
- type: "null"
@@ -1,2 +0,0 @@
1
- {"string1":"2015-07-12 15:00:00 UTC","string2":"2015-07-12 15:00:00 UTC","string3":"2015-07-12 15:00:00 UTC"}
2
- {"string1":"2015-07-12 15:00:00.1 UTC","string2":"2015-07-12 15:00:00.1 UTC","string3":"2015-07-12 15:00:00.1 UTC"}
@@ -1,14 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/json_string.jsonl
4
- parser:
5
- type: json
6
- filters:
7
- - type: timestamp_format
8
- default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S.%N %z"]
9
- columns:
10
- - {name: $.record.string1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
11
- - {name: $.record.string2, type: long, to_unit: ms}
12
- - {name: $.record.string3, type: double, to_unit: ms}
13
- out:
14
- type: "null"
data/example/long.csv DELETED
@@ -1 +0,0 @@
1
- 1436713200100,1436713200100,1436713200100,1436713200100
data/example/string.csv DELETED
@@ -1,14 +0,0 @@
1
- 2015-07-13,2015-07-13,2015-07-13,2015-07-13
2
- 2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC
3
- 2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00
4
- 2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC
5
- 2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC
6
- 2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC
7
- 2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC
8
- 2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC
9
- 2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC
10
- 2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC
11
- 2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC
12
- 2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC
13
- 2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC
14
-
@@ -1,23 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/string.csv
4
- parser:
5
- type: csv
6
- columns:
7
- - {name: string1, type: string}
8
- - {name: string2, type: string}
9
- - {name: string3, type: string}
10
- - {name: string4, type: string}
11
- filters:
12
- - type: timestamp_format
13
- default_from_timezone: "Asia/Taipei"
14
- default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.SSSSSSSSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
15
- default_to_timezone: "Asia/Taipei"
16
- default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
17
- columns:
18
- - {name: string1}
19
- - {name: string2, type: timestamp}
20
- - {name: string3, type: long, to_unit: ms}
21
- - {name: string4, type: double, to_unit: ms}
22
- out:
23
- type: "null"