embulk-filter-timestamp_format 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/CHANGELOG.md +7 -0
- data/README.md +46 -1
- data/bench/config_java.yml +14 -0
- data/bench/config_jruby.yml +14 -0
- data/bench/gen_dummy.rb +5 -0
- data/build.gradle +1 -1
- data/example/double.csv +2 -0
- data/example/double.yml +20 -0
- data/example/{json_example.jsonl → example.jsonl} +0 -0
- data/example/example.yml +4 -12
- data/example/example2.csv +2 -0
- data/example/example2.yml +14 -0
- data/example/json_double.jsonl +1 -0
- data/example/json_double.yml +14 -0
- data/example/json_long.jsonl +1 -0
- data/example/json_long.yml +14 -0
- data/example/json_string.jsonl +2 -0
- data/example/json_string.yml +14 -0
- data/example/long.csv +1 -0
- data/example/long.yml +20 -0
- data/example/string.csv +4 -0
- data/example/{string_example.yml → string.yml} +6 -5
- data/example/string_java.yml +23 -0
- data/example/timestamp.csv +2 -0
- data/example/{timestamp_example.yml → timestamp.yml} +4 -4
- data/src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java +107 -14
- data/src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java +104 -33
- data/src/main/java/org/embulk/filter/timestamp_format/JsonCaster.java +61 -4
- data/src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java +8 -0
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java +28 -17
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java +36 -5
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java +57 -26
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampUnit.java +112 -0
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampUnitDeserializer.java +54 -0
- data/src/main/java/org/embulk/filter/timestamp_format/cast/DoubleCast.java +32 -0
- data/src/main/java/org/embulk/filter/timestamp_format/cast/LongCast.java +32 -0
- data/src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java +20 -4
- data/src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java +5 -6
- data/src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java +192 -0
- metadata +29 -8
- data/example/json_example.yml +0 -14
- data/src/test/java/org/embulk/filter/TestTimestampFormatFilterPlugin.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb84426fbf7dfbaac21925ece7475f322c7c2010
|
4
|
+
data.tar.gz: 2c5accec8470864588c20d62c42626a89a584bab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d77f18cde0d2a3626198d1e94aa1c7e1b6c47d3c8572c9397cf1c5ec8a4ec808fcb9dea636339c0e838e00f418cefd290d92d77b44669a6df31c1c3e2fd8b9bb
|
7
|
+
data.tar.gz: c3464272e96b244c782b65e8e536a1f64fc488646958c58d04a068550c85d718f137081662ce5fd7fe1370aa6788bda4b8598a7e0e606b7a00e6811e096177a5
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -13,11 +13,15 @@ A filter plugin for Embulk to change timestamp format
|
|
13
13
|
- **from_timezone**: specify the timezone of the input string (string, default is default_from_timezone)
|
14
14
|
- **to_format**: specify the format of the output string (string, default is default_to_timestamp_format)
|
15
15
|
- **to_timezone**: specify the timezone of the output string (string, default is default_to_timezone)
|
16
|
+
- **from_unit**: specify the time unit of the input unixtimestamp (string, default is default_from_timestamp_unit)
|
17
|
+
- **to_unit**: specify the time unit of the output unixtimestamp (string, default is default_to_timestamp_unit)
|
16
18
|
- **default_from_timestamp_format**: default timestamp format for the input string (array of strings, default is `["%Y-%m-%d %H:%M:%S.%N %z"]`)
|
17
19
|
- **default_from_timezone**: default timezone for the input string (string, default is `UTC`)
|
18
20
|
- **default_to_timestamp_format**: default timestamp format for the output string (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
|
19
21
|
- **default_to_timezone**: default timezone for the output string (string, default is `UTC`)
|
20
|
-
|
22
|
+
- **default_from_timestamp_unit**: default time unit such as second, ms, us, ns for the input unixtimestamp (string, default is `second`)
|
23
|
+
- **default_to_timestamp_unit**: default time unit such as second, ms, us, ns for the output unixtimestamp (string, default is `second`)
|
24
|
+
- **stop_on_invalid_record**: stop bulk load transaction if an invalid record is found (boolean, default is `false`)
|
21
25
|
|
22
26
|
## Example
|
23
27
|
|
@@ -56,6 +60,47 @@ Output will be as:
|
|
56
60
|
|
57
61
|
See [./example](./example) for more examples.
|
58
62
|
|
63
|
+
## Timestamp Parser/Formatter Performance Issue
|
64
|
+
|
65
|
+
Embulk's timestamp parser/formatter originally uses a JRuby implementation, which is slow.
|
66
|
+
To improve performance, this plugin also supports Java's [SimpleDateFormat](https://docs.oracle.com/javase/jp/6/api/java/text/SimpleDateFormat.html) format as:
|
67
|
+
|
68
|
+
```yaml
|
69
|
+
in:
|
70
|
+
type: file
|
71
|
+
path_prefix: example/example.jsonl
|
72
|
+
parser:
|
73
|
+
type: jsonl
|
74
|
+
columns:
|
75
|
+
- {name: timestamp, type: string}
|
76
|
+
- {name: nested, type: json}
|
77
|
+
filters:
|
78
|
+
- type: timestamp_format
|
79
|
+
default_from_timezone: "Asia/Taipei"
|
80
|
+
default_from_timestamp_format: ["yyyy-MM-dd HH:mm:ss.SSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
|
81
|
+
default_to_timezone: "Asia/Taipei"
|
82
|
+
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
|
83
|
+
columns:
|
84
|
+
- {name: timestamp}
|
85
|
+
- {name: $.nested.timestamp}
|
86
|
+
out:
|
87
|
+
type: stdout
|
88
|
+
```
|
89
|
+
|
90
|
+
If format strings contain `%`, jruby parser/formatter is used. Otherwise, java parser/formatter is used.
|
91
|
+
|
92
|
+
**COMPARISON:**
|
93
|
+
|
94
|
+
Benchmark test sets are available at [./bench](./bench). In my environment (Mac Book Pro), for 1000000 timestamps:
|
95
|
+
|
96
|
+
* jruby parser/formatter: 65.06s
|
97
|
+
* java parser/formatter: 1.3s
|
98
|
+
|
99
|
+
**NOTICE:**
|
100
|
+
|
101
|
+
* JRuby parser has microsecond resolution, but Java parser (SimpleDateFormat) has only millisecond resolution
|
102
|
+
* `S` always requires three digits. For example, `yyyy-MM-dd HH:mm:ss.S` for `2015-12-17 01:02:03.1` wrongly gives 001 milliseconds (not 100), but this is the specified behavior of SimpleDateFormat.
|
103
|
+
|
59
104
|
## ToDo
|
60
105
|
|
61
106
|
* Write test
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: bench/dummy
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: timestamp, type: string}
|
8
|
+
filters:
|
9
|
+
- type: timestamp_format
|
10
|
+
columns:
|
11
|
+
- {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.SSS"], to_format: "yyyy-MM-dd"}
|
12
|
+
|
13
|
+
out:
|
14
|
+
type: "null"
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: bench/dummy
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: timestamp, type: string}
|
8
|
+
filters:
|
9
|
+
- type: timestamp_format
|
10
|
+
columns:
|
11
|
+
- {name: timestamp, from_format: ["%Y-%m-%d %H:%M:%S.%N"], to_format: "%Y-%m-%d"}
|
12
|
+
|
13
|
+
out:
|
14
|
+
type: "null"
|
data/bench/gen_dummy.rb
ADDED
data/build.gradle
CHANGED
data/example/double.csv
ADDED
data/example/double.yml
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/double.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: double1, type: double}
|
8
|
+
- {name: double2, type: double}
|
9
|
+
- {name: double3, type: double}
|
10
|
+
- {name: double4, type: double}
|
11
|
+
filters:
|
12
|
+
- type: timestamp_format
|
13
|
+
default_from_timestamp_unit: ms
|
14
|
+
columns:
|
15
|
+
- {name: double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
16
|
+
- {name: double2, type: timestamp}
|
17
|
+
- {name: double3, type: long}
|
18
|
+
- {name: double4, type: double}
|
19
|
+
out:
|
20
|
+
type: "null"
|
File without changes
|
data/example/example.yml
CHANGED
@@ -1,22 +1,14 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/example.jsonl
|
4
4
|
parser:
|
5
|
-
type:
|
6
|
-
columns:
|
7
|
-
- {name: string1, type: string}
|
8
|
-
- {name: string2, type: string}
|
9
|
-
- {name: string3, type: string}
|
10
|
-
- {name: string4, type: string}
|
5
|
+
type: json
|
11
6
|
filters:
|
12
7
|
- type: timestamp_format
|
13
8
|
default_to_timezone: "Asia/Tokyo"
|
14
9
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
15
|
-
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]
|
16
10
|
columns:
|
17
|
-
- {name:
|
18
|
-
- {name:
|
19
|
-
- {name: string3, type: long}
|
20
|
-
- {name: string4, type: double}
|
11
|
+
- {name: "$.record.timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
|
12
|
+
- {name: "$.record.nested.nested[0].timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
|
21
13
|
out:
|
22
14
|
type: "null"
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/example2.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: string1, type: string}
|
8
|
+
- {name: string2, type: string}
|
9
|
+
filters:
|
10
|
+
- type: timestamp_format
|
11
|
+
columns:
|
12
|
+
- {name: string1, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_format: "%Y-%m-%m", to_timezone: "Asia/Tokyo"}
|
13
|
+
out:
|
14
|
+
type: "null"
|
@@ -0,0 +1 @@
|
|
1
|
+
{"double1":1436713200100.2,"double2":1436713200100.2,"double3":1436713200100.2}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/json_double.jsonl
|
4
|
+
parser:
|
5
|
+
type: json
|
6
|
+
filters:
|
7
|
+
- type: timestamp_format
|
8
|
+
default_from_timestamp_unit: ms
|
9
|
+
columns:
|
10
|
+
- {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
11
|
+
- {name: $.record.double2, type: long}
|
12
|
+
- {name: $.record.double3, type: double}
|
13
|
+
out:
|
14
|
+
type: "null"
|
@@ -0,0 +1 @@
|
|
1
|
+
{"long1":1436713200100,"long2":1436713200100,"long3":1436713200100}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/json_long.jsonl
|
4
|
+
parser:
|
5
|
+
type: json
|
6
|
+
filters:
|
7
|
+
- type: timestamp_format
|
8
|
+
default_from_timestamp_unit: ms
|
9
|
+
columns:
|
10
|
+
- {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
11
|
+
- {name: $.record.long2, type: long}
|
12
|
+
- {name: $.record.long3, type: double}
|
13
|
+
out:
|
14
|
+
type: "null"
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/json_string.jsonl
|
4
|
+
parser:
|
5
|
+
type: json
|
6
|
+
filters:
|
7
|
+
- type: timestamp_format
|
8
|
+
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S.%N %z"]
|
9
|
+
columns:
|
10
|
+
- {name: $.record.string1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
11
|
+
- {name: $.record.string2, type: long, to_unit: ms}
|
12
|
+
- {name: $.record.string3, type: double, to_unit: ms}
|
13
|
+
out:
|
14
|
+
type: "null"
|
data/example/long.csv
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1436713200100,1436713200100,1436713200100,1436713200100
|
data/example/long.yml
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/long.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: long1, type: long}
|
8
|
+
- {name: long2, type: long}
|
9
|
+
- {name: long3, type: long}
|
10
|
+
- {name: long4, type: long}
|
11
|
+
filters:
|
12
|
+
- type: timestamp_format
|
13
|
+
default_from_timestamp_unit: ms
|
14
|
+
columns:
|
15
|
+
- {name: long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
16
|
+
- {name: long2, type: timestamp}
|
17
|
+
- {name: long3, type: long}
|
18
|
+
- {name: long4, type: double}
|
19
|
+
out:
|
20
|
+
type: "null"
|
data/example/string.csv
ADDED
@@ -0,0 +1,4 @@
|
|
1
|
+
2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00
|
2
|
+
2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC
|
3
|
+
2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC
|
4
|
+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/string.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
columns:
|
@@ -10,13 +10,14 @@ in:
|
|
10
10
|
- {name: string4, type: string}
|
11
11
|
filters:
|
12
12
|
- type: timestamp_format
|
13
|
-
|
13
|
+
default_from_timezone: "Asia/Taipei"
|
14
|
+
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S"]
|
15
|
+
default_to_timezone: "Asia/Taipei"
|
14
16
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
15
|
-
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]
|
16
17
|
columns:
|
17
18
|
- {name: string1}
|
18
19
|
- {name: string2, type: timestamp}
|
19
|
-
- {name: string3, type: long}
|
20
|
-
- {name: string4, type: double}
|
20
|
+
- {name: string3, type: long, to_unit: ms}
|
21
|
+
- {name: string4, type: double, to_unit: ms}
|
21
22
|
out:
|
22
23
|
type: "null"
|
@@ -0,0 +1,23 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/string.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: string1, type: string}
|
8
|
+
- {name: string2, type: string}
|
9
|
+
- {name: string3, type: string}
|
10
|
+
- {name: string4, type: string}
|
11
|
+
filters:
|
12
|
+
- type: timestamp_format
|
13
|
+
default_from_timezone: "Asia/Taipei"
|
14
|
+
default_from_timestamp_format: ["yyyy-MM-dd HH:mm:ss.S z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"] # SSS must be three digit ...
|
15
|
+
default_to_timezone: "Asia/Taipei"
|
16
|
+
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
|
17
|
+
columns:
|
18
|
+
- {name: string1}
|
19
|
+
- {name: string2, type: timestamp}
|
20
|
+
- {name: string3, type: long, to_unit: ms}
|
21
|
+
- {name: string4, type: double, to_unit: ms}
|
22
|
+
out:
|
23
|
+
type: "null"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/timestamp.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
default_timestamp_format: "%Y-%m-%d %H:%M:%S.%N %z"
|
@@ -14,9 +14,9 @@ filters:
|
|
14
14
|
default_to_timezone: "Asia/Tokyo"
|
15
15
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
16
16
|
columns:
|
17
|
-
- {name: timestamp1}
|
17
|
+
- {name: timestamp1, to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
18
18
|
- {name: timestamp2, type: timestamp}
|
19
|
-
- {name: timestamp3, type: long}
|
20
|
-
- {name: timestamp4, type: double}
|
19
|
+
- {name: timestamp3, type: long, to_unit: ms}
|
20
|
+
- {name: timestamp4, type: double, to_unit: ms}
|
21
21
|
out:
|
22
22
|
type: "null"
|
@@ -1,5 +1,7 @@
|
|
1
1
|
package org.embulk.filter.timestamp_format;
|
2
2
|
|
3
|
+
import org.embulk.filter.timestamp_format.cast.DoubleCast;
|
4
|
+
import org.embulk.filter.timestamp_format.cast.LongCast;
|
3
5
|
import org.embulk.filter.timestamp_format.cast.StringCast;
|
4
6
|
import org.embulk.filter.timestamp_format.cast.TimestampCast;
|
5
7
|
import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.ColumnConfig;
|
@@ -32,6 +34,8 @@ public class ColumnCaster
|
|
32
34
|
private final PageBuilder pageBuilder;
|
33
35
|
private final HashMap<String, TimestampParser> timestampParserMap = new HashMap<>();
|
34
36
|
private final HashMap<String, TimestampFormatter> timestampFormatterMap = new HashMap<>();
|
37
|
+
private final HashMap<String, TimestampUnit> fromTimestampUnitMap = new HashMap<>();
|
38
|
+
private final HashMap<String, TimestampUnit> toTimestampUnitMap = new HashMap<>();
|
35
39
|
private final JsonVisitor jsonVisitor;
|
36
40
|
|
37
41
|
ColumnCaster(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, PageBuilder pageBuilder)
|
@@ -44,29 +48,23 @@ public class ColumnCaster
|
|
44
48
|
|
45
49
|
buildTimestampParserMap();
|
46
50
|
buildTimestampFormatterMap();
|
51
|
+
buildFromTimestampUnitMap();
|
52
|
+
buildToTimestampUnitMap();
|
47
53
|
|
48
|
-
JsonCaster jsonCaster = new JsonCaster(task, timestampParserMap, timestampFormatterMap);
|
54
|
+
JsonCaster jsonCaster = new JsonCaster(task, timestampParserMap, timestampFormatterMap, fromTimestampUnitMap, toTimestampUnitMap);
|
49
55
|
this.jsonVisitor = new JsonVisitor(task, jsonCaster);
|
50
56
|
}
|
51
57
|
|
52
58
|
private void buildTimestampParserMap()
|
53
59
|
{
|
54
60
|
// columnName or jsonPath => TimestampParser
|
61
|
+
// we do not know input type of json here, so creates anyway
|
55
62
|
for (ColumnConfig columnConfig : task.getColumns()) {
|
56
63
|
TimestampParser parser = getTimestampParser(columnConfig, task);
|
57
64
|
this.timestampParserMap.put(columnConfig.getName(), parser);
|
58
65
|
}
|
59
66
|
}
|
60
67
|
|
61
|
-
private void buildTimestampFormatterMap()
|
62
|
-
{
|
63
|
-
// columnName or jsonPath => TimestampFormatter
|
64
|
-
for (ColumnConfig columnConfig : task.getColumns()) {
|
65
|
-
TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
|
66
|
-
this.timestampFormatterMap.put(columnConfig.getName(), parser);
|
67
|
-
}
|
68
|
-
}
|
69
|
-
|
70
68
|
private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task)
|
71
69
|
{
|
72
70
|
DateTimeZone timezone = columnConfig.getFromTimeZone().or(task.getDefaultFromTimeZone());
|
@@ -74,6 +72,17 @@ public class ColumnCaster
|
|
74
72
|
return new TimestampParser(task.getJRuby(), formatList, timezone);
|
75
73
|
}
|
76
74
|
|
75
|
+
private void buildTimestampFormatterMap()
|
76
|
+
{
|
77
|
+
// columnName or jsonPath => TimestampFormatter
|
78
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
79
|
+
if (columnConfig.getType() instanceof StringType) {
|
80
|
+
TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
|
81
|
+
this.timestampFormatterMap.put(columnConfig.getName(), parser);
|
82
|
+
}
|
83
|
+
}
|
84
|
+
}
|
85
|
+
|
77
86
|
private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task)
|
78
87
|
{
|
79
88
|
String format = columnConfig.getToFormat().or(task.getDefaultToTimestampFormat());
|
@@ -81,6 +90,86 @@ public class ColumnCaster
|
|
81
90
|
return new TimestampFormatter(task.getJRuby(), format, timezone);
|
82
91
|
}
|
83
92
|
|
93
|
+
private void buildFromTimestampUnitMap()
|
94
|
+
{
|
95
|
+
// columnName or jsonPath => TimestampUnit
|
96
|
+
// we do not know input type of json here, so creates anyway
|
97
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
98
|
+
TimestampUnit unit = getFromTimestampUnit(columnConfig, task);
|
99
|
+
this.fromTimestampUnitMap.put(columnConfig.getName(), unit);
|
100
|
+
}
|
101
|
+
}
|
102
|
+
|
103
|
+
private TimestampUnit getFromTimestampUnit(ColumnConfig columnConfig, PluginTask task)
|
104
|
+
{
|
105
|
+
return columnConfig.getFromUnit().or(task.getDefaultFromTimestampUnit());
|
106
|
+
}
|
107
|
+
|
108
|
+
private void buildToTimestampUnitMap()
|
109
|
+
{
|
110
|
+
// columnName or jsonPath => TimestampUnit
|
111
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
112
|
+
Type type = columnConfig.getType();
|
113
|
+
if (type instanceof LongType || type instanceof DoubleType) {
|
114
|
+
TimestampUnit unit = getToTimestampUnit(columnConfig, task);
|
115
|
+
this.toTimestampUnitMap.put(columnConfig.getName(), unit);
|
116
|
+
}
|
117
|
+
}
|
118
|
+
}
|
119
|
+
|
120
|
+
private TimestampUnit getToTimestampUnit(ColumnConfig columnConfig, PluginTask task)
|
121
|
+
{
|
122
|
+
return columnConfig.getToUnit().or(task.getDefaultToTimestampUnit());
|
123
|
+
}
|
124
|
+
|
125
|
+
public void setFromLong(Column outputColumn, long value)
|
126
|
+
{
|
127
|
+
Type outputType = outputColumn.getType();
|
128
|
+
TimestampUnit fromUnit = fromTimestampUnitMap.get(outputColumn.getName());
|
129
|
+
if (outputType instanceof StringType) {
|
130
|
+
TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName());
|
131
|
+
pageBuilder.setString(outputColumn, LongCast.asString(value, fromUnit, timestampFormatter));
|
132
|
+
}
|
133
|
+
else if (outputType instanceof TimestampType) {
|
134
|
+
pageBuilder.setTimestamp(outputColumn, LongCast.asTimestamp(value, fromUnit));
|
135
|
+
}
|
136
|
+
else if (outputType instanceof LongType) {
|
137
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
138
|
+
pageBuilder.setLong(outputColumn, LongCast.asLong(value, fromUnit, toUnit));
|
139
|
+
}
|
140
|
+
else if (outputType instanceof DoubleType) {
|
141
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
142
|
+
pageBuilder.setDouble(outputColumn, LongCast.asDouble(value, fromUnit, toUnit));
|
143
|
+
}
|
144
|
+
else {
|
145
|
+
assert false;
|
146
|
+
}
|
147
|
+
}
|
148
|
+
|
149
|
+
public void setFromDouble(Column outputColumn, double value)
|
150
|
+
{
|
151
|
+
Type outputType = outputColumn.getType();
|
152
|
+
TimestampUnit fromUnit = fromTimestampUnitMap.get(outputColumn.getName());
|
153
|
+
if (outputType instanceof StringType) {
|
154
|
+
TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName());
|
155
|
+
pageBuilder.setString(outputColumn, DoubleCast.asString(value, fromUnit, timestampFormatter));
|
156
|
+
}
|
157
|
+
else if (outputType instanceof TimestampType) {
|
158
|
+
pageBuilder.setTimestamp(outputColumn, DoubleCast.asTimestamp(value, fromUnit));
|
159
|
+
}
|
160
|
+
else if (outputType instanceof LongType) {
|
161
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
162
|
+
pageBuilder.setLong(outputColumn, DoubleCast.asLong(value, fromUnit, toUnit));
|
163
|
+
}
|
164
|
+
else if (outputType instanceof DoubleType) {
|
165
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
166
|
+
pageBuilder.setDouble(outputColumn, DoubleCast.asDouble(value, fromUnit, toUnit));
|
167
|
+
}
|
168
|
+
else {
|
169
|
+
assert false;
|
170
|
+
}
|
171
|
+
}
|
172
|
+
|
84
173
|
public void setFromString(Column outputColumn, String value)
|
85
174
|
{
|
86
175
|
Type outputType = outputColumn.getType();
|
@@ -93,10 +182,12 @@ public class ColumnCaster
|
|
93
182
|
pageBuilder.setTimestamp(outputColumn, StringCast.asTimestamp(value, timestampParser));
|
94
183
|
}
|
95
184
|
else if (outputType instanceof LongType) {
|
96
|
-
|
185
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
186
|
+
pageBuilder.setLong(outputColumn, StringCast.asLong(value, timestampParser, toUnit));
|
97
187
|
}
|
98
188
|
else if (outputType instanceof DoubleType) {
|
99
|
-
|
189
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
190
|
+
pageBuilder.setDouble(outputColumn, StringCast.asDouble(value, timestampParser, toUnit));
|
100
191
|
}
|
101
192
|
else {
|
102
193
|
assert false;
|
@@ -114,10 +205,12 @@ public class ColumnCaster
|
|
114
205
|
pageBuilder.setTimestamp(outputColumn, value);
|
115
206
|
}
|
116
207
|
else if (outputType instanceof LongType) {
|
117
|
-
|
208
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
209
|
+
pageBuilder.setLong(outputColumn, TimestampCast.asLong(value, toUnit));
|
118
210
|
}
|
119
211
|
else if (outputType instanceof DoubleType) {
|
120
|
-
|
212
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
213
|
+
pageBuilder.setDouble(outputColumn, TimestampCast.asDouble(value, toUnit));
|
121
214
|
}
|
122
215
|
else {
|
123
216
|
assert false;
|