embulk-filter-timestamp_format 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/CHANGELOG.md +7 -0
- data/README.md +46 -1
- data/bench/config_java.yml +14 -0
- data/bench/config_jruby.yml +14 -0
- data/bench/gen_dummy.rb +5 -0
- data/build.gradle +1 -1
- data/example/double.csv +2 -0
- data/example/double.yml +20 -0
- data/example/{json_example.jsonl → example.jsonl} +0 -0
- data/example/example.yml +4 -12
- data/example/example2.csv +2 -0
- data/example/example2.yml +14 -0
- data/example/json_double.jsonl +1 -0
- data/example/json_double.yml +14 -0
- data/example/json_long.jsonl +1 -0
- data/example/json_long.yml +14 -0
- data/example/json_string.jsonl +2 -0
- data/example/json_string.yml +14 -0
- data/example/long.csv +1 -0
- data/example/long.yml +20 -0
- data/example/string.csv +4 -0
- data/example/{string_example.yml → string.yml} +6 -5
- data/example/string_java.yml +23 -0
- data/example/timestamp.csv +2 -0
- data/example/{timestamp_example.yml → timestamp.yml} +4 -4
- data/src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java +107 -14
- data/src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java +104 -33
- data/src/main/java/org/embulk/filter/timestamp_format/JsonCaster.java +61 -4
- data/src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java +8 -0
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java +28 -17
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java +36 -5
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java +57 -26
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampUnit.java +112 -0
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampUnitDeserializer.java +54 -0
- data/src/main/java/org/embulk/filter/timestamp_format/cast/DoubleCast.java +32 -0
- data/src/main/java/org/embulk/filter/timestamp_format/cast/LongCast.java +32 -0
- data/src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java +20 -4
- data/src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java +5 -6
- data/src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java +192 -0
- metadata +29 -8
- data/example/json_example.yml +0 -14
- data/src/test/java/org/embulk/filter/TestTimestampFormatFilterPlugin.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb84426fbf7dfbaac21925ece7475f322c7c2010
|
4
|
+
data.tar.gz: 2c5accec8470864588c20d62c42626a89a584bab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d77f18cde0d2a3626198d1e94aa1c7e1b6c47d3c8572c9397cf1c5ec8a4ec808fcb9dea636339c0e838e00f418cefd290d92d77b44669a6df31c1c3e2fd8b9bb
|
7
|
+
data.tar.gz: c3464272e96b244c782b65e8e536a1f64fc488646958c58d04a068550c85d718f137081662ce5fd7fe1370aa6788bda4b8598a7e0e606b7a00e6811e096177a5
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -13,11 +13,15 @@ A filter plugin for Embulk to change timestamp format
|
|
13
13
|
- **from_timezone**: specify the timezone of the input string (string, default is default_from_timezone)
|
14
14
|
- **to_format**: specify the format of the output string (string, default is default_to_timestamp_format)
|
15
15
|
- **to_timezone**: specify the timezone of the output string (string, default is default_to_timezone)
|
16
|
+
- **from_unit**: specify the time unit of the input unixtimestamp (string, default is default_from_timestamp_unit)
|
17
|
+
- **to_unit**: specify the time unit of the output unixtimestamp (string, default is default_to_timestamp_unit)
|
16
18
|
- **default_from_timestamp_format**: default timestamp format for the input string (array of strings, default is `["%Y-%m-%d %H:%M:%S.%N %z"]`)
|
17
19
|
- **default_from_timezone**: default timezone for the input string (string, default is `UTC`)
|
18
20
|
- **default_to_timestamp_format**: default timestamp format for the output string (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
|
19
21
|
- **default_to_timezone**: default timezone for the output string (string, default is `UTC`)
|
20
|
-
|
22
|
+
- **default_from_timestamp_unit**: default time unit such as second, ms, us, ns for the input unixtimestamp (string, default is `second`)
|
23
|
+
- **default_to_timestamp_unit**: default time unit such as second, ms, us, ns for the output unixtimestamp (string, default is `second`)
|
24
|
+
- **stop_on_invalid_record**: stop bulk load transaction if an invalid record is found (boolean, default is `false`)
|
21
25
|
|
22
26
|
## Example
|
23
27
|
|
@@ -56,6 +60,47 @@ Output will be as:
|
|
56
60
|
|
57
61
|
See [./example](./example) for more examples.
|
58
62
|
|
63
|
+
## Timestamp Parser/Formatter Performance Issue
|
64
|
+
|
65
|
+
Embulk's timestamp parser/formatter originally uses a JRuby implementation, but it is slow.
|
66
|
+
To improve performance, this plugin also supports Java's [SimpleDateFormat](https://docs.oracle.com/javase/jp/6/api/java/text/SimpleDateFormat.html) format as:
|
67
|
+
|
68
|
+
```yaml
|
69
|
+
in:
|
70
|
+
type: file
|
71
|
+
path_prefix: example/example.jsonl
|
72
|
+
parser:
|
73
|
+
type: jsonl
|
74
|
+
columns:
|
75
|
+
- {name: timestamp, type: string}
|
76
|
+
- {name: nested, type: json}
|
77
|
+
filters:
|
78
|
+
- type: timestamp_format
|
79
|
+
default_from_timezone: "Asia/Taipei"
|
80
|
+
default_from_timestamp_format: ["yyyy-MM-dd HH:mm:ss.SSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
|
81
|
+
default_to_timezone: "Asia/Taipei"
|
82
|
+
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
|
83
|
+
columns:
|
84
|
+
- {name: timestamp}
|
85
|
+
- {name: $.nested.timestamp}
|
86
|
+
out:
|
87
|
+
type: stdout
|
88
|
+
```
|
89
|
+
|
90
|
+
If format strings contain `%`, the JRuby parser/formatter is used. Otherwise, the Java parser/formatter is used.
|
91
|
+
|
92
|
+
**COMPARISON:**
|
93
|
+
|
94
|
+
Benchmark test sets are available at [./bench](./bench). In my environment (MacBook Pro), for 1000000 timestamps:
|
95
|
+
|
96
|
+
* jruby parser/formatter: 65.06s
|
97
|
+
* java parser/formatter: 1.3s
|
98
|
+
|
99
|
+
**NOTICE:**
|
100
|
+
|
101
|
+
* JRuby parser has microsecond resolution, but Java parser (SimpleDateFormat) has only millisecond resolution
|
102
|
+
* `S` always requires three digits, because SimpleDateFormat parses it as a plain number of milliseconds. For example, `yyyy-MM-dd HH:mm:ss.S` for `2015-12-17 01:02:03.1` gives 001 milliseconds (not 100), which looks wrong but is the specification of SimpleDateFormat.
|
103
|
+
|
59
104
|
## ToDo
|
60
105
|
|
61
106
|
* Write test
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: bench/dummy
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: timestamp, type: string}
|
8
|
+
filters:
|
9
|
+
- type: timestamp_format
|
10
|
+
columns:
|
11
|
+
- {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.SSS"], to_format: "yyyy-MM-dd"}
|
12
|
+
|
13
|
+
out:
|
14
|
+
type: "null"
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: bench/dummy
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: timestamp, type: string}
|
8
|
+
filters:
|
9
|
+
- type: timestamp_format
|
10
|
+
columns:
|
11
|
+
- {name: timestamp, from_format: ["%Y-%m-%d %H:%M:%S.%N"], to_format: "%Y-%m-%d"}
|
12
|
+
|
13
|
+
out:
|
14
|
+
type: "null"
|
data/bench/gen_dummy.rb
ADDED
data/build.gradle
CHANGED
data/example/double.csv
ADDED
data/example/double.yml
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/double.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: double1, type: double}
|
8
|
+
- {name: double2, type: double}
|
9
|
+
- {name: double3, type: double}
|
10
|
+
- {name: double4, type: double}
|
11
|
+
filters:
|
12
|
+
- type: timestamp_format
|
13
|
+
default_from_timestamp_unit: ms
|
14
|
+
columns:
|
15
|
+
- {name: double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
16
|
+
- {name: double2, type: timestamp}
|
17
|
+
- {name: double3, type: long}
|
18
|
+
- {name: double4, type: double}
|
19
|
+
out:
|
20
|
+
type: "null"
|
File without changes
|
data/example/example.yml
CHANGED
@@ -1,22 +1,14 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/example.jsonl
|
4
4
|
parser:
|
5
|
-
type:
|
6
|
-
columns:
|
7
|
-
- {name: string1, type: string}
|
8
|
-
- {name: string2, type: string}
|
9
|
-
- {name: string3, type: string}
|
10
|
-
- {name: string4, type: string}
|
5
|
+
type: json
|
11
6
|
filters:
|
12
7
|
- type: timestamp_format
|
13
8
|
default_to_timezone: "Asia/Tokyo"
|
14
9
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
15
|
-
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]
|
16
10
|
columns:
|
17
|
-
- {name:
|
18
|
-
- {name:
|
19
|
-
- {name: string3, type: long}
|
20
|
-
- {name: string4, type: double}
|
11
|
+
- {name: "$.record.timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
|
12
|
+
- {name: "$.record.nested.nested[0].timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
|
21
13
|
out:
|
22
14
|
type: "null"
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/example2.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: string1, type: string}
|
8
|
+
- {name: string2, type: string}
|
9
|
+
filters:
|
10
|
+
- type: timestamp_format
|
11
|
+
columns:
|
12
|
+
- {name: string1, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_format: "%Y-%m-%m", to_timezone: "Asia/Tokyo"}
|
13
|
+
out:
|
14
|
+
type: "null"
|
@@ -0,0 +1 @@
|
|
1
|
+
{"double1":1436713200100.2,"double2":1436713200100.2,"double3":1436713200100.2}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/json_double.jsonl
|
4
|
+
parser:
|
5
|
+
type: json
|
6
|
+
filters:
|
7
|
+
- type: timestamp_format
|
8
|
+
default_from_timestamp_unit: ms
|
9
|
+
columns:
|
10
|
+
- {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
11
|
+
- {name: $.record.double2, type: long}
|
12
|
+
- {name: $.record.double3, type: double}
|
13
|
+
out:
|
14
|
+
type: "null"
|
@@ -0,0 +1 @@
|
|
1
|
+
{"long1":1436713200100,"long2":1436713200100,"long3":1436713200100}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/json_long.jsonl
|
4
|
+
parser:
|
5
|
+
type: json
|
6
|
+
filters:
|
7
|
+
- type: timestamp_format
|
8
|
+
default_from_timestamp_unit: ms
|
9
|
+
columns:
|
10
|
+
- {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
11
|
+
- {name: $.record.long2, type: long}
|
12
|
+
- {name: $.record.long3, type: double}
|
13
|
+
out:
|
14
|
+
type: "null"
|
@@ -0,0 +1,14 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/json_string.jsonl
|
4
|
+
parser:
|
5
|
+
type: json
|
6
|
+
filters:
|
7
|
+
- type: timestamp_format
|
8
|
+
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S.%N %z"]
|
9
|
+
columns:
|
10
|
+
- {name: $.record.string1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
11
|
+
- {name: $.record.string2, type: long, to_unit: ms}
|
12
|
+
- {name: $.record.string3, type: double, to_unit: ms}
|
13
|
+
out:
|
14
|
+
type: "null"
|
data/example/long.csv
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1436713200100,1436713200100,1436713200100,1436713200100
|
data/example/long.yml
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/long.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: long1, type: long}
|
8
|
+
- {name: long2, type: long}
|
9
|
+
- {name: long3, type: long}
|
10
|
+
- {name: long4, type: long}
|
11
|
+
filters:
|
12
|
+
- type: timestamp_format
|
13
|
+
default_from_timestamp_unit: ms
|
14
|
+
columns:
|
15
|
+
- {name: long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
16
|
+
- {name: long2, type: timestamp}
|
17
|
+
- {name: long3, type: long}
|
18
|
+
- {name: long4, type: double}
|
19
|
+
out:
|
20
|
+
type: "null"
|
data/example/string.csv
ADDED
@@ -0,0 +1,4 @@
|
|
1
|
+
2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00
|
2
|
+
2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC
|
3
|
+
2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC
|
4
|
+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/string.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
columns:
|
@@ -10,13 +10,14 @@ in:
|
|
10
10
|
- {name: string4, type: string}
|
11
11
|
filters:
|
12
12
|
- type: timestamp_format
|
13
|
-
|
13
|
+
default_from_timezone: "Asia/Taipei"
|
14
|
+
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S"]
|
15
|
+
default_to_timezone: "Asia/Taipei"
|
14
16
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
15
|
-
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]
|
16
17
|
columns:
|
17
18
|
- {name: string1}
|
18
19
|
- {name: string2, type: timestamp}
|
19
|
-
- {name: string3, type: long}
|
20
|
-
- {name: string4, type: double}
|
20
|
+
- {name: string3, type: long, to_unit: ms}
|
21
|
+
- {name: string4, type: double, to_unit: ms}
|
21
22
|
out:
|
22
23
|
type: "null"
|
@@ -0,0 +1,23 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/string.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: string1, type: string}
|
8
|
+
- {name: string2, type: string}
|
9
|
+
- {name: string3, type: string}
|
10
|
+
- {name: string4, type: string}
|
11
|
+
filters:
|
12
|
+
- type: timestamp_format
|
13
|
+
default_from_timezone: "Asia/Taipei"
|
14
|
+
default_from_timestamp_format: ["yyyy-MM-dd HH:mm:ss.S z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"] # SSS must be three digit ...
|
15
|
+
default_to_timezone: "Asia/Taipei"
|
16
|
+
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
|
17
|
+
columns:
|
18
|
+
- {name: string1}
|
19
|
+
- {name: string2, type: timestamp}
|
20
|
+
- {name: string3, type: long, to_unit: ms}
|
21
|
+
- {name: string4, type: double, to_unit: ms}
|
22
|
+
out:
|
23
|
+
type: "null"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: example/
|
3
|
+
path_prefix: example/timestamp.csv
|
4
4
|
parser:
|
5
5
|
type: csv
|
6
6
|
default_timestamp_format: "%Y-%m-%d %H:%M:%S.%N %z"
|
@@ -14,9 +14,9 @@ filters:
|
|
14
14
|
default_to_timezone: "Asia/Tokyo"
|
15
15
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
16
16
|
columns:
|
17
|
-
- {name: timestamp1}
|
17
|
+
- {name: timestamp1, to_format: "%Y-%m-%d %H:%M:%S.%N"}
|
18
18
|
- {name: timestamp2, type: timestamp}
|
19
|
-
- {name: timestamp3, type: long}
|
20
|
-
- {name: timestamp4, type: double}
|
19
|
+
- {name: timestamp3, type: long, to_unit: ms}
|
20
|
+
- {name: timestamp4, type: double, to_unit: ms}
|
21
21
|
out:
|
22
22
|
type: "null"
|
@@ -1,5 +1,7 @@
|
|
1
1
|
package org.embulk.filter.timestamp_format;
|
2
2
|
|
3
|
+
import org.embulk.filter.timestamp_format.cast.DoubleCast;
|
4
|
+
import org.embulk.filter.timestamp_format.cast.LongCast;
|
3
5
|
import org.embulk.filter.timestamp_format.cast.StringCast;
|
4
6
|
import org.embulk.filter.timestamp_format.cast.TimestampCast;
|
5
7
|
import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.ColumnConfig;
|
@@ -32,6 +34,8 @@ public class ColumnCaster
|
|
32
34
|
private final PageBuilder pageBuilder;
|
33
35
|
private final HashMap<String, TimestampParser> timestampParserMap = new HashMap<>();
|
34
36
|
private final HashMap<String, TimestampFormatter> timestampFormatterMap = new HashMap<>();
|
37
|
+
private final HashMap<String, TimestampUnit> fromTimestampUnitMap = new HashMap<>();
|
38
|
+
private final HashMap<String, TimestampUnit> toTimestampUnitMap = new HashMap<>();
|
35
39
|
private final JsonVisitor jsonVisitor;
|
36
40
|
|
37
41
|
ColumnCaster(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, PageBuilder pageBuilder)
|
@@ -44,29 +48,23 @@ public class ColumnCaster
|
|
44
48
|
|
45
49
|
buildTimestampParserMap();
|
46
50
|
buildTimestampFormatterMap();
|
51
|
+
buildFromTimestampUnitMap();
|
52
|
+
buildToTimestampUnitMap();
|
47
53
|
|
48
|
-
JsonCaster jsonCaster = new JsonCaster(task, timestampParserMap, timestampFormatterMap);
|
54
|
+
JsonCaster jsonCaster = new JsonCaster(task, timestampParserMap, timestampFormatterMap, fromTimestampUnitMap, toTimestampUnitMap);
|
49
55
|
this.jsonVisitor = new JsonVisitor(task, jsonCaster);
|
50
56
|
}
|
51
57
|
|
52
58
|
private void buildTimestampParserMap()
|
53
59
|
{
|
54
60
|
// columnName or jsonPath => TimestampParser
|
61
|
+
// we do not know input type of json here, so creates anyway
|
55
62
|
for (ColumnConfig columnConfig : task.getColumns()) {
|
56
63
|
TimestampParser parser = getTimestampParser(columnConfig, task);
|
57
64
|
this.timestampParserMap.put(columnConfig.getName(), parser);
|
58
65
|
}
|
59
66
|
}
|
60
67
|
|
61
|
-
private void buildTimestampFormatterMap()
|
62
|
-
{
|
63
|
-
// columnName or jsonPath => TimestampFormatter
|
64
|
-
for (ColumnConfig columnConfig : task.getColumns()) {
|
65
|
-
TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
|
66
|
-
this.timestampFormatterMap.put(columnConfig.getName(), parser);
|
67
|
-
}
|
68
|
-
}
|
69
|
-
|
70
68
|
private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task)
|
71
69
|
{
|
72
70
|
DateTimeZone timezone = columnConfig.getFromTimeZone().or(task.getDefaultFromTimeZone());
|
@@ -74,6 +72,17 @@ public class ColumnCaster
|
|
74
72
|
return new TimestampParser(task.getJRuby(), formatList, timezone);
|
75
73
|
}
|
76
74
|
|
75
|
+
private void buildTimestampFormatterMap()
|
76
|
+
{
|
77
|
+
// columnName or jsonPath => TimestampFormatter
|
78
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
79
|
+
if (columnConfig.getType() instanceof StringType) {
|
80
|
+
TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
|
81
|
+
this.timestampFormatterMap.put(columnConfig.getName(), parser);
|
82
|
+
}
|
83
|
+
}
|
84
|
+
}
|
85
|
+
|
77
86
|
private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task)
|
78
87
|
{
|
79
88
|
String format = columnConfig.getToFormat().or(task.getDefaultToTimestampFormat());
|
@@ -81,6 +90,86 @@ public class ColumnCaster
|
|
81
90
|
return new TimestampFormatter(task.getJRuby(), format, timezone);
|
82
91
|
}
|
83
92
|
|
93
|
+
private void buildFromTimestampUnitMap()
|
94
|
+
{
|
95
|
+
// columnName or jsonPath => TimestampUnit
|
96
|
+
// we do not know input type of json here, so creates anyway
|
97
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
98
|
+
TimestampUnit unit = getFromTimestampUnit(columnConfig, task);
|
99
|
+
this.fromTimestampUnitMap.put(columnConfig.getName(), unit);
|
100
|
+
}
|
101
|
+
}
|
102
|
+
|
103
|
+
private TimestampUnit getFromTimestampUnit(ColumnConfig columnConfig, PluginTask task)
|
104
|
+
{
|
105
|
+
return columnConfig.getFromUnit().or(task.getDefaultFromTimestampUnit());
|
106
|
+
}
|
107
|
+
|
108
|
+
private void buildToTimestampUnitMap()
|
109
|
+
{
|
110
|
+
// columnName or jsonPath => TimestampUnit
|
111
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
112
|
+
Type type = columnConfig.getType();
|
113
|
+
if (type instanceof LongType || type instanceof DoubleType) {
|
114
|
+
TimestampUnit unit = getToTimestampUnit(columnConfig, task);
|
115
|
+
this.toTimestampUnitMap.put(columnConfig.getName(), unit);
|
116
|
+
}
|
117
|
+
}
|
118
|
+
}
|
119
|
+
|
120
|
+
private TimestampUnit getToTimestampUnit(ColumnConfig columnConfig, PluginTask task)
|
121
|
+
{
|
122
|
+
return columnConfig.getToUnit().or(task.getDefaultToTimestampUnit());
|
123
|
+
}
|
124
|
+
|
125
|
+
public void setFromLong(Column outputColumn, long value)
|
126
|
+
{
|
127
|
+
Type outputType = outputColumn.getType();
|
128
|
+
TimestampUnit fromUnit = fromTimestampUnitMap.get(outputColumn.getName());
|
129
|
+
if (outputType instanceof StringType) {
|
130
|
+
TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName());
|
131
|
+
pageBuilder.setString(outputColumn, LongCast.asString(value, fromUnit, timestampFormatter));
|
132
|
+
}
|
133
|
+
else if (outputType instanceof TimestampType) {
|
134
|
+
pageBuilder.setTimestamp(outputColumn, LongCast.asTimestamp(value, fromUnit));
|
135
|
+
}
|
136
|
+
else if (outputType instanceof LongType) {
|
137
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
138
|
+
pageBuilder.setLong(outputColumn, LongCast.asLong(value, fromUnit, toUnit));
|
139
|
+
}
|
140
|
+
else if (outputType instanceof DoubleType) {
|
141
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
142
|
+
pageBuilder.setDouble(outputColumn, LongCast.asDouble(value, fromUnit, toUnit));
|
143
|
+
}
|
144
|
+
else {
|
145
|
+
assert false;
|
146
|
+
}
|
147
|
+
}
|
148
|
+
|
149
|
+
public void setFromDouble(Column outputColumn, double value)
|
150
|
+
{
|
151
|
+
Type outputType = outputColumn.getType();
|
152
|
+
TimestampUnit fromUnit = fromTimestampUnitMap.get(outputColumn.getName());
|
153
|
+
if (outputType instanceof StringType) {
|
154
|
+
TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName());
|
155
|
+
pageBuilder.setString(outputColumn, DoubleCast.asString(value, fromUnit, timestampFormatter));
|
156
|
+
}
|
157
|
+
else if (outputType instanceof TimestampType) {
|
158
|
+
pageBuilder.setTimestamp(outputColumn, DoubleCast.asTimestamp(value, fromUnit));
|
159
|
+
}
|
160
|
+
else if (outputType instanceof LongType) {
|
161
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
162
|
+
pageBuilder.setLong(outputColumn, DoubleCast.asLong(value, fromUnit, toUnit));
|
163
|
+
}
|
164
|
+
else if (outputType instanceof DoubleType) {
|
165
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
166
|
+
pageBuilder.setDouble(outputColumn, DoubleCast.asDouble(value, fromUnit, toUnit));
|
167
|
+
}
|
168
|
+
else {
|
169
|
+
assert false;
|
170
|
+
}
|
171
|
+
}
|
172
|
+
|
84
173
|
public void setFromString(Column outputColumn, String value)
|
85
174
|
{
|
86
175
|
Type outputType = outputColumn.getType();
|
@@ -93,10 +182,12 @@ public class ColumnCaster
|
|
93
182
|
pageBuilder.setTimestamp(outputColumn, StringCast.asTimestamp(value, timestampParser));
|
94
183
|
}
|
95
184
|
else if (outputType instanceof LongType) {
|
96
|
-
|
185
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
186
|
+
pageBuilder.setLong(outputColumn, StringCast.asLong(value, timestampParser, toUnit));
|
97
187
|
}
|
98
188
|
else if (outputType instanceof DoubleType) {
|
99
|
-
|
189
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
190
|
+
pageBuilder.setDouble(outputColumn, StringCast.asDouble(value, timestampParser, toUnit));
|
100
191
|
}
|
101
192
|
else {
|
102
193
|
assert false;
|
@@ -114,10 +205,12 @@ public class ColumnCaster
|
|
114
205
|
pageBuilder.setTimestamp(outputColumn, value);
|
115
206
|
}
|
116
207
|
else if (outputType instanceof LongType) {
|
117
|
-
|
208
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
209
|
+
pageBuilder.setLong(outputColumn, TimestampCast.asLong(value, toUnit));
|
118
210
|
}
|
119
211
|
else if (outputType instanceof DoubleType) {
|
120
|
-
|
212
|
+
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
|
213
|
+
pageBuilder.setDouble(outputColumn, TimestampCast.asDouble(value, toUnit));
|
121
214
|
}
|
122
215
|
else {
|
123
216
|
assert false;
|