embulk-filter-timestamp_format 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/CHANGELOG.md +7 -0
  4. data/README.md +46 -1
  5. data/bench/config_java.yml +14 -0
  6. data/bench/config_jruby.yml +14 -0
  7. data/bench/gen_dummy.rb +5 -0
  8. data/build.gradle +1 -1
  9. data/example/double.csv +2 -0
  10. data/example/double.yml +20 -0
  11. data/example/{json_example.jsonl → example.jsonl} +0 -0
  12. data/example/example.yml +4 -12
  13. data/example/example2.csv +2 -0
  14. data/example/example2.yml +14 -0
  15. data/example/json_double.jsonl +1 -0
  16. data/example/json_double.yml +14 -0
  17. data/example/json_long.jsonl +1 -0
  18. data/example/json_long.yml +14 -0
  19. data/example/json_string.jsonl +2 -0
  20. data/example/json_string.yml +14 -0
  21. data/example/long.csv +1 -0
  22. data/example/long.yml +20 -0
  23. data/example/string.csv +4 -0
  24. data/example/{string_example.yml → string.yml} +6 -5
  25. data/example/string_java.yml +23 -0
  26. data/example/timestamp.csv +2 -0
  27. data/example/{timestamp_example.yml → timestamp.yml} +4 -4
  28. data/src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java +107 -14
  29. data/src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java +104 -33
  30. data/src/main/java/org/embulk/filter/timestamp_format/JsonCaster.java +61 -4
  31. data/src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java +8 -0
  32. data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java +28 -17
  33. data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java +36 -5
  34. data/src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java +57 -26
  35. data/src/main/java/org/embulk/filter/timestamp_format/TimestampUnit.java +112 -0
  36. data/src/main/java/org/embulk/filter/timestamp_format/TimestampUnitDeserializer.java +54 -0
  37. data/src/main/java/org/embulk/filter/timestamp_format/cast/DoubleCast.java +32 -0
  38. data/src/main/java/org/embulk/filter/timestamp_format/cast/LongCast.java +32 -0
  39. data/src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java +20 -4
  40. data/src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java +5 -6
  41. data/src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java +192 -0
  42. metadata +29 -8
  43. data/example/json_example.yml +0 -14
  44. data/src/test/java/org/embulk/filter/TestTimestampFormatFilterPlugin.java +0 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7d7569b8adc1db79b292e271214f852fb080151b
4
- data.tar.gz: df0c01a5893dc4a4bbb1f1228e3d72b031e59f93
3
+ metadata.gz: cb84426fbf7dfbaac21925ece7475f322c7c2010
4
+ data.tar.gz: 2c5accec8470864588c20d62c42626a89a584bab
5
5
  SHA512:
6
- metadata.gz: d81f4f2df4775444b5608432a2451158453f768cf6687188afae1169ea5eb15c699141d670987e73139c14c5ae8bbeb2122fbfcb6f73c89f5e98a425db8f2519
7
- data.tar.gz: 5ecdc2f30763b7768fd1e9176c2c6b01fdafbd214f9191885e79de7f333303a9a7b86054a7c10acbf87ed458db5cab5d3b2c4871115198b829f65b8c36d855cc
6
+ metadata.gz: d77f18cde0d2a3626198d1e94aa1c7e1b6c47d3c8572c9397cf1c5ec8a4ec808fcb9dea636339c0e838e00f418cefd290d92d77b44669a6df31c1c3e2fd8b9bb
7
+ data.tar.gz: c3464272e96b244c782b65e8e536a1f64fc488646958c58d04a068550c85d718f137081662ce5fd7fe1370aa6788bda4b8598a7e0e606b7a00e6811e096177a5
data/.gitignore CHANGED
@@ -6,7 +6,7 @@
6
6
  /classpath/
7
7
  build/
8
8
  .idea
9
- *.csv
10
9
  .tags
11
10
  .ruby-version
12
11
  *.iml
12
+ dummy.csv
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ # 0.1.6 (2016-05-01)
2
+
3
+ Enhancements:
4
+
5
+ * Support unixtimestamp unit such as milli sec, micro sec, nano sec
6
+ * Support Java timestamp parser/formatter (SimpleDateFormat)
7
+
1
8
  # 0.1.5 (2016-04-29)
2
9
 
3
10
  Enhancements:
data/README.md CHANGED
@@ -13,11 +13,15 @@ A filter plugin for Embulk to change timestamp format
13
13
  - **from_timezone**: specify the timezone of the input string (string, default is default_from_timezone)
14
14
  - **to_format**: specify the format of the output string (string, default is default_to_timestamp_format)
15
15
  - **to_timezone**: specify the timezone of the output string (string, default is default_to_timezone)
16
+ - **from_unit**: specify the time unit of the input unixtimestamp (string, default is default_from_timestamp_unit)
17
+ - **to_unit**: specify the time unit of the output unixtimestamp (string, default is default_to_timestamp_unit)
16
18
  - **default_from_timestamp_format**: default timestamp format for the input string (array of strings, default is `["%Y-%m-%d %H:%M:%S.%N %z"]`)
17
19
  - **default_from_timezone**: default timezone for the input string (string, default is `UTC`)
18
20
  - **default_to_timestamp_format**: default timestamp format for the output string (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
19
21
  - **default_to_timezone**: default timezone for the output string (string, default is `UTC`)
20
- * **stop_on_invalid_record**: stop bulk load transaction if a invalid record is found (boolean, default is `false)
22
+ - **default_from_timestamp_unit**: default time unit such as second, ms, us, ns for the input unixtimestamp (string, default is `second`)
23
+ - **default_to_timestamp_unit**: default time unit such as second, ms, us, ns for the output unixtimestamp (string, default is `second`)
24
+ - **stop_on_invalid_record**: stop bulk load transaction if an invalid record is found (boolean, default is `false`)
21
25
 
22
26
  ## Example
23
27
 
@@ -56,6 +60,47 @@ Output will be as:
56
60
 
57
61
  See [./example](./example) for more examples.
58
62
 
63
+ ## Timestamp Parser/Formatter Performance Issue
64
+
65
+ Embulk's built-in timestamp parser/formatter uses a JRuby implementation, which is slow.
66
+ To improve performance, this plugin also supports Java's [SimpleDateFormat](https://docs.oracle.com/javase/jp/6/api/java/text/SimpleDateFormat.html) format as:
67
+
68
+ ```yaml
69
+ in:
70
+ type: file
71
+ path_prefix: example/example.jsonl
72
+ parser:
73
+ type: jsonl
74
+ columns:
75
+ - {name: timestamp, type: string}
76
+ - {name: nested, type: json}
77
+ filters:
78
+ - type: timestamp_format
79
+ default_from_timezone: "Asia/Taipei"
80
+ default_from_timestamp_format: ["yyyy-MM-dd HH:mm:ss.SSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
81
+ default_to_timezone: "Asia/Taipei"
82
+ default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
83
+ columns:
84
+ - {name: timestamp}
85
+ - {name: $.nested.timestamp}
86
+ out:
87
+ type: stdout
88
+ ```
89
+
90
+ If a format string contains `%`, the JRuby parser/formatter is used. Otherwise, the Java parser/formatter is used.
91
+
92
+ **COMPARISON:**
93
+
94
+ Benchmark test sets are available at [./bench](./bench). In my environment (MacBook Pro), for 1000000 timestamps:
95
+
96
+ * jruby parser/formatter: 65.06s
97
+ * java parser/formatter: 1.3s
98
+
99
+ **NOTICE:**
100
+
101
+ * JRuby parser has micro second resolution, but Java parser (SimpleDateFormat) has only milli second resolution
102
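+ * `S` is parsed as a plain number of milliseconds, so the fractional part must always be written with three digits. For example, `yyyy-MM-dd HH:mm:ss.S` parses `2015-12-17 01:02:03.1` as 001 milliseconds (not 100 milliseconds); this is surprising, but it is the specified behavior of SimpleDateFormat.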
103
+
59
104
  ## ToDo
60
105
 
61
106
  * Write test
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: bench/dummy
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: timestamp, type: string}
8
+ filters:
9
+ - type: timestamp_format
10
+ columns:
11
+ - {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.SSS"], to_format: "yyyy-MM-dd"}
12
+
13
+ out:
14
+ type: "null"
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: bench/dummy
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: timestamp, type: string}
8
+ filters:
9
+ - type: timestamp_format
10
+ columns:
11
+ - {name: timestamp, from_format: ["%Y-%m-%d %H:%M:%S.%N"], to_format: "%Y-%m-%d"}
12
+
13
+ out:
14
+ type: "null"
@@ -0,0 +1,5 @@
1
+ File::open('bench/dummy.csv', 'w') { |f|
2
+ (1..1000000).each {
3
+ f.puts(Time.now.strftime('%Y-%m-%d %H:%M:%S.%L'))
4
+ }
5
+ }
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.5"
16
+ version = "0.1.6"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
@@ -0,0 +1,2 @@
1
+ 1436713200100.2,1436713200100.2,1436713200100.2,1436713200100.2
2
+
@@ -0,0 +1,20 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/double.csv
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: double1, type: double}
8
+ - {name: double2, type: double}
9
+ - {name: double3, type: double}
10
+ - {name: double4, type: double}
11
+ filters:
12
+ - type: timestamp_format
13
+ default_from_timestamp_unit: ms
14
+ columns:
15
+ - {name: double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
16
+ - {name: double2, type: timestamp}
17
+ - {name: double3, type: long}
18
+ - {name: double4, type: double}
19
+ out:
20
+ type: "null"
File without changes
data/example/example.yml CHANGED
@@ -1,22 +1,14 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/string_example.csv
3
+ path_prefix: example/example.jsonl
4
4
  parser:
5
- type: csv
6
- columns:
7
- - {name: string1, type: string}
8
- - {name: string2, type: string}
9
- - {name: string3, type: string}
10
- - {name: string4, type: string}
5
+ type: json
11
6
  filters:
12
7
  - type: timestamp_format
13
8
  default_to_timezone: "Asia/Tokyo"
14
9
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
15
- default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]
16
10
  columns:
17
- - {name: string1}
18
- - {name: string2, type: timestamp}
19
- - {name: string3, type: long}
20
- - {name: string4, type: double}
11
+ - {name: "$.record.timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
12
+ - {name: "$.record.nested.nested[0].timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
21
13
  out:
22
14
  type: "null"
@@ -0,0 +1,2 @@
1
+ 2015-07-12 15:00:00 UTC,2015-07-12 15:00:00 UTC
2
+ 2015-07-12 15:00:00.1 UTC,2015-07-12 15:00:00.1 UTC
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/example2.csv
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: string1, type: string}
8
+ - {name: string2, type: string}
9
+ filters:
10
+ - type: timestamp_format
11
+ columns:
12
+ - {name: string1, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_format: "%Y-%m-%m", to_timezone: "Asia/Tokyo"}
13
+ out:
14
+ type: "null"
@@ -0,0 +1 @@
1
+ {"double1":1436713200100.2,"double2":1436713200100.2,"double3":1436713200100.2}
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/json_double.jsonl
4
+ parser:
5
+ type: json
6
+ filters:
7
+ - type: timestamp_format
8
+ default_from_timestamp_unit: ms
9
+ columns:
10
+ - {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
11
+ - {name: $.record.double2, type: long}
12
+ - {name: $.record.double3, type: double}
13
+ out:
14
+ type: "null"
@@ -0,0 +1 @@
1
+ {"long1":1436713200100,"long2":1436713200100,"long3":1436713200100}
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/json_long.jsonl
4
+ parser:
5
+ type: json
6
+ filters:
7
+ - type: timestamp_format
8
+ default_from_timestamp_unit: ms
9
+ columns:
10
+ - {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
11
+ - {name: $.record.long2, type: long}
12
+ - {name: $.record.long3, type: double}
13
+ out:
14
+ type: "null"
@@ -0,0 +1,2 @@
1
+ {"string1":"2015-07-12 15:00:00 UTC","string2":"2015-07-12 15:00:00 UTC","string3":"2015-07-12 15:00:00 UTC"}
2
+ {"string1":"2015-07-12 15:00:00.1 UTC","string2":"2015-07-12 15:00:00.1 UTC","string3":"2015-07-12 15:00:00.1 UTC"}
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/json_string.jsonl
4
+ parser:
5
+ type: json
6
+ filters:
7
+ - type: timestamp_format
8
+ default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S.%N %z"]
9
+ columns:
10
+ - {name: $.record.string1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
11
+ - {name: $.record.string2, type: long, to_unit: ms}
12
+ - {name: $.record.string3, type: double, to_unit: ms}
13
+ out:
14
+ type: "null"
data/example/long.csv ADDED
@@ -0,0 +1 @@
1
+ 1436713200100,1436713200100,1436713200100,1436713200100
data/example/long.yml ADDED
@@ -0,0 +1,20 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/long.csv
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: long1, type: long}
8
+ - {name: long2, type: long}
9
+ - {name: long3, type: long}
10
+ - {name: long4, type: long}
11
+ filters:
12
+ - type: timestamp_format
13
+ default_from_timestamp_unit: ms
14
+ columns:
15
+ - {name: long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
16
+ - {name: long2, type: timestamp}
17
+ - {name: long3, type: long}
18
+ - {name: long4, type: double}
19
+ out:
20
+ type: "null"
@@ -0,0 +1,4 @@
1
+ 2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00
2
+ 2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC
3
+ 2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC
4
+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/string_example.csv
3
+ path_prefix: example/string.csv
4
4
  parser:
5
5
  type: csv
6
6
  columns:
@@ -10,13 +10,14 @@ in:
10
10
  - {name: string4, type: string}
11
11
  filters:
12
12
  - type: timestamp_format
13
- default_to_timezone: "Asia/Tokyo"
13
+ default_from_timezone: "Asia/Taipei"
14
+ default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S"]
15
+ default_to_timezone: "Asia/Taipei"
14
16
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
15
- default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]
16
17
  columns:
17
18
  - {name: string1}
18
19
  - {name: string2, type: timestamp}
19
- - {name: string3, type: long}
20
- - {name: string4, type: double}
20
+ - {name: string3, type: long, to_unit: ms}
21
+ - {name: string4, type: double, to_unit: ms}
21
22
  out:
22
23
  type: "null"
@@ -0,0 +1,23 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/string.csv
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: string1, type: string}
8
+ - {name: string2, type: string}
9
+ - {name: string3, type: string}
10
+ - {name: string4, type: string}
11
+ filters:
12
+ - type: timestamp_format
13
+ default_from_timezone: "Asia/Taipei"
14
+ default_from_timestamp_format: ["yyyy-MM-dd HH:mm:ss.S z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"] # SSS must be three digit ...
15
+ default_to_timezone: "Asia/Taipei"
16
+ default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
17
+ columns:
18
+ - {name: string1}
19
+ - {name: string2, type: timestamp}
20
+ - {name: string3, type: long, to_unit: ms}
21
+ - {name: string4, type: double, to_unit: ms}
22
+ out:
23
+ type: "null"
@@ -0,0 +1,2 @@
1
+ 2015-07-12 15:00:00.1 UTC,2015-07-12 15:00:00.1 UTC,2015-07-12 15:00:00.1 UTC,2015-07-12 15:00:00.1 UTC
2
+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/timestamp_example.csv
3
+ path_prefix: example/timestamp.csv
4
4
  parser:
5
5
  type: csv
6
6
  default_timestamp_format: "%Y-%m-%d %H:%M:%S.%N %z"
@@ -14,9 +14,9 @@ filters:
14
14
  default_to_timezone: "Asia/Tokyo"
15
15
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
16
16
  columns:
17
- - {name: timestamp1}
17
+ - {name: timestamp1, to_format: "%Y-%m-%d %H:%M:%S.%N"}
18
18
  - {name: timestamp2, type: timestamp}
19
- - {name: timestamp3, type: long}
20
- - {name: timestamp4, type: double}
19
+ - {name: timestamp3, type: long, to_unit: ms}
20
+ - {name: timestamp4, type: double, to_unit: ms}
21
21
  out:
22
22
  type: "null"
@@ -1,5 +1,7 @@
1
1
  package org.embulk.filter.timestamp_format;
2
2
 
3
+ import org.embulk.filter.timestamp_format.cast.DoubleCast;
4
+ import org.embulk.filter.timestamp_format.cast.LongCast;
3
5
  import org.embulk.filter.timestamp_format.cast.StringCast;
4
6
  import org.embulk.filter.timestamp_format.cast.TimestampCast;
5
7
  import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.ColumnConfig;
@@ -32,6 +34,8 @@ public class ColumnCaster
32
34
  private final PageBuilder pageBuilder;
33
35
  private final HashMap<String, TimestampParser> timestampParserMap = new HashMap<>();
34
36
  private final HashMap<String, TimestampFormatter> timestampFormatterMap = new HashMap<>();
37
+ private final HashMap<String, TimestampUnit> fromTimestampUnitMap = new HashMap<>();
38
+ private final HashMap<String, TimestampUnit> toTimestampUnitMap = new HashMap<>();
35
39
  private final JsonVisitor jsonVisitor;
36
40
 
37
41
  ColumnCaster(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, PageBuilder pageBuilder)
@@ -44,29 +48,23 @@ public class ColumnCaster
44
48
 
45
49
  buildTimestampParserMap();
46
50
  buildTimestampFormatterMap();
51
+ buildFromTimestampUnitMap();
52
+ buildToTimestampUnitMap();
47
53
 
48
- JsonCaster jsonCaster = new JsonCaster(task, timestampParserMap, timestampFormatterMap);
54
+ JsonCaster jsonCaster = new JsonCaster(task, timestampParserMap, timestampFormatterMap, fromTimestampUnitMap, toTimestampUnitMap);
49
55
  this.jsonVisitor = new JsonVisitor(task, jsonCaster);
50
56
  }
51
57
 
52
58
  private void buildTimestampParserMap()
53
59
  {
54
60
  // columnName or jsonPath => TimestampParser
61
+ // we do not know input type of json here, so creates anyway
55
62
  for (ColumnConfig columnConfig : task.getColumns()) {
56
63
  TimestampParser parser = getTimestampParser(columnConfig, task);
57
64
  this.timestampParserMap.put(columnConfig.getName(), parser);
58
65
  }
59
66
  }
60
67
 
61
- private void buildTimestampFormatterMap()
62
- {
63
- // columnName or jsonPath => TimestampFormatter
64
- for (ColumnConfig columnConfig : task.getColumns()) {
65
- TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
66
- this.timestampFormatterMap.put(columnConfig.getName(), parser);
67
- }
68
- }
69
-
70
68
  private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task)
71
69
  {
72
70
  DateTimeZone timezone = columnConfig.getFromTimeZone().or(task.getDefaultFromTimeZone());
@@ -74,6 +72,17 @@ public class ColumnCaster
74
72
  return new TimestampParser(task.getJRuby(), formatList, timezone);
75
73
  }
76
74
 
75
+ private void buildTimestampFormatterMap()
76
+ {
77
+ // columnName or jsonPath => TimestampFormatter
78
+ for (ColumnConfig columnConfig : task.getColumns()) {
79
+ if (columnConfig.getType() instanceof StringType) {
80
+ TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
81
+ this.timestampFormatterMap.put(columnConfig.getName(), parser);
82
+ }
83
+ }
84
+ }
85
+
77
86
  private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task)
78
87
  {
79
88
  String format = columnConfig.getToFormat().or(task.getDefaultToTimestampFormat());
@@ -81,6 +90,86 @@ public class ColumnCaster
81
90
  return new TimestampFormatter(task.getJRuby(), format, timezone);
82
91
  }
83
92
 
93
+ private void buildFromTimestampUnitMap()
94
+ {
95
+ // columnName or jsonPath => TimestampUnit
96
+ // we do not know input type of json here, so creates anyway
97
+ for (ColumnConfig columnConfig : task.getColumns()) {
98
+ TimestampUnit unit = getFromTimestampUnit(columnConfig, task);
99
+ this.fromTimestampUnitMap.put(columnConfig.getName(), unit);
100
+ }
101
+ }
102
+
103
+ private TimestampUnit getFromTimestampUnit(ColumnConfig columnConfig, PluginTask task)
104
+ {
105
+ return columnConfig.getFromUnit().or(task.getDefaultFromTimestampUnit());
106
+ }
107
+
108
+ private void buildToTimestampUnitMap()
109
+ {
110
+ // columnName or jsonPath => TimestampUnit
111
+ for (ColumnConfig columnConfig : task.getColumns()) {
112
+ Type type = columnConfig.getType();
113
+ if (type instanceof LongType || type instanceof DoubleType) {
114
+ TimestampUnit unit = getToTimestampUnit(columnConfig, task);
115
+ this.toTimestampUnitMap.put(columnConfig.getName(), unit);
116
+ }
117
+ }
118
+ }
119
+
120
+ private TimestampUnit getToTimestampUnit(ColumnConfig columnConfig, PluginTask task)
121
+ {
122
+ return columnConfig.getToUnit().or(task.getDefaultToTimestampUnit());
123
+ }
124
+
125
+ public void setFromLong(Column outputColumn, long value)
126
+ {
127
+ Type outputType = outputColumn.getType();
128
+ TimestampUnit fromUnit = fromTimestampUnitMap.get(outputColumn.getName());
129
+ if (outputType instanceof StringType) {
130
+ TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName());
131
+ pageBuilder.setString(outputColumn, LongCast.asString(value, fromUnit, timestampFormatter));
132
+ }
133
+ else if (outputType instanceof TimestampType) {
134
+ pageBuilder.setTimestamp(outputColumn, LongCast.asTimestamp(value, fromUnit));
135
+ }
136
+ else if (outputType instanceof LongType) {
137
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
138
+ pageBuilder.setLong(outputColumn, LongCast.asLong(value, fromUnit, toUnit));
139
+ }
140
+ else if (outputType instanceof DoubleType) {
141
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
142
+ pageBuilder.setDouble(outputColumn, LongCast.asDouble(value, fromUnit, toUnit));
143
+ }
144
+ else {
145
+ assert false;
146
+ }
147
+ }
148
+
149
+ public void setFromDouble(Column outputColumn, double value)
150
+ {
151
+ Type outputType = outputColumn.getType();
152
+ TimestampUnit fromUnit = fromTimestampUnitMap.get(outputColumn.getName());
153
+ if (outputType instanceof StringType) {
154
+ TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName());
155
+ pageBuilder.setString(outputColumn, DoubleCast.asString(value, fromUnit, timestampFormatter));
156
+ }
157
+ else if (outputType instanceof TimestampType) {
158
+ pageBuilder.setTimestamp(outputColumn, DoubleCast.asTimestamp(value, fromUnit));
159
+ }
160
+ else if (outputType instanceof LongType) {
161
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
162
+ pageBuilder.setLong(outputColumn, DoubleCast.asLong(value, fromUnit, toUnit));
163
+ }
164
+ else if (outputType instanceof DoubleType) {
165
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
166
+ pageBuilder.setDouble(outputColumn, DoubleCast.asDouble(value, fromUnit, toUnit));
167
+ }
168
+ else {
169
+ assert false;
170
+ }
171
+ }
172
+
84
173
  public void setFromString(Column outputColumn, String value)
85
174
  {
86
175
  Type outputType = outputColumn.getType();
@@ -93,10 +182,12 @@ public class ColumnCaster
93
182
  pageBuilder.setTimestamp(outputColumn, StringCast.asTimestamp(value, timestampParser));
94
183
  }
95
184
  else if (outputType instanceof LongType) {
96
- pageBuilder.setLong(outputColumn, StringCast.asLong(value, timestampParser));
185
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
186
+ pageBuilder.setLong(outputColumn, StringCast.asLong(value, timestampParser, toUnit));
97
187
  }
98
188
  else if (outputType instanceof DoubleType) {
99
- pageBuilder.setDouble(outputColumn, StringCast.asDouble(value, timestampParser));
189
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
190
+ pageBuilder.setDouble(outputColumn, StringCast.asDouble(value, timestampParser, toUnit));
100
191
  }
101
192
  else {
102
193
  assert false;
@@ -114,10 +205,12 @@ public class ColumnCaster
114
205
  pageBuilder.setTimestamp(outputColumn, value);
115
206
  }
116
207
  else if (outputType instanceof LongType) {
117
- pageBuilder.setLong(outputColumn, TimestampCast.asLong(value));
208
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
209
+ pageBuilder.setLong(outputColumn, TimestampCast.asLong(value, toUnit));
118
210
  }
119
211
  else if (outputType instanceof DoubleType) {
120
- pageBuilder.setDouble(outputColumn, TimestampCast.asDouble(value));
212
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
213
+ pageBuilder.setDouble(outputColumn, TimestampCast.asDouble(value, toUnit));
121
214
  }
122
215
  else {
123
216
  assert false;