embulk-filter-timestamp_format 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/CHANGELOG.md +7 -0
  4. data/README.md +46 -1
  5. data/bench/config_java.yml +14 -0
  6. data/bench/config_jruby.yml +14 -0
  7. data/bench/gen_dummy.rb +5 -0
  8. data/build.gradle +1 -1
  9. data/example/double.csv +2 -0
  10. data/example/double.yml +20 -0
  11. data/example/{json_example.jsonl → example.jsonl} +0 -0
  12. data/example/example.yml +4 -12
  13. data/example/example2.csv +2 -0
  14. data/example/example2.yml +14 -0
  15. data/example/json_double.jsonl +1 -0
  16. data/example/json_double.yml +14 -0
  17. data/example/json_long.jsonl +1 -0
  18. data/example/json_long.yml +14 -0
  19. data/example/json_string.jsonl +2 -0
  20. data/example/json_string.yml +14 -0
  21. data/example/long.csv +1 -0
  22. data/example/long.yml +20 -0
  23. data/example/string.csv +4 -0
  24. data/example/{string_example.yml → string.yml} +6 -5
  25. data/example/string_java.yml +23 -0
  26. data/example/timestamp.csv +2 -0
  27. data/example/{timestamp_example.yml → timestamp.yml} +4 -4
  28. data/src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java +107 -14
  29. data/src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java +104 -33
  30. data/src/main/java/org/embulk/filter/timestamp_format/JsonCaster.java +61 -4
  31. data/src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java +8 -0
  32. data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java +28 -17
  33. data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java +36 -5
  34. data/src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java +57 -26
  35. data/src/main/java/org/embulk/filter/timestamp_format/TimestampUnit.java +112 -0
  36. data/src/main/java/org/embulk/filter/timestamp_format/TimestampUnitDeserializer.java +54 -0
  37. data/src/main/java/org/embulk/filter/timestamp_format/cast/DoubleCast.java +32 -0
  38. data/src/main/java/org/embulk/filter/timestamp_format/cast/LongCast.java +32 -0
  39. data/src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java +20 -4
  40. data/src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java +5 -6
  41. data/src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java +192 -0
  42. metadata +29 -8
  43. data/example/json_example.yml +0 -14
  44. data/src/test/java/org/embulk/filter/TestTimestampFormatFilterPlugin.java +0 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7d7569b8adc1db79b292e271214f852fb080151b
4
- data.tar.gz: df0c01a5893dc4a4bbb1f1228e3d72b031e59f93
3
+ metadata.gz: cb84426fbf7dfbaac21925ece7475f322c7c2010
4
+ data.tar.gz: 2c5accec8470864588c20d62c42626a89a584bab
5
5
  SHA512:
6
- metadata.gz: d81f4f2df4775444b5608432a2451158453f768cf6687188afae1169ea5eb15c699141d670987e73139c14c5ae8bbeb2122fbfcb6f73c89f5e98a425db8f2519
7
- data.tar.gz: 5ecdc2f30763b7768fd1e9176c2c6b01fdafbd214f9191885e79de7f333303a9a7b86054a7c10acbf87ed458db5cab5d3b2c4871115198b829f65b8c36d855cc
6
+ metadata.gz: d77f18cde0d2a3626198d1e94aa1c7e1b6c47d3c8572c9397cf1c5ec8a4ec808fcb9dea636339c0e838e00f418cefd290d92d77b44669a6df31c1c3e2fd8b9bb
7
+ data.tar.gz: c3464272e96b244c782b65e8e536a1f64fc488646958c58d04a068550c85d718f137081662ce5fd7fe1370aa6788bda4b8598a7e0e606b7a00e6811e096177a5
data/.gitignore CHANGED
@@ -6,7 +6,7 @@
6
6
  /classpath/
7
7
  build/
8
8
  .idea
9
- *.csv
10
9
  .tags
11
10
  .ruby-version
12
11
  *.iml
12
+ dummy.csv
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ # 0.1.6 (2016-05-01)
2
+
3
+ Enhancements:
4
+
5
+ * Support unixtimestamp unit such as milli sec, micro sec, nano sec
6
+ * Support Java timestamp parser/formatter (SimpleDateFormat)
7
+
1
8
  # 0.1.5 (2016-04-29)
2
9
 
3
10
  Enhancements:
data/README.md CHANGED
@@ -13,11 +13,15 @@ A filter plugin for Embulk to change timestamp format
13
13
  - **from_timezone**: specify the timezone of the input string (string, default is default_from_timezone)
14
14
  - **to_format**: specify the format of the output string (string, default is default_to_timestamp_format)
15
15
  - **to_timezone**: specify the timezone of the output string (string, default is default_to_timezone)
16
+ - **from_unit**: specify the time unit of the input unixtimestamp (string, default is default_from_timestamp_unit)
17
+ - **to_unit**: specify the time unit of the output unixtimestamp (string, default is default_to_timestamp_unit)
16
18
  - **default_from_timestamp_format**: default timestamp format for the input string (array of strings, default is `["%Y-%m-%d %H:%M:%S.%N %z"]`)
17
19
  - **default_from_timezone**: default timezone for the input string (string, default is `UTC`)
18
20
  - **default_to_timestamp_format**: default timestamp format for the output string (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
19
21
  - **default_to_timezone**: default timezone for the output string (string, default is `UTC`)
20
- * **stop_on_invalid_record**: stop bulk load transaction if a invalid record is found (boolean, default is `false)
22
+ - **default_from_timestamp_unit**: default time unit such as second, ms, us, ns for the input unixtimestamp (string, default is `second`)
23
+ - **default_to_timestamp_unit**: default time unit such as second, ms, us, ns for the output unixtimestamp (string, default is `second`)
24
+ - **stop_on_invalid_record**: stop bulk load transaction if an invalid record is found (boolean, default is `false`)
21
25
 
22
26
  ## Example
23
27
 
@@ -56,6 +60,47 @@ Output will be as:
56
60
 
57
61
  See [./example](./example) for more examples.
58
62
 
63
+ ## Timestamp Parser/Formatter Performance Issue
64
+
65
+ Embulk's built-in timestamp parser/formatter uses a JRuby implementation, which is slow.
66
+ To improve performance, this plugin also supports Java's [SimpleDateFormat](https://docs.oracle.com/javase/jp/6/api/java/text/SimpleDateFormat.html) format as:
67
+
68
+ ```yaml
69
+ in:
70
+ type: file
71
+ path_prefix: example/example.jsonl
72
+ parser:
73
+ type: jsonl
74
+ columns:
75
+ - {name: timestamp, type: string}
76
+ - {name: nested, type: json}
77
+ filters:
78
+ - type: timestamp_format
79
+ default_from_timezone: "Asia/Taipei"
80
+ default_from_timestamp_format: ["yyyy-MM-dd HH:mm:ss.SSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
81
+ default_to_timezone: "Asia/Taipei"
82
+ default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
83
+ columns:
84
+ - {name: timestamp}
85
+ - {name: $.nested.timestamp}
86
+ out:
87
+ type: stdout
88
+ ```
89
+
90
+ If a format string contains `%`, the JRuby parser/formatter is used. Otherwise, the Java parser/formatter is used.
91
+
92
+ **COMPARISON:**
93
+
94
+ Benchmark test sets are available at [./bench](./bench). In my environment (MacBook Pro), for 1,000,000 timestamps:
95
+
96
+ * jruby parser/formatter: 65.06s
97
+ * java parser/formatter: 1.3s
98
+
99
+ **NOTICE:**
100
+
101
+ * JRuby parser has micro second resolution, but Java parser (SimpleDateFormat) has only milli second resolution
102
+ * `S` always requires three digits. For example, `yyyy-MM-dd HH:mm:ss.S` applied to `2015-12-17 01:02:03.1` wrongly yields 001 milliseconds (not 100), but this is the specified behavior of SimpleDateFormat.
103
+
59
104
  ## ToDo
60
105
 
61
106
  * Write test
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: bench/dummy
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: timestamp, type: string}
8
+ filters:
9
+ - type: timestamp_format
10
+ columns:
11
+ - {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.SSS"], to_format: "yyyy-MM-dd"}
12
+
13
+ out:
14
+ type: "null"
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: bench/dummy
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: timestamp, type: string}
8
+ filters:
9
+ - type: timestamp_format
10
+ columns:
11
+ - {name: timestamp, from_format: ["%Y-%m-%d %H:%M:%S.%N"], to_format: "%Y-%m-%d"}
12
+
13
+ out:
14
+ type: "null"
@@ -0,0 +1,5 @@
1
+ File::open('bench/dummy.csv', 'w') { |f|
2
+ (1..1000000).each {
3
+ f.puts(Time.now.strftime('%Y-%m-%d %H:%M:%S.%L'))
4
+ }
5
+ }
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.5"
16
+ version = "0.1.6"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
@@ -0,0 +1,2 @@
1
+ 1436713200100.2,1436713200100.2,1436713200100.2,1436713200100.2
2
+
@@ -0,0 +1,20 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/double.csv
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: double1, type: double}
8
+ - {name: double2, type: double}
9
+ - {name: double3, type: double}
10
+ - {name: double4, type: double}
11
+ filters:
12
+ - type: timestamp_format
13
+ default_from_timestamp_unit: ms
14
+ columns:
15
+ - {name: double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
16
+ - {name: double2, type: timestamp}
17
+ - {name: double3, type: long}
18
+ - {name: double4, type: double}
19
+ out:
20
+ type: "null"
File without changes
data/example/example.yml CHANGED
@@ -1,22 +1,14 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/string_example.csv
3
+ path_prefix: example/example.jsonl
4
4
  parser:
5
- type: csv
6
- columns:
7
- - {name: string1, type: string}
8
- - {name: string2, type: string}
9
- - {name: string3, type: string}
10
- - {name: string4, type: string}
5
+ type: json
11
6
  filters:
12
7
  - type: timestamp_format
13
8
  default_to_timezone: "Asia/Tokyo"
14
9
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
15
- default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]
16
10
  columns:
17
- - {name: string1}
18
- - {name: string2, type: timestamp}
19
- - {name: string3, type: long}
20
- - {name: string4, type: double}
11
+ - {name: "$.record.timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
12
+ - {name: "$.record.nested.nested[0].timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]}
21
13
  out:
22
14
  type: "null"
@@ -0,0 +1,2 @@
1
+ 2015-07-12 15:00:00 UTC,2015-07-12 15:00:00 UTC
2
+ 2015-07-12 15:00:00.1 UTC,2015-07-12 15:00:00.1 UTC
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/example2.csv
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: string1, type: string}
8
+ - {name: string2, type: string}
9
+ filters:
10
+ - type: timestamp_format
11
+ columns:
12
+ - {name: string1, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_format: "%Y-%m-%d", to_timezone: "Asia/Tokyo"}
13
+ out:
14
+ type: "null"
@@ -0,0 +1 @@
1
+ {"double1":1436713200100.2,"double2":1436713200100.2,"double3":1436713200100.2}
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/json_double.jsonl
4
+ parser:
5
+ type: json
6
+ filters:
7
+ - type: timestamp_format
8
+ default_from_timestamp_unit: ms
9
+ columns:
10
+ - {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
11
+ - {name: $.record.double2, type: long}
12
+ - {name: $.record.double3, type: double}
13
+ out:
14
+ type: "null"
@@ -0,0 +1 @@
1
+ {"long1":1436713200100,"long2":1436713200100,"long3":1436713200100}
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/json_long.jsonl
4
+ parser:
5
+ type: json
6
+ filters:
7
+ - type: timestamp_format
8
+ default_from_timestamp_unit: ms
9
+ columns:
10
+ - {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
11
+ - {name: $.record.long2, type: long}
12
+ - {name: $.record.long3, type: double}
13
+ out:
14
+ type: "null"
@@ -0,0 +1,2 @@
1
+ {"string1":"2015-07-12 15:00:00 UTC","string2":"2015-07-12 15:00:00 UTC","string3":"2015-07-12 15:00:00 UTC"}
2
+ {"string1":"2015-07-12 15:00:00.1 UTC","string2":"2015-07-12 15:00:00.1 UTC","string3":"2015-07-12 15:00:00.1 UTC"}
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/json_string.jsonl
4
+ parser:
5
+ type: json
6
+ filters:
7
+ - type: timestamp_format
8
+ default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S.%N %z"]
9
+ columns:
10
+ - {name: $.record.string1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
11
+ - {name: $.record.string2, type: long, to_unit: ms}
12
+ - {name: $.record.string3, type: double, to_unit: ms}
13
+ out:
14
+ type: "null"
data/example/long.csv ADDED
@@ -0,0 +1 @@
1
+ 1436713200100,1436713200100,1436713200100,1436713200100
data/example/long.yml ADDED
@@ -0,0 +1,20 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/long.csv
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: long1, type: long}
8
+ - {name: long2, type: long}
9
+ - {name: long3, type: long}
10
+ - {name: long4, type: long}
11
+ filters:
12
+ - type: timestamp_format
13
+ default_from_timestamp_unit: ms
14
+ columns:
15
+ - {name: long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"}
16
+ - {name: long2, type: timestamp}
17
+ - {name: long3, type: long}
18
+ - {name: long4, type: double}
19
+ out:
20
+ type: "null"
@@ -0,0 +1,4 @@
1
+ 2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00
2
+ 2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC
3
+ 2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC
4
+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/string_example.csv
3
+ path_prefix: example/string.csv
4
4
  parser:
5
5
  type: csv
6
6
  columns:
@@ -10,13 +10,14 @@ in:
10
10
  - {name: string4, type: string}
11
11
  filters:
12
12
  - type: timestamp_format
13
- default_to_timezone: "Asia/Tokyo"
13
+ default_from_timezone: "Asia/Taipei"
14
+ default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S"]
15
+ default_to_timezone: "Asia/Taipei"
14
16
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
15
- default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]
16
17
  columns:
17
18
  - {name: string1}
18
19
  - {name: string2, type: timestamp}
19
- - {name: string3, type: long}
20
- - {name: string4, type: double}
20
+ - {name: string3, type: long, to_unit: ms}
21
+ - {name: string4, type: double, to_unit: ms}
21
22
  out:
22
23
  type: "null"
@@ -0,0 +1,23 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/string.csv
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: string1, type: string}
8
+ - {name: string2, type: string}
9
+ - {name: string3, type: string}
10
+ - {name: string4, type: string}
11
+ filters:
12
+ - type: timestamp_format
13
+ default_from_timezone: "Asia/Taipei"
14
+ default_from_timestamp_format: ["yyyy-MM-dd HH:mm:ss.S z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"] # SSS must be three digit ...
15
+ default_to_timezone: "Asia/Taipei"
16
+ default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
17
+ columns:
18
+ - {name: string1}
19
+ - {name: string2, type: timestamp}
20
+ - {name: string3, type: long, to_unit: ms}
21
+ - {name: string4, type: double, to_unit: ms}
22
+ out:
23
+ type: "null"
@@ -0,0 +1,2 @@
1
+ 2015-07-12 15:00:00.1 UTC,2015-07-12 15:00:00.1 UTC,2015-07-12 15:00:00.1 UTC,2015-07-12 15:00:00.1 UTC
2
+
@@ -1,6 +1,6 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: example/timestamp_example.csv
3
+ path_prefix: example/timestamp.csv
4
4
  parser:
5
5
  type: csv
6
6
  default_timestamp_format: "%Y-%m-%d %H:%M:%S.%N %z"
@@ -14,9 +14,9 @@ filters:
14
14
  default_to_timezone: "Asia/Tokyo"
15
15
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
16
16
  columns:
17
- - {name: timestamp1}
17
+ - {name: timestamp1, to_format: "%Y-%m-%d %H:%M:%S.%N"}
18
18
  - {name: timestamp2, type: timestamp}
19
- - {name: timestamp3, type: long}
20
- - {name: timestamp4, type: double}
19
+ - {name: timestamp3, type: long, to_unit: ms}
20
+ - {name: timestamp4, type: double, to_unit: ms}
21
21
  out:
22
22
  type: "null"
@@ -1,5 +1,7 @@
1
1
  package org.embulk.filter.timestamp_format;
2
2
 
3
+ import org.embulk.filter.timestamp_format.cast.DoubleCast;
4
+ import org.embulk.filter.timestamp_format.cast.LongCast;
3
5
  import org.embulk.filter.timestamp_format.cast.StringCast;
4
6
  import org.embulk.filter.timestamp_format.cast.TimestampCast;
5
7
  import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.ColumnConfig;
@@ -32,6 +34,8 @@ public class ColumnCaster
32
34
  private final PageBuilder pageBuilder;
33
35
  private final HashMap<String, TimestampParser> timestampParserMap = new HashMap<>();
34
36
  private final HashMap<String, TimestampFormatter> timestampFormatterMap = new HashMap<>();
37
+ private final HashMap<String, TimestampUnit> fromTimestampUnitMap = new HashMap<>();
38
+ private final HashMap<String, TimestampUnit> toTimestampUnitMap = new HashMap<>();
35
39
  private final JsonVisitor jsonVisitor;
36
40
 
37
41
  ColumnCaster(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, PageBuilder pageBuilder)
@@ -44,29 +48,23 @@ public class ColumnCaster
44
48
 
45
49
  buildTimestampParserMap();
46
50
  buildTimestampFormatterMap();
51
+ buildFromTimestampUnitMap();
52
+ buildToTimestampUnitMap();
47
53
 
48
- JsonCaster jsonCaster = new JsonCaster(task, timestampParserMap, timestampFormatterMap);
54
+ JsonCaster jsonCaster = new JsonCaster(task, timestampParserMap, timestampFormatterMap, fromTimestampUnitMap, toTimestampUnitMap);
49
55
  this.jsonVisitor = new JsonVisitor(task, jsonCaster);
50
56
  }
51
57
 
52
58
  private void buildTimestampParserMap()
53
59
  {
54
60
  // columnName or jsonPath => TimestampParser
61
+ // the input type of a json value is not known here, so create a parser anyway
55
62
  for (ColumnConfig columnConfig : task.getColumns()) {
56
63
  TimestampParser parser = getTimestampParser(columnConfig, task);
57
64
  this.timestampParserMap.put(columnConfig.getName(), parser);
58
65
  }
59
66
  }
60
67
 
61
- private void buildTimestampFormatterMap()
62
- {
63
- // columnName or jsonPath => TimestampFormatter
64
- for (ColumnConfig columnConfig : task.getColumns()) {
65
- TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
66
- this.timestampFormatterMap.put(columnConfig.getName(), parser);
67
- }
68
- }
69
-
70
68
  private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task)
71
69
  {
72
70
  DateTimeZone timezone = columnConfig.getFromTimeZone().or(task.getDefaultFromTimeZone());
@@ -74,6 +72,17 @@ public class ColumnCaster
74
72
  return new TimestampParser(task.getJRuby(), formatList, timezone);
75
73
  }
76
74
 
75
+ private void buildTimestampFormatterMap()
76
+ {
77
+ // columnName or jsonPath => TimestampFormatter
78
+ for (ColumnConfig columnConfig : task.getColumns()) {
79
+ if (columnConfig.getType() instanceof StringType) {
80
+ TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
81
+ this.timestampFormatterMap.put(columnConfig.getName(), parser);
82
+ }
83
+ }
84
+ }
85
+
77
86
  private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task)
78
87
  {
79
88
  String format = columnConfig.getToFormat().or(task.getDefaultToTimestampFormat());
@@ -81,6 +90,86 @@ public class ColumnCaster
81
90
  return new TimestampFormatter(task.getJRuby(), format, timezone);
82
91
  }
83
92
 
93
+ private void buildFromTimestampUnitMap()
94
+ {
95
+ // columnName or jsonPath => TimestampUnit
96
+ // the input type of a json value is not known here, so create a unit entry anyway
97
+ for (ColumnConfig columnConfig : task.getColumns()) {
98
+ TimestampUnit unit = getFromTimestampUnit(columnConfig, task);
99
+ this.fromTimestampUnitMap.put(columnConfig.getName(), unit);
100
+ }
101
+ }
102
+
103
+ private TimestampUnit getFromTimestampUnit(ColumnConfig columnConfig, PluginTask task)
104
+ {
105
+ return columnConfig.getFromUnit().or(task.getDefaultFromTimestampUnit());
106
+ }
107
+
108
+ private void buildToTimestampUnitMap()
109
+ {
110
+ // columnName or jsonPath => TimestampUnit
111
+ for (ColumnConfig columnConfig : task.getColumns()) {
112
+ Type type = columnConfig.getType();
113
+ if (type instanceof LongType || type instanceof DoubleType) {
114
+ TimestampUnit unit = getToTimestampUnit(columnConfig, task);
115
+ this.toTimestampUnitMap.put(columnConfig.getName(), unit);
116
+ }
117
+ }
118
+ }
119
+
120
+ private TimestampUnit getToTimestampUnit(ColumnConfig columnConfig, PluginTask task)
121
+ {
122
+ return columnConfig.getToUnit().or(task.getDefaultToTimestampUnit());
123
+ }
124
+
125
+ public void setFromLong(Column outputColumn, long value)
126
+ {
127
+ Type outputType = outputColumn.getType();
128
+ TimestampUnit fromUnit = fromTimestampUnitMap.get(outputColumn.getName());
129
+ if (outputType instanceof StringType) {
130
+ TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName());
131
+ pageBuilder.setString(outputColumn, LongCast.asString(value, fromUnit, timestampFormatter));
132
+ }
133
+ else if (outputType instanceof TimestampType) {
134
+ pageBuilder.setTimestamp(outputColumn, LongCast.asTimestamp(value, fromUnit));
135
+ }
136
+ else if (outputType instanceof LongType) {
137
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
138
+ pageBuilder.setLong(outputColumn, LongCast.asLong(value, fromUnit, toUnit));
139
+ }
140
+ else if (outputType instanceof DoubleType) {
141
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
142
+ pageBuilder.setDouble(outputColumn, LongCast.asDouble(value, fromUnit, toUnit));
143
+ }
144
+ else {
145
+ assert false;
146
+ }
147
+ }
148
+
149
+ public void setFromDouble(Column outputColumn, double value)
150
+ {
151
+ Type outputType = outputColumn.getType();
152
+ TimestampUnit fromUnit = fromTimestampUnitMap.get(outputColumn.getName());
153
+ if (outputType instanceof StringType) {
154
+ TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName());
155
+ pageBuilder.setString(outputColumn, DoubleCast.asString(value, fromUnit, timestampFormatter));
156
+ }
157
+ else if (outputType instanceof TimestampType) {
158
+ pageBuilder.setTimestamp(outputColumn, DoubleCast.asTimestamp(value, fromUnit));
159
+ }
160
+ else if (outputType instanceof LongType) {
161
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
162
+ pageBuilder.setLong(outputColumn, DoubleCast.asLong(value, fromUnit, toUnit));
163
+ }
164
+ else if (outputType instanceof DoubleType) {
165
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
166
+ pageBuilder.setDouble(outputColumn, DoubleCast.asDouble(value, fromUnit, toUnit));
167
+ }
168
+ else {
169
+ assert false;
170
+ }
171
+ }
172
+
84
173
  public void setFromString(Column outputColumn, String value)
85
174
  {
86
175
  Type outputType = outputColumn.getType();
@@ -93,10 +182,12 @@ public class ColumnCaster
93
182
  pageBuilder.setTimestamp(outputColumn, StringCast.asTimestamp(value, timestampParser));
94
183
  }
95
184
  else if (outputType instanceof LongType) {
96
- pageBuilder.setLong(outputColumn, StringCast.asLong(value, timestampParser));
185
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
186
+ pageBuilder.setLong(outputColumn, StringCast.asLong(value, timestampParser, toUnit));
97
187
  }
98
188
  else if (outputType instanceof DoubleType) {
99
- pageBuilder.setDouble(outputColumn, StringCast.asDouble(value, timestampParser));
189
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
190
+ pageBuilder.setDouble(outputColumn, StringCast.asDouble(value, timestampParser, toUnit));
100
191
  }
101
192
  else {
102
193
  assert false;
@@ -114,10 +205,12 @@ public class ColumnCaster
114
205
  pageBuilder.setTimestamp(outputColumn, value);
115
206
  }
116
207
  else if (outputType instanceof LongType) {
117
- pageBuilder.setLong(outputColumn, TimestampCast.asLong(value));
208
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
209
+ pageBuilder.setLong(outputColumn, TimestampCast.asLong(value, toUnit));
118
210
  }
119
211
  else if (outputType instanceof DoubleType) {
120
- pageBuilder.setDouble(outputColumn, TimestampCast.asDouble(value));
212
+ TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName());
213
+ pageBuilder.setDouble(outputColumn, TimestampCast.asDouble(value, toUnit));
121
214
  }
122
215
  else {
123
216
  assert false;