embulk-filter-timestamp_format 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0086188ce09565733308b14cebcfd784df0a6a4e
4
- data.tar.gz: 8d974fbc9276c45f07541f62f79f725b375f2d89
3
+ metadata.gz: 81e7bf1b428ed0d48164dbaaec74a2080be029be
4
+ data.tar.gz: 7c8e45fcc5cf72d8ae25eb973382e74bf3b16420
5
5
  SHA512:
6
- metadata.gz: fcbe6f58659fa857ddf7237aa9534bcb392ff21d60e1e31abe267414b570a93b44bd347839361a207add780306ceb682e6e3f14e93181b3b1741fd497078bf86
7
- data.tar.gz: 2b13d3aab703ba5dfd53f41587d20322595fee6b3af5603f8744c4955b1b32ce2525d022a46bda85d5567de8a687ef1718e3aa064094d278a20406fdf94db6c0
6
+ metadata.gz: fbae3c05eb9a2808adab605035e5ad432f9fd61df48b5b077565fcc6372a7e3413ec4ea499ddd98b9ebfbbee3f1073f7fa4a73b4743b238c9532905ee60afcfb
7
+ data.tar.gz: b3760fc88ea943d92edc776ef400563affe983718e7d66854e7093ba7a25983d9395ac3a46642ed430c5ccde4dc3845437e10145d7e4d2385e6b722fb021c311
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.1.8 (2016-05-10)
2
+
3
+ Enhancements:
4
+
5
+ * Support nanosecond resolution for the Java parser
6
+
1
7
  # 0.1.7 (2016-05-09)
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -99,9 +99,23 @@ Benchmark test sets are available at [./bench](./bench). In my environment (Mac
99
99
  * jruby parser / java formatter: 64.52s
100
100
  * jruby parser / jruby formatter: 65.06s
101
101
 
102
- **NOTICE:**
102
+ ## Nano Resolution
103
103
 
104
- * JRuby parser has micro second resolution, but Java parser (Joda-Time) has only milli second resolution
104
+ The JRuby parser has microsecond resolution. The Java (Joda-Time) parser has only millisecond resolution (although Java 8's DateTimeFormatter supports nanosecond resolution).
105
+
106
+ Nanosecond resolution is partially supported by this plugin itself. For the Java parser, use the format specifier `nnnnnnnnn`, as in the following example:
107
+
108
+ ```
109
+ yyyy-MM-dd HH:mm:ss.nnnnnnnnn z
110
+ ```
111
+
112
+ This plugin locates the fractional-second (nanosecond) digits in the input text with the regular expression `\.(\d+)`; the first match in the text is used.
113
+
114
+ For formatting, you can use the JRuby formatter, as in the following example:
115
+
116
+ ```
117
+ %Y-%m-%d %H:%M:%S.%N %z
118
+ ```
105
119
 
106
120
  ## ToDo
107
121
 
@@ -8,6 +8,6 @@ in:
8
8
  filters:
9
9
  - type: timestamp_format
10
10
  columns:
11
- - {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.SSS"], to_format: "yyyy-MM-dd"}
11
+ - {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.SSSSSSSSS"], to_format: "yyyy-MM-dd"}
12
12
  out:
13
13
  type: "null"
@@ -8,6 +8,6 @@ in:
8
8
  filters:
9
9
  - type: timestamp_format
10
10
  columns:
11
- - {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.SSS"], to_format: "%Y-%m-%d"}
11
+ - {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.SSSSSSSSS"], to_format: "%Y-%m-%d"}
12
12
  out:
13
13
  type: "null"
@@ -0,0 +1,13 @@
1
+ in:
2
+ type: file
3
+ path_prefix: bench/dummy
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: timestamp, type: string}
8
+ filters:
9
+ - type: timestamp_format
10
+ columns:
11
+ - {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.nnnnnnnnn"], to_format: "%Y-%m-%d"}
12
+ out:
13
+ type: "null"
data/bench/gen_dummy.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  File::open('bench/dummy.csv', 'w') { |f|
2
2
  (1..1000000).each {
3
- f.puts(Time.now.strftime('%Y-%m-%d %H:%M:%S.%L'))
3
+ f.puts(Time.now.strftime('%Y-%m-%d %H:%M:%S.%9N'))
4
4
  }
5
5
  }
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.7"
16
+ version = "0.1.8"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
data/example/string.csv CHANGED
@@ -3,4 +3,12 @@
3
3
  2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00
4
4
  2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC
5
5
  2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC
6
+ 2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC
7
+ 2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC
8
+ 2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC
9
+ 2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC
10
+ 2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC
11
+ 2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC
12
+ 2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC
13
+ 2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC
6
14
 
data/example/string.yml CHANGED
@@ -11,7 +11,7 @@ in:
11
11
  filters:
12
12
  - type: timestamp_format
13
13
  default_from_timezone: "Asia/Taipei"
14
- default_from_timestamp_format: ["%Y-%m-%d", "%Y-%m-%d %z", "%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S"]
14
+ default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %z", "%Y-%m-%d"]
15
15
  default_to_timezone: "Asia/Taipei"
16
16
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
17
17
  columns:
@@ -11,7 +11,7 @@ in:
11
11
  filters:
12
12
  - type: timestamp_format
13
13
  default_from_timezone: "Asia/Taipei"
14
- default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.S z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
14
+ default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.SSSSSSSSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
15
15
  default_to_timezone: "Asia/Taipei"
16
16
  default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
17
17
  columns:
@@ -0,0 +1,23 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/string.csv
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: string1, type: string}
8
+ - {name: string2, type: string}
9
+ - {name: string3, type: string}
10
+ - {name: string4, type: string}
11
+ filters:
12
+ - type: timestamp_format
13
+ default_from_timezone: "Asia/Taipei"
14
+ default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.nnnnnnnnn z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
15
+ default_to_timezone: "Asia/Taipei"
16
+ default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
17
+ columns:
18
+ - {name: string1}
19
+ - {name: string2, type: timestamp}
20
+ - {name: string3, type: long, to_unit: ms}
21
+ - {name: string4, type: double, to_unit: ms}
22
+ out:
23
+ type: "null"
@@ -22,6 +22,8 @@ import org.jruby.embed.ScriptingContainer;
22
22
  import java.util.ArrayList;
23
23
  import java.util.List;
24
24
  import java.util.Locale;
25
+ import java.util.regex.Matcher;
26
+ import java.util.regex.Pattern;
25
27
 
26
28
  import org.joda.time.format.DateTimeFormat;
27
29
 
@@ -48,7 +50,9 @@ public class TimestampParser {
48
50
 
49
51
  private final List<JRubyTimeParserHelper> jrubyParserList = new ArrayList<>();
50
52
  private final List<DateTimeFormatter> javaParserList = new ArrayList<>();
53
+ private final List<Boolean> handleNanoResolutionList = new ArrayList<>();
51
54
  private final DateTimeZone defaultFromTimeZone;
55
+ private final Pattern nanoSecPattern = Pattern.compile("\\.(\\d+)");
52
56
 
53
57
  TimestampParser(PluginTask task) {
54
58
  this(task.getJRuby(), task.getDefaultFromTimestampFormat(), task.getDefaultFromTimeZone());
@@ -69,8 +73,18 @@ public class TimestampParser {
69
73
  JRubyTimeParserHelper helper = (JRubyTimeParserHelper) helperFactory.newInstance(format, 1970, 1, 1, 0, 0, 0, 0); // TODO default time zone
70
74
  this.jrubyParserList.add(helper);
71
75
  } else {
72
- DateTimeFormatter parser = DateTimeFormat.forPattern(format).withLocale(Locale.ENGLISH).withZone(defaultFromTimeZone);
73
- this.javaParserList.add(parser);
76
+ // special treatment for nano resolution. n is not originally supported by Joda-Time
77
+ if (format.contains("n")) {
78
+ this.handleNanoResolutionList.add(true);
79
+ String newFormat = format.replaceAll("n", "S");
80
+ DateTimeFormatter parser = DateTimeFormat.forPattern(newFormat).withLocale(Locale.ENGLISH).withZone(defaultFromTimeZone);
81
+ this.javaParserList.add(parser);
82
+ }
83
+ else {
84
+ this.handleNanoResolutionList.add(false);
85
+ DateTimeFormatter parser = DateTimeFormat.forPattern(format).withLocale(Locale.ENGLISH).withZone(defaultFromTimeZone);
86
+ this.javaParserList.add(parser);
87
+ }
74
88
  }
75
89
  }
76
90
  this.defaultFromTimeZone = defaultFromTimeZone;
@@ -127,23 +141,48 @@ public class TimestampParser {
127
141
  }
128
142
 
129
143
  private Timestamp javaParse(String text) throws IllegalArgumentException {
130
- DateTime dateTime = null;
144
+ long msec = -1;
145
+ long nsec = -1;
146
+ Boolean handleNanoResolution = false;
131
147
  IllegalArgumentException exception = null;
132
148
 
133
- for (DateTimeFormatter parser : javaParserList) {
149
+ for (int i = 0; i < javaParserList.size(); i++) {
150
+ DateTimeFormatter parser = javaParserList.get(i);
151
+ handleNanoResolution = handleNanoResolutionList.get(i);
134
152
  try {
135
- dateTime = parser.parseDateTime(text);
153
+ if (handleNanoResolution) {
154
+ nsec = parseNano(text);
155
+ }
156
+ DateTime dateTime = parser.parseDateTime(text);
157
+ msec = dateTime.getMillis(); // NOTE: milli second resolution
136
158
  break;
137
159
  } catch (IllegalArgumentException ex) {
138
160
  exception = ex;
139
161
  }
140
162
  }
141
- if (dateTime == null) {
163
+ if (msec == -1) {
142
164
  throw exception;
143
165
  }
144
- long msec = dateTime.getMillis(); // NOTE: milli second resolution
145
166
 
146
- long nanoAdjustment = msec * 1000000;
147
- return Timestamp.ofEpochSecond(0, nanoAdjustment);
167
+ if (handleNanoResolution) {
168
+ long sec = msec / 1000;
169
+ return Timestamp.ofEpochSecond(sec, nsec);
170
+ }
171
+ else {
172
+ long nanoAdjustment = msec * 1000000;
173
+ return Timestamp.ofEpochSecond(0, nanoAdjustment);
174
+ }
175
+ }
176
+
177
+ private long parseNano(String text) {
178
+ long nsec = -1;
179
+ Matcher m = nanoSecPattern.matcher(text);
180
+ if (m.find()) {
181
+ //String nanoStr = String.format("%-9s", m.group(1)).replace(" ", "0");
182
+ //nsec = Long.parseLong(nanoStr);
183
+ String nanoStr = m.group(1);
184
+ nsec = Long.parseLong(nanoStr) * (long) Math.pow(10, 9 - nanoStr.length());
185
+ }
186
+ return nsec;
148
187
  }
149
188
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-timestamp_format
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-09 00:00:00.000000000 Z
11
+ date: 2016-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -54,6 +54,7 @@ files:
54
54
  - bench/config_jruby.yml
55
55
  - bench/config_jruby_formatter.yml
56
56
  - bench/config_jruby_parser.yml
57
+ - bench/config_nano.yml
57
58
  - bench/gen_dummy.rb
58
59
  - build.gradle
59
60
  - config/checkstyle/checkstyle.xml
@@ -74,6 +75,7 @@ files:
74
75
  - example/string.csv
75
76
  - example/string.yml
76
77
  - example/string_java.yml
78
+ - example/string_nano.yml
77
79
  - example/timestamp.csv
78
80
  - example/timestamp.yml
79
81
  - gradle/wrapper/gradle-wrapper.jar
@@ -95,7 +97,7 @@ files:
95
97
  - src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java
96
98
  - src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
97
99
  - src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
98
- - classpath/embulk-filter-timestamp_format-0.1.7.jar
100
+ - classpath/embulk-filter-timestamp_format-0.1.8.jar
99
101
  homepage: https://github.com/sonots/embulk-filter-timestamp_format
100
102
  licenses:
101
103
  - MIT