embulk-filter-timestamp_format 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0086188ce09565733308b14cebcfd784df0a6a4e
4
- data.tar.gz: 8d974fbc9276c45f07541f62f79f725b375f2d89
3
+ metadata.gz: 81e7bf1b428ed0d48164dbaaec74a2080be029be
4
+ data.tar.gz: 7c8e45fcc5cf72d8ae25eb973382e74bf3b16420
5
5
  SHA512:
6
- metadata.gz: fcbe6f58659fa857ddf7237aa9534bcb392ff21d60e1e31abe267414b570a93b44bd347839361a207add780306ceb682e6e3f14e93181b3b1741fd497078bf86
7
- data.tar.gz: 2b13d3aab703ba5dfd53f41587d20322595fee6b3af5603f8744c4955b1b32ce2525d022a46bda85d5567de8a687ef1718e3aa064094d278a20406fdf94db6c0
6
+ metadata.gz: fbae3c05eb9a2808adab605035e5ad432f9fd61df48b5b077565fcc6372a7e3413ec4ea499ddd98b9ebfbbee3f1073f7fa4a73b4743b238c9532905ee60afcfb
7
+ data.tar.gz: b3760fc88ea943d92edc776ef400563affe983718e7d66854e7093ba7a25983d9395ac3a46642ed430c5ccde4dc3845437e10145d7e4d2385e6b722fb021c311
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.1.8 (2016-05-10)
2
+
3
+ Enhancements:
4
+
5
+ * Support nanosecond resolution for the Java parser
6
+
1
7
  # 0.1.7 (2016-05-09)
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -99,9 +99,23 @@ Benchmark test sets are available at [./bench](./bench). In my environment (Mac
99
99
  * jruby parser / java formatter: 64.52s
100
100
  * jruby parser / jruby formatter: 65.06s
101
101
 
102
- **NOTICE:**
102
+ ## Nano Resolution
103
103
 
104
- * JRuby parser has micro second resolution, but Java parser (Joda-Time) has only milli second resolution
104
+ The JRuby parser has microsecond resolution. The Java (Joda-Time) parser has millisecond resolution (although Java 8's DateTimeFormatter supports nanosecond resolution).
105
+
106
+ Nanosecond resolution is partially supported by this plugin itself. To use it with the Java parser, write `nnnnnnnnn` in the parser format, for example:
107
+
108
+ ```
109
+ yyyy-MM-dd HH:mm:ss.nnnnnnnnn z
110
+ ```
111
+
112
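+ This plugin locates the fractional-second digits in the input text with the regular expression `\.(\d+)`; the first match (the first run of digits following a `.`) is used as the nanosecond part.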
113
+
114
+ For the formatter, you can use the JRuby formatter, for example:
115
+
116
+ ```
117
+ %Y-%m-%d %H:%M:%S.%N %z
118
+ ```
105
119
 
106
120
  ## ToDo
107
121
 
@@ -8,6 +8,6 @@ in:
8
8
  filters:
9
9
  - type: timestamp_format
10
10
  columns:
11
- - {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.SSS"], to_format: "yyyy-MM-dd"}
11
+ - {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.SSSSSSSSS"], to_format: "yyyy-MM-dd"}
12
12
  out:
13
13
  type: "null"
@@ -8,6 +8,6 @@ in:
8
8
  filters:
9
9
  - type: timestamp_format
10
10
  columns:
11
- - {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.SSS"], to_format: "%Y-%m-%d"}
11
+ - {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.SSSSSSSSS"], to_format: "%Y-%m-%d"}
12
12
  out:
13
13
  type: "null"
@@ -0,0 +1,13 @@
1
+ in:
2
+ type: file
3
+ path_prefix: bench/dummy
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: timestamp, type: string}
8
+ filters:
9
+ - type: timestamp_format
10
+ columns:
11
+ - {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.nnnnnnnnn"], to_format: "%Y-%m-%d"}
12
+ out:
13
+ type: "null"
data/bench/gen_dummy.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  File::open('bench/dummy.csv', 'w') { |f|
2
2
  (1..1000000).each {
3
- f.puts(Time.now.strftime('%Y-%m-%d %H:%M:%S.%L'))
3
+ f.puts(Time.now.strftime('%Y-%m-%d %H:%M:%S.%9N'))
4
4
  }
5
5
  }
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.7"
16
+ version = "0.1.8"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
data/example/string.csv CHANGED
@@ -3,4 +3,12 @@
3
3
  2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00
4
4
  2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC
5
5
  2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC
6
+ 2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC
7
+ 2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC
8
+ 2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC
9
+ 2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC
10
+ 2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC
11
+ 2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC
12
+ 2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC
13
+ 2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC
6
14
 
data/example/string.yml CHANGED
@@ -11,7 +11,7 @@ in:
11
11
  filters:
12
12
  - type: timestamp_format
13
13
  default_from_timezone: "Asia/Taipei"
14
- default_from_timestamp_format: ["%Y-%m-%d", "%Y-%m-%d %z", "%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S"]
14
+ default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %z", "%Y-%m-%d"]
15
15
  default_to_timezone: "Asia/Taipei"
16
16
  default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
17
17
  columns:
@@ -11,7 +11,7 @@ in:
11
11
  filters:
12
12
  - type: timestamp_format
13
13
  default_from_timezone: "Asia/Taipei"
14
- default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.S z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
14
+ default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.SSSSSSSSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
15
15
  default_to_timezone: "Asia/Taipei"
16
16
  default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
17
17
  columns:
@@ -0,0 +1,23 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/string.csv
4
+ parser:
5
+ type: csv
6
+ columns:
7
+ - {name: string1, type: string}
8
+ - {name: string2, type: string}
9
+ - {name: string3, type: string}
10
+ - {name: string4, type: string}
11
+ filters:
12
+ - type: timestamp_format
13
+ default_from_timezone: "Asia/Taipei"
14
+ default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.nnnnnnnnn z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
15
+ default_to_timezone: "Asia/Taipei"
16
+ default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
17
+ columns:
18
+ - {name: string1}
19
+ - {name: string2, type: timestamp}
20
+ - {name: string3, type: long, to_unit: ms}
21
+ - {name: string4, type: double, to_unit: ms}
22
+ out:
23
+ type: "null"
@@ -22,6 +22,8 @@ import org.jruby.embed.ScriptingContainer;
22
22
  import java.util.ArrayList;
23
23
  import java.util.List;
24
24
  import java.util.Locale;
25
+ import java.util.regex.Matcher;
26
+ import java.util.regex.Pattern;
25
27
 
26
28
  import org.joda.time.format.DateTimeFormat;
27
29
 
@@ -48,7 +50,9 @@ public class TimestampParser {
48
50
 
49
51
  private final List<JRubyTimeParserHelper> jrubyParserList = new ArrayList<>();
50
52
  private final List<DateTimeFormatter> javaParserList = new ArrayList<>();
53
+ private final List<Boolean> handleNanoResolutionList = new ArrayList<>();
51
54
  private final DateTimeZone defaultFromTimeZone;
55
+ private final Pattern nanoSecPattern = Pattern.compile("\\.(\\d+)");
52
56
 
53
57
  TimestampParser(PluginTask task) {
54
58
  this(task.getJRuby(), task.getDefaultFromTimestampFormat(), task.getDefaultFromTimeZone());
@@ -69,8 +73,18 @@ public class TimestampParser {
69
73
  JRubyTimeParserHelper helper = (JRubyTimeParserHelper) helperFactory.newInstance(format, 1970, 1, 1, 0, 0, 0, 0); // TODO default time zone
70
74
  this.jrubyParserList.add(helper);
71
75
  } else {
72
- DateTimeFormatter parser = DateTimeFormat.forPattern(format).withLocale(Locale.ENGLISH).withZone(defaultFromTimeZone);
73
- this.javaParserList.add(parser);
76
+ // special treatment for nano resolution. n is not originally supported by Joda-Time
77
+ if (format.contains("n")) {
78
+ this.handleNanoResolutionList.add(true);
79
+ String newFormat = format.replaceAll("n", "S");
80
+ DateTimeFormatter parser = DateTimeFormat.forPattern(newFormat).withLocale(Locale.ENGLISH).withZone(defaultFromTimeZone);
81
+ this.javaParserList.add(parser);
82
+ }
83
+ else {
84
+ this.handleNanoResolutionList.add(false);
85
+ DateTimeFormatter parser = DateTimeFormat.forPattern(format).withLocale(Locale.ENGLISH).withZone(defaultFromTimeZone);
86
+ this.javaParserList.add(parser);
87
+ }
74
88
  }
75
89
  }
76
90
  this.defaultFromTimeZone = defaultFromTimeZone;
@@ -127,23 +141,48 @@ public class TimestampParser {
127
141
  }
128
142
 
129
143
  private Timestamp javaParse(String text) throws IllegalArgumentException {
130
- DateTime dateTime = null;
144
+ long msec = -1;
145
+ long nsec = -1;
146
+ Boolean handleNanoResolution = false;
131
147
  IllegalArgumentException exception = null;
132
148
 
133
- for (DateTimeFormatter parser : javaParserList) {
149
+ for (int i = 0; i < javaParserList.size(); i++) {
150
+ DateTimeFormatter parser = javaParserList.get(i);
151
+ handleNanoResolution = handleNanoResolutionList.get(i);
134
152
  try {
135
- dateTime = parser.parseDateTime(text);
153
+ if (handleNanoResolution) {
154
+ nsec = parseNano(text);
155
+ }
156
+ DateTime dateTime = parser.parseDateTime(text);
157
+ msec = dateTime.getMillis(); // NOTE: milli second resolution
136
158
  break;
137
159
  } catch (IllegalArgumentException ex) {
138
160
  exception = ex;
139
161
  }
140
162
  }
141
- if (dateTime == null) {
163
+ if (msec == -1) {
142
164
  throw exception;
143
165
  }
144
- long msec = dateTime.getMillis(); // NOTE: milli second resolution
145
166
 
146
- long nanoAdjustment = msec * 1000000;
147
- return Timestamp.ofEpochSecond(0, nanoAdjustment);
167
+ if (handleNanoResolution) {
168
+ long sec = msec / 1000;
169
+ return Timestamp.ofEpochSecond(sec, nsec);
170
+ }
171
+ else {
172
+ long nanoAdjustment = msec * 1000000;
173
+ return Timestamp.ofEpochSecond(0, nanoAdjustment);
174
+ }
175
+ }
176
+
177
+ private long parseNano(String text) {
178
+ long nsec = -1;
179
+ Matcher m = nanoSecPattern.matcher(text);
180
+ if (m.find()) {
181
+ //String nanoStr = String.format("%-9s", m.group(1)).replace(" ", "0");
182
+ //nsec = Long.parseLong(nanoStr);
183
+ String nanoStr = m.group(1);
184
+ nsec = Long.parseLong(nanoStr) * (long) Math.pow(10, 9 - nanoStr.length());
185
+ }
186
+ return nsec;
148
187
  }
149
188
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-timestamp_format
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-09 00:00:00.000000000 Z
11
+ date: 2016-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -54,6 +54,7 @@ files:
54
54
  - bench/config_jruby.yml
55
55
  - bench/config_jruby_formatter.yml
56
56
  - bench/config_jruby_parser.yml
57
+ - bench/config_nano.yml
57
58
  - bench/gen_dummy.rb
58
59
  - build.gradle
59
60
  - config/checkstyle/checkstyle.xml
@@ -74,6 +75,7 @@ files:
74
75
  - example/string.csv
75
76
  - example/string.yml
76
77
  - example/string_java.yml
78
+ - example/string_nano.yml
77
79
  - example/timestamp.csv
78
80
  - example/timestamp.yml
79
81
  - gradle/wrapper/gradle-wrapper.jar
@@ -95,7 +97,7 @@ files:
95
97
  - src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java
96
98
  - src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
97
99
  - src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
98
- - classpath/embulk-filter-timestamp_format-0.1.7.jar
100
+ - classpath/embulk-filter-timestamp_format-0.1.8.jar
99
101
  homepage: https://github.com/sonots/embulk-filter-timestamp_format
100
102
  licenses:
101
103
  - MIT