embulk-filter-timestamp_format 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +16 -2
- data/bench/config_java.yml +1 -1
- data/bench/config_jruby_formatter.yml +1 -1
- data/bench/config_nano.yml +13 -0
- data/bench/gen_dummy.rb +1 -1
- data/build.gradle +1 -1
- data/example/string.csv +8 -0
- data/example/string.yml +1 -1
- data/example/string_java.yml +1 -1
- data/example/string_nano.yml +23 -0
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java +48 -9
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81e7bf1b428ed0d48164dbaaec74a2080be029be
|
4
|
+
data.tar.gz: 7c8e45fcc5cf72d8ae25eb973382e74bf3b16420
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fbae3c05eb9a2808adab605035e5ad432f9fd61df48b5b077565fcc6372a7e3413ec4ea499ddd98b9ebfbbee3f1073f7fa4a73b4743b238c9532905ee60afcfb
|
7
|
+
data.tar.gz: b3760fc88ea943d92edc776ef400563affe983718e7d66854e7093ba7a25983d9395ac3a46642ed430c5ccde4dc3845437e10145d7e4d2385e6b722fb021c311
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -99,9 +99,23 @@ Benchmark test sets are available at [./bench](./bench). In my environment (Mac
|
|
99
99
|
* jruby parser / java formatter: 64.52s
|
100
100
|
* jruby parser / jruby formatter: 65.06s
|
101
101
|
|
102
|
-
|
102
|
+
## Nano Resolution
|
103
103
|
|
104
|
-
|
104
|
+
The JRuby parser has microsecond resolution. The Java (Joda-Time) parser has millisecond resolution (although Java 8's DateTimeFormatter supports nanosecond resolution).
|
105
|
+
|
106
|
+
Nanosecond resolution is partially supported by this plugin itself. For the Java parser, use the format specifier `nnnnnnnnn`, as in:
|
107
|
+
|
108
|
+
```
|
109
|
+
yyyy-MM-dd HH:mm:ss.nnnnnnnnn z
|
110
|
+
```
|
111
|
+
|
112
|
+
This plugin locates the fractional-second (nanosecond) digits in the input text using the regular expression `\.(\d+)`.
|
113
|
+
|
114
|
+
For formatting, you can use the JRuby formatter, as in:
|
115
|
+
|
116
|
+
```
|
117
|
+
%Y-%m-%d %H:%M:%S.%N %z
|
118
|
+
```
|
105
119
|
|
106
120
|
## ToDo
|
107
121
|
|
data/bench/config_java.yml
CHANGED
@@ -0,0 +1,13 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: bench/dummy
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: timestamp, type: string}
|
8
|
+
filters:
|
9
|
+
- type: timestamp_format
|
10
|
+
columns:
|
11
|
+
- {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.nnnnnnnnn"], to_format: "%Y-%m-%d"}
|
12
|
+
out:
|
13
|
+
type: "null"
|
data/bench/gen_dummy.rb
CHANGED
data/build.gradle
CHANGED
data/example/string.csv
CHANGED
@@ -3,4 +3,12 @@
|
|
3
3
|
2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00
|
4
4
|
2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC
|
5
5
|
2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC
|
6
|
+
2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC
|
7
|
+
2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC
|
8
|
+
2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC
|
9
|
+
2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC
|
10
|
+
2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC
|
11
|
+
2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC
|
12
|
+
2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC
|
13
|
+
2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC
|
6
14
|
|
data/example/string.yml
CHANGED
@@ -11,7 +11,7 @@ in:
|
|
11
11
|
filters:
|
12
12
|
- type: timestamp_format
|
13
13
|
default_from_timezone: "Asia/Taipei"
|
14
|
-
default_from_timestamp_format: ["%Y-%m-%d
|
14
|
+
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %z", "%Y-%m-%d"]
|
15
15
|
default_to_timezone: "Asia/Taipei"
|
16
16
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
17
17
|
columns:
|
data/example/string_java.yml
CHANGED
@@ -11,7 +11,7 @@ in:
|
|
11
11
|
filters:
|
12
12
|
- type: timestamp_format
|
13
13
|
default_from_timezone: "Asia/Taipei"
|
14
|
-
default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.
|
14
|
+
default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.SSSSSSSSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
|
15
15
|
default_to_timezone: "Asia/Taipei"
|
16
16
|
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
|
17
17
|
columns:
|
@@ -0,0 +1,23 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/string.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: string1, type: string}
|
8
|
+
- {name: string2, type: string}
|
9
|
+
- {name: string3, type: string}
|
10
|
+
- {name: string4, type: string}
|
11
|
+
filters:
|
12
|
+
- type: timestamp_format
|
13
|
+
default_from_timezone: "Asia/Taipei"
|
14
|
+
default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.nnnnnnnnn z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
|
15
|
+
default_to_timezone: "Asia/Taipei"
|
16
|
+
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
17
|
+
columns:
|
18
|
+
- {name: string1}
|
19
|
+
- {name: string2, type: timestamp}
|
20
|
+
- {name: string3, type: long, to_unit: ms}
|
21
|
+
- {name: string4, type: double, to_unit: ms}
|
22
|
+
out:
|
23
|
+
type: "null"
|
@@ -22,6 +22,8 @@ import org.jruby.embed.ScriptingContainer;
|
|
22
22
|
import java.util.ArrayList;
|
23
23
|
import java.util.List;
|
24
24
|
import java.util.Locale;
|
25
|
+
import java.util.regex.Matcher;
|
26
|
+
import java.util.regex.Pattern;
|
25
27
|
|
26
28
|
import org.joda.time.format.DateTimeFormat;
|
27
29
|
|
@@ -48,7 +50,9 @@ public class TimestampParser {
|
|
48
50
|
|
49
51
|
private final List<JRubyTimeParserHelper> jrubyParserList = new ArrayList<>();
|
50
52
|
private final List<DateTimeFormatter> javaParserList = new ArrayList<>();
|
53
|
+
private final List<Boolean> handleNanoResolutionList = new ArrayList<>();
|
51
54
|
private final DateTimeZone defaultFromTimeZone;
|
55
|
+
private final Pattern nanoSecPattern = Pattern.compile("\\.(\\d+)");
|
52
56
|
|
53
57
|
TimestampParser(PluginTask task) {
|
54
58
|
this(task.getJRuby(), task.getDefaultFromTimestampFormat(), task.getDefaultFromTimeZone());
|
@@ -69,8 +73,18 @@ public class TimestampParser {
|
|
69
73
|
JRubyTimeParserHelper helper = (JRubyTimeParserHelper) helperFactory.newInstance(format, 1970, 1, 1, 0, 0, 0, 0); // TODO default time zone
|
70
74
|
this.jrubyParserList.add(helper);
|
71
75
|
} else {
|
72
|
-
|
73
|
-
|
76
|
+
// special treatment for nano resolution. n is not originally supported by Joda-Time
|
77
|
+
if (format.contains("n")) {
|
78
|
+
this.handleNanoResolutionList.add(true);
|
79
|
+
String newFormat = format.replaceAll("n", "S");
|
80
|
+
DateTimeFormatter parser = DateTimeFormat.forPattern(newFormat).withLocale(Locale.ENGLISH).withZone(defaultFromTimeZone);
|
81
|
+
this.javaParserList.add(parser);
|
82
|
+
}
|
83
|
+
else {
|
84
|
+
this.handleNanoResolutionList.add(false);
|
85
|
+
DateTimeFormatter parser = DateTimeFormat.forPattern(format).withLocale(Locale.ENGLISH).withZone(defaultFromTimeZone);
|
86
|
+
this.javaParserList.add(parser);
|
87
|
+
}
|
74
88
|
}
|
75
89
|
}
|
76
90
|
this.defaultFromTimeZone = defaultFromTimeZone;
|
@@ -127,23 +141,48 @@ public class TimestampParser {
|
|
127
141
|
}
|
128
142
|
|
129
143
|
private Timestamp javaParse(String text) throws IllegalArgumentException {
|
130
|
-
|
144
|
+
long msec = -1;
|
145
|
+
long nsec = -1;
|
146
|
+
Boolean handleNanoResolution = false;
|
131
147
|
IllegalArgumentException exception = null;
|
132
148
|
|
133
|
-
for (
|
149
|
+
for (int i = 0; i < javaParserList.size(); i++) {
|
150
|
+
DateTimeFormatter parser = javaParserList.get(i);
|
151
|
+
handleNanoResolution = handleNanoResolutionList.get(i);
|
134
152
|
try {
|
135
|
-
|
153
|
+
if (handleNanoResolution) {
|
154
|
+
nsec = parseNano(text);
|
155
|
+
}
|
156
|
+
DateTime dateTime = parser.parseDateTime(text);
|
157
|
+
msec = dateTime.getMillis(); // NOTE: milli second resolution
|
136
158
|
break;
|
137
159
|
} catch (IllegalArgumentException ex) {
|
138
160
|
exception = ex;
|
139
161
|
}
|
140
162
|
}
|
141
|
-
if (
|
163
|
+
if (msec == -1) {
|
142
164
|
throw exception;
|
143
165
|
}
|
144
|
-
long msec = dateTime.getMillis(); // NOTE: milli second resolution
|
145
166
|
|
146
|
-
|
147
|
-
|
167
|
+
if (handleNanoResolution) {
|
168
|
+
long sec = msec / 1000;
|
169
|
+
return Timestamp.ofEpochSecond(sec, nsec);
|
170
|
+
}
|
171
|
+
else {
|
172
|
+
long nanoAdjustment = msec * 1000000;
|
173
|
+
return Timestamp.ofEpochSecond(0, nanoAdjustment);
|
174
|
+
}
|
175
|
+
}
|
176
|
+
|
177
|
+
private long parseNano(String text) {
|
178
|
+
long nsec = -1;
|
179
|
+
Matcher m = nanoSecPattern.matcher(text);
|
180
|
+
if (m.find()) {
|
181
|
+
//String nanoStr = String.format("%-9s", m.group(1)).replace(" ", "0");
|
182
|
+
//nsec = Long.parseLong(nanoStr);
|
183
|
+
String nanoStr = m.group(1);
|
184
|
+
nsec = Long.parseLong(nanoStr) * (long) Math.pow(10, 9 - nanoStr.length());
|
185
|
+
}
|
186
|
+
return nsec;
|
148
187
|
}
|
149
188
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-timestamp_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -54,6 +54,7 @@ files:
|
|
54
54
|
- bench/config_jruby.yml
|
55
55
|
- bench/config_jruby_formatter.yml
|
56
56
|
- bench/config_jruby_parser.yml
|
57
|
+
- bench/config_nano.yml
|
57
58
|
- bench/gen_dummy.rb
|
58
59
|
- build.gradle
|
59
60
|
- config/checkstyle/checkstyle.xml
|
@@ -74,6 +75,7 @@ files:
|
|
74
75
|
- example/string.csv
|
75
76
|
- example/string.yml
|
76
77
|
- example/string_java.yml
|
78
|
+
- example/string_nano.yml
|
77
79
|
- example/timestamp.csv
|
78
80
|
- example/timestamp.yml
|
79
81
|
- gradle/wrapper/gradle-wrapper.jar
|
@@ -95,7 +97,7 @@ files:
|
|
95
97
|
- src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java
|
96
98
|
- src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
|
97
99
|
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
|
98
|
-
- classpath/embulk-filter-timestamp_format-0.1.
|
100
|
+
- classpath/embulk-filter-timestamp_format-0.1.8.jar
|
99
101
|
homepage: https://github.com/sonots/embulk-filter-timestamp_format
|
100
102
|
licenses:
|
101
103
|
- MIT
|