embulk-filter-timestamp_format 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +16 -2
- data/bench/config_java.yml +1 -1
- data/bench/config_jruby_formatter.yml +1 -1
- data/bench/config_nano.yml +13 -0
- data/bench/gen_dummy.rb +1 -1
- data/build.gradle +1 -1
- data/example/string.csv +8 -0
- data/example/string.yml +1 -1
- data/example/string_java.yml +1 -1
- data/example/string_nano.yml +23 -0
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java +48 -9
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81e7bf1b428ed0d48164dbaaec74a2080be029be
|
4
|
+
data.tar.gz: 7c8e45fcc5cf72d8ae25eb973382e74bf3b16420
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fbae3c05eb9a2808adab605035e5ad432f9fd61df48b5b077565fcc6372a7e3413ec4ea499ddd98b9ebfbbee3f1073f7fa4a73b4743b238c9532905ee60afcfb
|
7
|
+
data.tar.gz: b3760fc88ea943d92edc776ef400563affe983718e7d66854e7093ba7a25983d9395ac3a46642ed430c5ccde4dc3845437e10145d7e4d2385e6b722fb021c311
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -99,9 +99,23 @@ Benchmark test sets are available at [./bench](./bench). In my environment (Mac
|
|
99
99
|
* jruby parser / java formatter: 64.52s
|
100
100
|
* jruby parser / jruby formatter: 65.06s
|
101
101
|
|
102
|
-
|
102
|
+
## Nano Resolution
|
103
103
|
|
104
|
-
|
104
|
+
The JRuby parser has microsecond resolution. The Java (Joda-Time) parser has millisecond resolution (although Java 8's DateTimeFormatter supports nanosecond resolution).
|
105
|
+
|
106
|
+
Nanosecond resolution is partially supported by this plugin itself. Use the parser format `nnnnnnnnn` with the Java parser, as in:
|
107
|
+
|
108
|
+
```
|
109
|
+
yyyy-MM-dd HH:mm:ss.nnnnnnnnn z
|
110
|
+
```
|
111
|
+
|
112
|
+
This plugin extracts the nanosecond (fractional-second) digits from the input text with the regular expression `\.(\d+)`.
|
113
|
+
|
114
|
+
For the formatter, you can use the JRuby formatter, as in:
|
115
|
+
|
116
|
+
```
|
117
|
+
%Y-%m-%d %H:%M:%S.%N %z
|
118
|
+
```
|
105
119
|
|
106
120
|
## ToDo
|
107
121
|
|
data/bench/config_java.yml
CHANGED
@@ -0,0 +1,13 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: bench/dummy
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: timestamp, type: string}
|
8
|
+
filters:
|
9
|
+
- type: timestamp_format
|
10
|
+
columns:
|
11
|
+
- {name: timestamp, from_format: ["yyyy-MM-dd hh:mm:ss.nnnnnnnnn"], to_format: "%Y-%m-%d"}
|
12
|
+
out:
|
13
|
+
type: "null"
|
data/bench/gen_dummy.rb
CHANGED
data/build.gradle
CHANGED
data/example/string.csv
CHANGED
@@ -3,4 +3,12 @@
|
|
3
3
|
2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00
|
4
4
|
2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC
|
5
5
|
2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC
|
6
|
+
2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC
|
7
|
+
2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC
|
8
|
+
2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC
|
9
|
+
2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC
|
10
|
+
2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC
|
11
|
+
2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC
|
12
|
+
2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC
|
13
|
+
2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC
|
6
14
|
|
data/example/string.yml
CHANGED
@@ -11,7 +11,7 @@ in:
|
|
11
11
|
filters:
|
12
12
|
- type: timestamp_format
|
13
13
|
default_from_timezone: "Asia/Taipei"
|
14
|
-
default_from_timestamp_format: ["%Y-%m-%d
|
14
|
+
default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %z", "%Y-%m-%d"]
|
15
15
|
default_to_timezone: "Asia/Taipei"
|
16
16
|
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
17
17
|
columns:
|
data/example/string_java.yml
CHANGED
@@ -11,7 +11,7 @@ in:
|
|
11
11
|
filters:
|
12
12
|
- type: timestamp_format
|
13
13
|
default_from_timezone: "Asia/Taipei"
|
14
|
-
default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.
|
14
|
+
default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.SSSSSSSSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
|
15
15
|
default_to_timezone: "Asia/Taipei"
|
16
16
|
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z"
|
17
17
|
columns:
|
@@ -0,0 +1,23 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/string.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: string1, type: string}
|
8
|
+
- {name: string2, type: string}
|
9
|
+
- {name: string3, type: string}
|
10
|
+
- {name: string4, type: string}
|
11
|
+
filters:
|
12
|
+
- type: timestamp_format
|
13
|
+
default_from_timezone: "Asia/Taipei"
|
14
|
+
default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.nnnnnnnnn z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
|
15
|
+
default_to_timezone: "Asia/Taipei"
|
16
|
+
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
17
|
+
columns:
|
18
|
+
- {name: string1}
|
19
|
+
- {name: string2, type: timestamp}
|
20
|
+
- {name: string3, type: long, to_unit: ms}
|
21
|
+
- {name: string4, type: double, to_unit: ms}
|
22
|
+
out:
|
23
|
+
type: "null"
|
@@ -22,6 +22,8 @@ import org.jruby.embed.ScriptingContainer;
|
|
22
22
|
import java.util.ArrayList;
|
23
23
|
import java.util.List;
|
24
24
|
import java.util.Locale;
|
25
|
+
import java.util.regex.Matcher;
|
26
|
+
import java.util.regex.Pattern;
|
25
27
|
|
26
28
|
import org.joda.time.format.DateTimeFormat;
|
27
29
|
|
@@ -48,7 +50,9 @@ public class TimestampParser {
|
|
48
50
|
|
49
51
|
private final List<JRubyTimeParserHelper> jrubyParserList = new ArrayList<>();
|
50
52
|
private final List<DateTimeFormatter> javaParserList = new ArrayList<>();
|
53
|
+
private final List<Boolean> handleNanoResolutionList = new ArrayList<>();
|
51
54
|
private final DateTimeZone defaultFromTimeZone;
|
55
|
+
private final Pattern nanoSecPattern = Pattern.compile("\\.(\\d+)");
|
52
56
|
|
53
57
|
TimestampParser(PluginTask task) {
|
54
58
|
this(task.getJRuby(), task.getDefaultFromTimestampFormat(), task.getDefaultFromTimeZone());
|
@@ -69,8 +73,18 @@ public class TimestampParser {
|
|
69
73
|
JRubyTimeParserHelper helper = (JRubyTimeParserHelper) helperFactory.newInstance(format, 1970, 1, 1, 0, 0, 0, 0); // TODO default time zone
|
70
74
|
this.jrubyParserList.add(helper);
|
71
75
|
} else {
|
72
|
-
|
73
|
-
|
76
|
+
// special treatment for nano resolution. n is not originally supported by Joda-Time
|
77
|
+
if (format.contains("n")) {
|
78
|
+
this.handleNanoResolutionList.add(true);
|
79
|
+
String newFormat = format.replaceAll("n", "S");
|
80
|
+
DateTimeFormatter parser = DateTimeFormat.forPattern(newFormat).withLocale(Locale.ENGLISH).withZone(defaultFromTimeZone);
|
81
|
+
this.javaParserList.add(parser);
|
82
|
+
}
|
83
|
+
else {
|
84
|
+
this.handleNanoResolutionList.add(false);
|
85
|
+
DateTimeFormatter parser = DateTimeFormat.forPattern(format).withLocale(Locale.ENGLISH).withZone(defaultFromTimeZone);
|
86
|
+
this.javaParserList.add(parser);
|
87
|
+
}
|
74
88
|
}
|
75
89
|
}
|
76
90
|
this.defaultFromTimeZone = defaultFromTimeZone;
|
@@ -127,23 +141,48 @@ public class TimestampParser {
|
|
127
141
|
}
|
128
142
|
|
129
143
|
private Timestamp javaParse(String text) throws IllegalArgumentException {
|
130
|
-
|
144
|
+
long msec = -1;
|
145
|
+
long nsec = -1;
|
146
|
+
Boolean handleNanoResolution = false;
|
131
147
|
IllegalArgumentException exception = null;
|
132
148
|
|
133
|
-
for (
|
149
|
+
for (int i = 0; i < javaParserList.size(); i++) {
|
150
|
+
DateTimeFormatter parser = javaParserList.get(i);
|
151
|
+
handleNanoResolution = handleNanoResolutionList.get(i);
|
134
152
|
try {
|
135
|
-
|
153
|
+
if (handleNanoResolution) {
|
154
|
+
nsec = parseNano(text);
|
155
|
+
}
|
156
|
+
DateTime dateTime = parser.parseDateTime(text);
|
157
|
+
msec = dateTime.getMillis(); // NOTE: milli second resolution
|
136
158
|
break;
|
137
159
|
} catch (IllegalArgumentException ex) {
|
138
160
|
exception = ex;
|
139
161
|
}
|
140
162
|
}
|
141
|
-
if (
|
163
|
+
if (msec == -1) {
|
142
164
|
throw exception;
|
143
165
|
}
|
144
|
-
long msec = dateTime.getMillis(); // NOTE: milli second resolution
|
145
166
|
|
146
|
-
|
147
|
-
|
167
|
+
if (handleNanoResolution) {
|
168
|
+
long sec = msec / 1000;
|
169
|
+
return Timestamp.ofEpochSecond(sec, nsec);
|
170
|
+
}
|
171
|
+
else {
|
172
|
+
long nanoAdjustment = msec * 1000000;
|
173
|
+
return Timestamp.ofEpochSecond(0, nanoAdjustment);
|
174
|
+
}
|
175
|
+
}
|
176
|
+
|
177
|
+
private long parseNano(String text) {
|
178
|
+
long nsec = -1;
|
179
|
+
Matcher m = nanoSecPattern.matcher(text);
|
180
|
+
if (m.find()) {
|
181
|
+
//String nanoStr = String.format("%-9s", m.group(1)).replace(" ", "0");
|
182
|
+
//nsec = Long.parseLong(nanoStr);
|
183
|
+
String nanoStr = m.group(1);
|
184
|
+
nsec = Long.parseLong(nanoStr) * (long) Math.pow(10, 9 - nanoStr.length());
|
185
|
+
}
|
186
|
+
return nsec;
|
148
187
|
}
|
149
188
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-timestamp_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -54,6 +54,7 @@ files:
|
|
54
54
|
- bench/config_jruby.yml
|
55
55
|
- bench/config_jruby_formatter.yml
|
56
56
|
- bench/config_jruby_parser.yml
|
57
|
+
- bench/config_nano.yml
|
57
58
|
- bench/gen_dummy.rb
|
58
59
|
- build.gradle
|
59
60
|
- config/checkstyle/checkstyle.xml
|
@@ -74,6 +75,7 @@ files:
|
|
74
75
|
- example/string.csv
|
75
76
|
- example/string.yml
|
76
77
|
- example/string_java.yml
|
78
|
+
- example/string_nano.yml
|
77
79
|
- example/timestamp.csv
|
78
80
|
- example/timestamp.yml
|
79
81
|
- gradle/wrapper/gradle-wrapper.jar
|
@@ -95,7 +97,7 @@ files:
|
|
95
97
|
- src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java
|
96
98
|
- src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
|
97
99
|
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
|
98
|
-
- classpath/embulk-filter-timestamp_format-0.1.
|
100
|
+
- classpath/embulk-filter-timestamp_format-0.1.8.jar
|
99
101
|
homepage: https://github.com/sonots/embulk-filter-timestamp_format
|
100
102
|
licenses:
|
101
103
|
- MIT
|