embulk-filter-timestamp_format 0.1.9 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +14 -5
- data/build.gradle +2 -1
- data/example/empty.yml +9 -0
- data/example/string_auto_java.yml +24 -0
- data/example/string_nano.yml +1 -1
- data/src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java +19 -1
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatConverter.java +147 -0
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java +4 -1
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java +0 -2
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampUnitDeserializer.java +6 -0
- data/src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java +114 -0
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6c2034a4c12e1d4439ed09e907953e99ebca2ab7
|
4
|
+
data.tar.gz: 042b74ef4661bb9c1a629178df7e13c76bab03db
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eda3234fc719296056ee7640dcf889dc9af2230f14db52c0bb1678d5a08f709a938bd7a8fbf796ce740a64d22f6a7bf7f7a2930a83b2d30152321e1fd485fc7a
|
7
|
+
data.tar.gz: dc28a057caaf95736d2667542253850d569f00ac2fb9181b4d9526e903a0842eefaca635ce2a41f1b5adc00863f21c3443f89905ff1da9848c8d4f2e897d8f38
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -19,9 +19,10 @@ A filter plugin for Embulk to change timestamp format
|
|
19
19
|
- **default_from_timezone**: default timezone for the input string (string, default is `UTC`)
|
20
20
|
- **default_to_timestamp_format**: default timestamp format for the output string (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
|
21
21
|
- **default_to_timezone**: default timezone for the output string (string, default is `UTC`)
|
22
|
-
- **default_from_timestamp_unit**: default time unit such as `second
|
23
|
-
- **default_to_timestamp_unit**: default time unit such as `second
|
22
|
+
- **default_from_timestamp_unit**: default time unit such as `sec` (for second), `ms` (for milli second), `us` (for micro second), `ns` (for nano second) for the input unixtimestamp (string, default is `second`)
|
23
|
+
- **default_to_timestamp_unit**: default time unit such as `sec` (for second), `ms` (for milli second), `us` (for micro second), `ns` (for nano second) for the output unixtimestamp (string, default is `second`)
|
24
24
|
- **stop_on_invalid_record**: stop bulk load transaction if an invalid record is found (boolean, default is `false`)
|
25
|
+
- **timestamp_parser** (experimental): set `auto_java` to try to convert ruby format to java format to use faster java timestamp parser (string, default is `auto`)
|
25
26
|
|
26
27
|
## Example
|
27
28
|
|
@@ -62,9 +63,9 @@ Output will be as:
|
|
62
63
|
|
63
64
|
See [./example](./example) for more examples.
|
64
65
|
|
65
|
-
## JRuby Timestamp Parser
|
66
|
+
## JRuby Timestamp Parser Performance Issue
|
66
67
|
|
67
|
-
Embulk's timestamp parser
|
68
|
+
Embulk's timestamp parser originally uses a JRuby implementation, which is slow.
|
68
69
|
To improve performance, this plugin also supports Java's Joda-Time [DateTimeFormat](http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) format as:
|
69
70
|
|
70
71
|
```yaml
|
@@ -90,6 +91,10 @@ out:
|
|
90
91
|
|
91
92
|
If format strings contain `%`, the jruby parser/formatter is used. Otherwise, the java parser/formatter is used.
|
92
93
|
|
94
|
+
**Auto Java timestamp format conversion** (experimental)
|
95
|
+
|
96
|
+
If you configure `timestamp_parser: auto_java`, this plugin tries to convert the Ruby format into a Java format so that the faster Java timestamp parser can be used.
|
97
|
+
|
93
98
|
**COMPARISON:**
|
94
99
|
|
95
100
|
Benchmark test sets are available at [./bench](./bench). In my environment (Mac Book Pro), for 1000000 timestamps:
|
@@ -99,9 +104,11 @@ Benchmark test sets are available at [./bench](./bench). In my environment (Mac
|
|
99
104
|
* jruby parser / java formatter: 64.52s
|
100
105
|
* jruby parser / jruby formatter: 65.06s
|
101
106
|
|
107
|
+
JRuby parser is slow, but JRuby formatter is not so slow.
|
108
|
+
|
102
109
|
## Nano Resolution
|
103
110
|
|
104
|
-
JRuby parser has micro second resolution. Java (Joda-Time) parser has milli second resolution
|
111
|
+
JRuby parser has micro second resolution. Java (Joda-Time) parser has milli second resolution.
|
105
112
|
|
106
113
|
Nano second resolution is partially supported by this plugin itself. Use parser format `nnnnnnnnn` for Java parser as
|
107
114
|
|
@@ -118,6 +125,8 @@ yyyy-MM-dd HH:mm:ss.nnnnnnnnn z
|
|
118
125
|
yyyy-MM-dd HH:mm:ss.nnnnnn z
|
119
126
|
```
|
120
127
|
|
128
|
+
FYI: Java8's DateTimeFormatter supports nano second resolution, but we can not use it because embulk supports Java7.
|
129
|
+
|
121
130
|
## ToDo
|
122
131
|
|
123
132
|
* Write test
|
data/build.gradle
CHANGED
@@ -13,7 +13,7 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.
|
16
|
+
version = "0.2.0"
|
17
17
|
sourceCompatibility = 1.7
|
18
18
|
targetCompatibility = 1.7
|
19
19
|
|
@@ -22,6 +22,7 @@ dependencies {
|
|
22
22
|
provided "org.embulk:embulk-core:0.8.+"
|
23
23
|
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
|
24
24
|
testCompile "junit:junit:4.+"
|
25
|
+
testCompile "org.embulk:embulk-core:0.7.+:tests"
|
25
26
|
}
|
26
27
|
|
27
28
|
checkstyle {
|
data/example/empty.yml
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/string.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: string1, type: string}
|
8
|
+
- {name: string2, type: string}
|
9
|
+
- {name: string3, type: string}
|
10
|
+
- {name: string4, type: string}
|
11
|
+
filters:
|
12
|
+
- type: timestamp_format
|
13
|
+
default_from_timezone: "Asia/Taipei"
|
14
|
+
default_from_timestamp_format: ["%Y-%m-%d", "%Y-%m-%d %Z", "%Y-%m-%d %H:%M:%S.%N %Z", "%Y-%m-%d %H:%M:%S %Z", "%Y-%m-%d %H:%M:%S"]
|
15
|
+
default_to_timezone: "Asia/Taipei"
|
16
|
+
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
17
|
+
timestamp_parser: auto_java
|
18
|
+
columns:
|
19
|
+
- {name: string1}
|
20
|
+
- {name: string2, type: timestamp}
|
21
|
+
- {name: string3, type: long, to_unit: ms}
|
22
|
+
- {name: string4, type: double, to_unit: ms}
|
23
|
+
out:
|
24
|
+
type: "null"
|
data/example/string_nano.yml
CHANGED
@@ -13,7 +13,7 @@ filters:
|
|
13
13
|
default_from_timezone: "Asia/Taipei"
|
14
14
|
default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.nnnnnnnnn z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
|
15
15
|
default_to_timezone: "Asia/Taipei"
|
16
|
-
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.
|
16
|
+
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.nnnnnn Z"
|
17
17
|
columns:
|
18
18
|
- {name: string1}
|
19
19
|
- {name: string2, type: timestamp}
|
@@ -21,6 +21,7 @@ import org.joda.time.DateTimeZone;
|
|
21
21
|
import org.msgpack.value.Value;
|
22
22
|
import org.slf4j.Logger;
|
23
23
|
|
24
|
+
import java.util.ArrayList;
|
24
25
|
import java.util.HashMap;
|
25
26
|
import java.util.List;
|
26
27
|
|
@@ -69,7 +70,24 @@ public class ColumnCaster
|
|
69
70
|
{
|
70
71
|
DateTimeZone timezone = columnConfig.getFromTimeZone().or(task.getDefaultFromTimeZone());
|
71
72
|
List<String> formatList = columnConfig.getFromFormat().or(task.getDefaultFromTimestampFormat());
|
72
|
-
|
73
|
+
List<String> newFormatList = new ArrayList<>(formatList);
|
74
|
+
String name = columnConfig.getName();
|
75
|
+
if (task.getTimeStampParser().equals("auto_java")) {
|
76
|
+
for (int i = 0; i < formatList.size(); i++) {
|
77
|
+
String format = formatList.get(i);
|
78
|
+
if (!format.contains("%")) {
|
79
|
+
continue;
|
80
|
+
}
|
81
|
+
String javaFormat = TimestampFormatConverter.toJavaFormat(format);
|
82
|
+
if (javaFormat == null) {
|
83
|
+
logger.info(String.format("%s: Failed to convert ruby parser to java parser: \"%s\", Use ruby parser as is", name, format));
|
84
|
+
} else {
|
85
|
+
logger.debug(String.format("%s: Convert ruby parser \"%s\" to java parser \"%s\"", name, format, javaFormat));
|
86
|
+
newFormatList.set(i, javaFormat);
|
87
|
+
}
|
88
|
+
}
|
89
|
+
}
|
90
|
+
return new TimestampParser(task.getJRuby(), newFormatList, timezone);
|
73
91
|
}
|
74
92
|
|
75
93
|
private void buildTimestampFormatterMap()
|
@@ -0,0 +1,147 @@
|
|
1
|
+
package org.embulk.filter.timestamp_format;
|
2
|
+
|
3
|
+
// Convert JRuby Time Format into Java (Joda-Time) Format
|
4
|
+
// Intended only for the parser (the JRuby format is much richer than the Java format when it comes to formatting)
|
5
|
+
|
6
|
+
import java.util.HashMap;
|
7
|
+
import java.util.regex.Matcher;
|
8
|
+
import java.util.regex.Pattern;
|
9
|
+
|
10
|
+
public class TimestampFormatConverter
|
11
|
+
{
|
12
|
+
public static final HashMap<String, String> RUBY_TO_JAVA_FORMAT_TABLE = new HashMap<>();
|
13
|
+
private static final Pattern IDENTIFIER_PATTERN;
|
14
|
+
private static final Pattern NON_IDENTIFIER_PATTERN;
|
15
|
+
|
16
|
+
static
|
17
|
+
{
|
18
|
+
// %A<Friday> EEEE<Friday>
|
19
|
+
// %B<May> MMMM<May>
|
20
|
+
// %C<20> CC<20>
|
21
|
+
// %D<05/13/16> MM/dd/yy<05/13/16>
|
22
|
+
// %F<2016-05-13> yyyy-MM-dd<2016-05-13>
|
23
|
+
// %H<09> HH<09>
|
24
|
+
// %I<09> hh<09>
|
25
|
+
// %L<123> SSS<123>
|
26
|
+
// %M<02> mm<02>
|
27
|
+
// %N<123456789> nnnnnnnnn<123456789>
|
28
|
+
// %P<am> a<AM>
|
29
|
+
// %R<09:02> HH:mm<09:02>
|
30
|
+
// %S<39> ss<39>
|
31
|
+
// %T<09:02:39> HH:mm:ss<09:02:39>
|
32
|
+
// %U<19> w<19>
|
33
|
+
// %V<19> w<19>
|
34
|
+
// %W<19> w<19>
|
35
|
+
// %X<09:02:39> HH:mm:ss<09:02:39>
|
36
|
+
// %Y<2016> yyyy<2016>
|
37
|
+
// %Z<UTC> z<UTC>
|
38
|
+
// %a<Fri> EEE<Fri>
|
39
|
+
// %b<May> MMM<May>
|
40
|
+
// %c<Fri May 13 09:02:39 2016> EEE MMM dd HH:mm:ss yyyy<Fri May 13 09:02:39 2016>
|
41
|
+
// %d<13> dd<13>
|
42
|
+
// %e<13> dd<13>
|
43
|
+
// %h<May> MMM<May>
|
44
|
+
// %j<134> DDD<134>
|
45
|
+
// %k< 9> HH<09>
|
46
|
+
// %m<05> MM<05>
|
47
|
+
// %p<AM> a<AM>
|
48
|
+
// %r<09:02:39 AM> hh:mm:ss a<09:02:39 AM>
|
49
|
+
// %u<5> e<5>
|
50
|
+
// %v<13-MAY-2016> dd-MMM-yyyy<13-May-2016>
|
51
|
+
// %w<5> e<5>
|
52
|
+
// %x<05/13/16> MM/dd/yy<05/13/16>
|
53
|
+
// %y<16> yy<16>
|
54
|
+
// %z<+0000> Z<+0000>
|
55
|
+
// %:z<+00:00> Z<+0000>
|
56
|
+
// %::z<+00:00:00> Z<+0000>
|
57
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("A", "EEEE");
|
58
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("a", "EEE");
|
59
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("B", "MMMM");
|
60
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("b", "MMM");
|
61
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("C", "CC");
|
62
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("c", "EEE MMM dd HH:mm:ss yyyy");
|
63
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("D", "MM/dd/yy");
|
64
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("d", "dd");
|
65
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("e", "dd");
|
66
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("F", "yyyy-MM-dd");
|
67
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("H", "HH");
|
68
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("h", "MMM");
|
69
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("I", "hh");
|
70
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("j", "DDD");
|
71
|
+
//RUBY_TO_JAVA_FORMAT_TABLE.put("k", "HH"); // " 9" fails with HH
|
72
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("L", "SSS");
|
73
|
+
//RUBY_TO_JAVA_FORMAT_TABLE.put("l", "hh"); // " 9" fails with hh
|
74
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("M", "mm");
|
75
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("m", "MM");
|
76
|
+
//RUBY_TO_JAVA_FORMAT_TABLE.put("n", "");
|
77
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("N", "nnnnnnnnn");
|
78
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("P", "a");
|
79
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("p", "a");
|
80
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("R", "HH:mm");
|
81
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("r", "hh:mm:ss a");
|
82
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("S", "ss");
|
83
|
+
//RUBY_TO_JAVA_FORMAT_TABLE.put("s", "")); // N/A
|
84
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("T", "HH:mm:ss");
|
85
|
+
//RUBY_TO_JAVA_FORMAT_TABLE.put("t", "");
|
86
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("U", "w");
|
87
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("u", "e");
|
88
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("v", "dd-MMM-yyyy");
|
89
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("V", "w");
|
90
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("W", "w");
|
91
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("w", "e");
|
92
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("X", "HH:mm:ss");
|
93
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("x", "MM/dd/yy");
|
94
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("Y", "yyyy");
|
95
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("y", "yy");
|
96
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("Z", "z");
|
97
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("z", "Z");
|
98
|
+
//RUBY_TO_JAVA_FORMAT_TABLE.put("%", "");
|
99
|
+
|
100
|
+
String[] array = RUBY_TO_JAVA_FORMAT_TABLE.keySet().toArray(new String[0]);
|
101
|
+
StringBuilder keyPatternBuilder = new StringBuilder(array[0]);
|
102
|
+
for (int i = 1; i < array.length; i++) {
|
103
|
+
keyPatternBuilder.append(array[i]);
|
104
|
+
}
|
105
|
+
IDENTIFIER_PATTERN = Pattern.compile(new StringBuilder()
|
106
|
+
.append("%[-_^#0-9:]*([")
|
107
|
+
.append(keyPatternBuilder.toString())
|
108
|
+
.append("])")
|
109
|
+
.toString());
|
110
|
+
|
111
|
+
NON_IDENTIFIER_PATTERN = Pattern.compile("(^|\\s)([^%\\s]\\S*)");
|
112
|
+
}
|
113
|
+
|
114
|
+
// @return returns null if appropriate java format is not available
|
115
|
+
public static String toJavaFormat(String rubyFormat)
|
116
|
+
{
|
117
|
+
String quotedFormat = quoteFormat(rubyFormat);
|
118
|
+
Matcher match = IDENTIFIER_PATTERN.matcher(quotedFormat);
|
119
|
+
StringBuffer buf = new StringBuffer();
|
120
|
+
while (match.find()) {
|
121
|
+
String key = match.group(1);
|
122
|
+
String replacement = RUBY_TO_JAVA_FORMAT_TABLE.get(key);
|
123
|
+
match.appendReplacement(buf, replacement);
|
124
|
+
}
|
125
|
+
match.appendTail(buf);
|
126
|
+
String javaFormat = buf.toString();
|
127
|
+
|
128
|
+
if (javaFormat.contains("%")) {
|
129
|
+
return null; // give up to use java format
|
130
|
+
}
|
131
|
+
else {
|
132
|
+
return javaFormat;
|
133
|
+
}
|
134
|
+
}
|
135
|
+
|
136
|
+
private static String quoteFormat(String rubyFormat)
|
137
|
+
{
|
138
|
+
Matcher match = NON_IDENTIFIER_PATTERN.matcher(rubyFormat);
|
139
|
+
StringBuffer buf = new StringBuffer();
|
140
|
+
while (match.find()) {
|
141
|
+
String replacement = new StringBuilder().append(match.group(1)).append("'").append(match.group(2)).append("'").toString();
|
142
|
+
match.appendReplacement(buf, replacement);
|
143
|
+
}
|
144
|
+
match.appendTail(buf);
|
145
|
+
return buf.toString();
|
146
|
+
}
|
147
|
+
}
|
@@ -19,7 +19,6 @@ import org.embulk.spi.PageOutput;
|
|
19
19
|
import org.embulk.spi.PageReader;
|
20
20
|
import org.embulk.spi.Schema;
|
21
21
|
|
22
|
-
import org.embulk.spi.time.Timestamp;
|
23
22
|
import org.embulk.spi.type.BooleanType;
|
24
23
|
import org.embulk.spi.type.JsonType;
|
25
24
|
import org.embulk.spi.type.TimestampType;
|
@@ -66,6 +65,10 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
66
65
|
@ConfigDefault("false")
|
67
66
|
Boolean getStopOnInvalidRecord();
|
68
67
|
|
68
|
+
@Config("timestamp_parser")
|
69
|
+
@ConfigDefault("\"auto\"") // or auto_java
|
70
|
+
String getTimeStampParser();
|
71
|
+
|
69
72
|
@Config("default_from_timestamp_unit")
|
70
73
|
@ConfigDefault("\"second\"")
|
71
74
|
TimestampUnit getDefaultFromTimestampUnit();
|
@@ -16,8 +16,6 @@ import org.jruby.embed.ScriptingContainer;
|
|
16
16
|
import org.jruby.util.RubyDateFormat;
|
17
17
|
|
18
18
|
import java.util.Locale;
|
19
|
-
import java.util.regex.Matcher;
|
20
|
-
import java.util.regex.Pattern;
|
21
19
|
|
22
20
|
import org.joda.time.format.DateTimeFormat;
|
23
21
|
import org.joda.time.format.DateTimeFormatter;
|
@@ -21,14 +21,20 @@ public class TimestampUnitDeserializer
|
|
21
21
|
builder.put("MilliSecond", TimestampUnit.MilliSecond);
|
22
22
|
builder.put("millisecond", TimestampUnit.MilliSecond);
|
23
23
|
builder.put("milli_second", TimestampUnit.MilliSecond);
|
24
|
+
builder.put("milli", TimestampUnit.MilliSecond);
|
25
|
+
builder.put("msec", TimestampUnit.MilliSecond);
|
24
26
|
builder.put("ms", TimestampUnit.MilliSecond);
|
25
27
|
builder.put("MicroSecond", TimestampUnit.MicroSecond);
|
26
28
|
builder.put("microsecond", TimestampUnit.MicroSecond);
|
27
29
|
builder.put("micro_second", TimestampUnit.MicroSecond);
|
30
|
+
builder.put("micro", TimestampUnit.MicroSecond);
|
31
|
+
builder.put("usec", TimestampUnit.MicroSecond);
|
28
32
|
builder.put("us", TimestampUnit.MicroSecond);
|
29
33
|
builder.put("NanoSecond", TimestampUnit.NanoSecond);
|
30
34
|
builder.put("nanosecond", TimestampUnit.NanoSecond);
|
31
35
|
builder.put("nano_second", TimestampUnit.NanoSecond);
|
36
|
+
builder.put("nano", TimestampUnit.NanoSecond);
|
37
|
+
builder.put("nsec", TimestampUnit.NanoSecond);
|
32
38
|
builder.put("ns", TimestampUnit.NanoSecond);
|
33
39
|
stringToTimestampUnitMap = builder.build();
|
34
40
|
}
|
@@ -0,0 +1,114 @@
|
|
1
|
+
package org.embulk.filter.timestamp_format;
|
2
|
+
|
3
|
+
import org.embulk.EmbulkTestRuntime;
|
4
|
+
|
5
|
+
import org.embulk.spi.time.Timestamp;
|
6
|
+
import org.joda.time.DateTimeZone;
|
7
|
+
import org.jruby.embed.ScriptingContainer;
|
8
|
+
|
9
|
+
import org.junit.Before;
|
10
|
+
import org.junit.Rule;
|
11
|
+
import org.junit.Test;
|
12
|
+
|
13
|
+
import java.util.Arrays;
|
14
|
+
import java.util.Map;
|
15
|
+
|
16
|
+
import static org.junit.Assert.assertEquals;
|
17
|
+
import static org.junit.Assert.fail;
|
18
|
+
|
19
|
+
public class TestTimestampFormatConverter
|
20
|
+
{
|
21
|
+
@Rule
|
22
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
23
|
+
public ScriptingContainer jruby;
|
24
|
+
public DateTimeZone zone;
|
25
|
+
public Timestamp timestamp;
|
26
|
+
|
27
|
+
@Before
|
28
|
+
public void createResource()
|
29
|
+
{
|
30
|
+
jruby = new ScriptingContainer();
|
31
|
+
zone = DateTimeZone.UTC;
|
32
|
+
timestamp = Timestamp.ofEpochSecond(1463130159, 123456789);
|
33
|
+
}
|
34
|
+
|
35
|
+
@Test
|
36
|
+
public void testRUBY_TO_JAVA_FORMAT_TABLE()
|
37
|
+
{
|
38
|
+
for(Map.Entry<String, String> entry : TimestampFormatConverter.RUBY_TO_JAVA_FORMAT_TABLE.entrySet()) {
|
39
|
+
String rubyFormat = "%" + entry.getKey();
|
40
|
+
String javaFormat = entry.getValue();
|
41
|
+
|
42
|
+
TimestampFormatter rubyFormatter = new TimestampFormatter(jruby, rubyFormat, zone);
|
43
|
+
TimestampFormatter javaFormatter = new TimestampFormatter(jruby, javaFormat, zone);
|
44
|
+
String rubyFormatted = rubyFormatter.format(timestamp);
|
45
|
+
String javaFormatted = javaFormatter.format(timestamp);
|
46
|
+
// System.out.println(String.format("%s<%s> %s<%s>", rubyFormat, rubyFormatted, javaFormat, javaFormatted));
|
47
|
+
|
48
|
+
TimestampParser rubyParser = new TimestampParser(jruby, Arrays.asList("." + rubyFormat), zone);
|
49
|
+
TimestampParser javaParser = new TimestampParser(jruby, Arrays.asList("." + javaFormat), zone);
|
50
|
+
Timestamp rubyParsed = rubyParser.parse("." + rubyFormatted);
|
51
|
+
try {
|
52
|
+
Timestamp javaParsed = javaParser.parse("." + rubyFormatted);
|
53
|
+
}
|
54
|
+
catch (IllegalArgumentException ex) {
|
55
|
+
fail(String.format("Parse \"%s\" with java format \"%s\" failed (corresponding ruby format \"%s\")", rubyFormatted, javaFormat, rubyFormat));
|
56
|
+
}
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
@Test
|
61
|
+
public void testToJavaFormat()
|
62
|
+
{
|
63
|
+
for(Map.Entry<String, String> entry : TimestampFormatConverter.RUBY_TO_JAVA_FORMAT_TABLE.entrySet()) {
|
64
|
+
String rubyFormat = "%-2" + entry.getKey();
|
65
|
+
String javaFormat = entry.getValue();
|
66
|
+
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
67
|
+
}
|
68
|
+
}
|
69
|
+
|
70
|
+
@Test
|
71
|
+
public void testToJavaFormats()
|
72
|
+
{
|
73
|
+
{
|
74
|
+
String rubyFormat = "%Y-%m-%d %H:%M:%S.%6N %:z";
|
75
|
+
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z";
|
76
|
+
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
77
|
+
|
78
|
+
TimestampParser parser = new TimestampParser(jruby, Arrays.asList(javaFormat), zone);
|
79
|
+
try {
|
80
|
+
parser.parse("2016-05-12 20:14:13.123456789 +09:00");
|
81
|
+
}
|
82
|
+
catch (IllegalArgumentException ex) {
|
83
|
+
fail();
|
84
|
+
}
|
85
|
+
}
|
86
|
+
{
|
87
|
+
String rubyFormat = "%Y-%m-%d %H:%M:%S.%6N UTC";
|
88
|
+
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn 'UTC'";
|
89
|
+
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
90
|
+
|
91
|
+
TimestampParser parser = new TimestampParser(jruby, Arrays.asList(javaFormat), zone);
|
92
|
+
try {
|
93
|
+
parser.parse("2016-05-12 20:14:13.123456789 UTC");
|
94
|
+
}
|
95
|
+
catch (IllegalArgumentException ex) {
|
96
|
+
fail();
|
97
|
+
}
|
98
|
+
}
|
99
|
+
{
|
100
|
+
String rubyFormat = "%Y-%m-%d %H:%M:%S.%6N +00:00";
|
101
|
+
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn '+00:00'";
|
102
|
+
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
103
|
+
|
104
|
+
TimestampParser parser = new TimestampParser(jruby, Arrays.asList(javaFormat), zone);
|
105
|
+
try {
|
106
|
+
parser.parse("2016-05-12 20:14:13.123456789 +00:00");
|
107
|
+
}
|
108
|
+
catch (IllegalArgumentException ex) {
|
109
|
+
fail();
|
110
|
+
}
|
111
|
+
}
|
112
|
+
}
|
113
|
+
}
|
114
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-timestamp_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -60,6 +60,7 @@ files:
|
|
60
60
|
- config/checkstyle/checkstyle.xml
|
61
61
|
- example/double.csv
|
62
62
|
- example/double.yml
|
63
|
+
- example/empty.yml
|
63
64
|
- example/example.jsonl
|
64
65
|
- example/example.yml
|
65
66
|
- example/example2.csv
|
@@ -74,6 +75,7 @@ files:
|
|
74
75
|
- example/long.yml
|
75
76
|
- example/string.csv
|
76
77
|
- example/string.yml
|
78
|
+
- example/string_auto_java.yml
|
77
79
|
- example/string_java.yml
|
78
80
|
- example/string_nano.yml
|
79
81
|
- example/timestamp.csv
|
@@ -87,6 +89,7 @@ files:
|
|
87
89
|
- src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java
|
88
90
|
- src/main/java/org/embulk/filter/timestamp_format/JsonCaster.java
|
89
91
|
- src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java
|
92
|
+
- src/main/java/org/embulk/filter/timestamp_format/TimestampFormatConverter.java
|
90
93
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java
|
91
94
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java
|
92
95
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java
|
@@ -96,8 +99,9 @@ files:
|
|
96
99
|
- src/main/java/org/embulk/filter/timestamp_format/cast/LongCast.java
|
97
100
|
- src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java
|
98
101
|
- src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
|
102
|
+
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java
|
99
103
|
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
|
100
|
-
- classpath/embulk-filter-timestamp_format-0.
|
104
|
+
- classpath/embulk-filter-timestamp_format-0.2.0.jar
|
101
105
|
homepage: https://github.com/sonots/embulk-filter-timestamp_format
|
102
106
|
licenses:
|
103
107
|
- MIT
|