embulk-filter-timestamp_format 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +14 -5
- data/build.gradle +2 -1
- data/example/empty.yml +9 -0
- data/example/string_auto_java.yml +24 -0
- data/example/string_nano.yml +1 -1
- data/src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java +19 -1
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatConverter.java +147 -0
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java +4 -1
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java +0 -2
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampUnitDeserializer.java +6 -0
- data/src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java +114 -0
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6c2034a4c12e1d4439ed09e907953e99ebca2ab7
|
4
|
+
data.tar.gz: 042b74ef4661bb9c1a629178df7e13c76bab03db
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eda3234fc719296056ee7640dcf889dc9af2230f14db52c0bb1678d5a08f709a938bd7a8fbf796ce740a64d22f6a7bf7f7a2930a83b2d30152321e1fd485fc7a
|
7
|
+
data.tar.gz: dc28a057caaf95736d2667542253850d569f00ac2fb9181b4d9526e903a0842eefaca635ce2a41f1b5adc00863f21c3443f89905ff1da9848c8d4f2e897d8f38
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -19,9 +19,10 @@ A filter plugin for Embulk to change timestamp format
|
|
19
19
|
- **default_from_timezone**: default timezone for the input string (string, default is `UTC`)
|
20
20
|
- **default_to_timestamp_format**: default timestamp format for the output string (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
|
21
21
|
- **default_to_timezone**: default timezone for the output string (string, default is `UTC`)
|
22
|
-
- **default_from_timestamp_unit**: default time unit such as `second
|
23
|
-
- **default_to_timestamp_unit**: default time unit such as `second
|
22
|
+
- **default_from_timestamp_unit**: default time unit of the input unix timestamp, such as `sec` (second), `ms` (millisecond), `us` (microsecond), or `ns` (nanosecond) (string, default is `second`)
|
23
|
+
- **default_to_timestamp_unit**: default time unit of the output unix timestamp, such as `sec` (second), `ms` (millisecond), `us` (microsecond), or `ns` (nanosecond) (string, default is `second`)
|
24
24
|
- **stop_on_invalid_record**: stop the bulk load transaction if an invalid record is found (boolean, default is `false`)
|
25
|
+
- **timestamp_parser** (experimental): set to `auto_java` to have the plugin try to convert Ruby timestamp formats into Java formats so that the faster Java timestamp parser can be used (string, default is `auto`)
|
25
26
|
|
26
27
|
## Example
|
27
28
|
|
@@ -62,9 +63,9 @@ Output will be as:
|
|
62
63
|
|
63
64
|
See [./example](./example) for more examples.
|
64
65
|
|
65
|
-
## JRuby Timestamp Parser
|
66
|
+
## JRuby Timestamp Parser Performance Issue
|
66
67
|
|
67
|
-
Embulk's timestamp parser
|
68
|
+
Embulk's timestamp parser uses a JRuby implementation by default, which is slow.
|
68
69
|
To improve performance, this plugin also supports Java's Joda-Time [DateTimeFormat](http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) format as:
|
69
70
|
|
70
71
|
```yaml
|
@@ -90,6 +91,10 @@ out:
|
|
90
91
|
|
91
92
|
If a format string contains `%`, the JRuby parser/formatter is used. Otherwise, the Java parser/formatter is used.
|
92
93
|
|
94
|
+
**Auto Java timestamp format conversion** (experimental)
|
95
|
+
|
96
|
+
If you configure `timestamp_parser: auto_java`, this plugin tries to convert each Ruby format into an equivalent Java format so that the faster Java timestamp parser can be used.
|
97
|
+
|
93
98
|
**COMPARISON:**
|
94
99
|
|
95
100
|
Benchmark test sets are available at [./bench](./bench). In my environment (Mac Book Pro), for 1000000 timestamps:
|
@@ -99,9 +104,11 @@ Benchmark test sets are available at [./bench](./bench). In my environment (Mac
|
|
99
104
|
* jruby parser / java formatter: 64.52s
|
100
105
|
* jruby parser / jruby formatter: 65.06s
|
101
106
|
|
107
|
+
JRuby parser is slow, but JRuby formatter is not so slow.
|
108
|
+
|
102
109
|
## Nano Resolution
|
103
110
|
|
104
|
-
JRuby parser has micro second resolution. Java (Joda-Time) parser has milli second resolution
|
111
|
+
JRuby parser has micro second resolution. Java (Joda-Time) parser has milli second resolution.
|
105
112
|
|
106
113
|
Nano second resolution is partially supported by this plugin itself. Use parser format `nnnnnnnnn` for Java parser as
|
107
114
|
|
@@ -118,6 +125,8 @@ yyyy-MM-dd HH:mm:ss.nnnnnnnnn z
|
|
118
125
|
yyyy-MM-dd HH:mm:ss.nnnnnn z
|
119
126
|
```
|
120
127
|
|
128
|
+
FYI: Java 8's DateTimeFormatter supports nanosecond resolution, but we cannot use it because Embulk supports Java 7.
|
129
|
+
|
121
130
|
## ToDo
|
122
131
|
|
123
132
|
* Write test
|
data/build.gradle
CHANGED
@@ -13,7 +13,7 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.
|
16
|
+
version = "0.2.0"
|
17
17
|
sourceCompatibility = 1.7
|
18
18
|
targetCompatibility = 1.7
|
19
19
|
|
@@ -22,6 +22,7 @@ dependencies {
|
|
22
22
|
provided "org.embulk:embulk-core:0.8.+"
|
23
23
|
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
|
24
24
|
testCompile "junit:junit:4.+"
|
25
|
+
testCompile "org.embulk:embulk-core:0.7.+:tests"
|
25
26
|
}
|
26
27
|
|
27
28
|
checkstyle {
|
data/example/empty.yml
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/string.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
columns:
|
7
|
+
- {name: string1, type: string}
|
8
|
+
- {name: string2, type: string}
|
9
|
+
- {name: string3, type: string}
|
10
|
+
- {name: string4, type: string}
|
11
|
+
filters:
|
12
|
+
- type: timestamp_format
|
13
|
+
default_from_timezone: "Asia/Taipei"
|
14
|
+
default_from_timestamp_format: ["%Y-%m-%d", "%Y-%m-%d %Z", "%Y-%m-%d %H:%M:%S.%N %Z", "%Y-%m-%d %H:%M:%S %Z", "%Y-%m-%d %H:%M:%S"]
|
15
|
+
default_to_timezone: "Asia/Taipei"
|
16
|
+
default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N"
|
17
|
+
timestamp_parser: auto_java
|
18
|
+
columns:
|
19
|
+
- {name: string1}
|
20
|
+
- {name: string2, type: timestamp}
|
21
|
+
- {name: string3, type: long, to_unit: ms}
|
22
|
+
- {name: string4, type: double, to_unit: ms}
|
23
|
+
out:
|
24
|
+
type: "null"
|
data/example/string_nano.yml
CHANGED
@@ -13,7 +13,7 @@ filters:
|
|
13
13
|
default_from_timezone: "Asia/Taipei"
|
14
14
|
default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.nnnnnnnnn z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"]
|
15
15
|
default_to_timezone: "Asia/Taipei"
|
16
|
-
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.
|
16
|
+
default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.nnnnnn Z"
|
17
17
|
columns:
|
18
18
|
- {name: string1}
|
19
19
|
- {name: string2, type: timestamp}
|
@@ -21,6 +21,7 @@ import org.joda.time.DateTimeZone;
|
|
21
21
|
import org.msgpack.value.Value;
|
22
22
|
import org.slf4j.Logger;
|
23
23
|
|
24
|
+
import java.util.ArrayList;
|
24
25
|
import java.util.HashMap;
|
25
26
|
import java.util.List;
|
26
27
|
|
@@ -69,7 +70,24 @@ public class ColumnCaster
|
|
69
70
|
{
|
70
71
|
DateTimeZone timezone = columnConfig.getFromTimeZone().or(task.getDefaultFromTimeZone());
|
71
72
|
List<String> formatList = columnConfig.getFromFormat().or(task.getDefaultFromTimestampFormat());
|
72
|
-
|
73
|
+
List<String> newFormatList = new ArrayList<>(formatList);
|
74
|
+
String name = columnConfig.getName();
|
75
|
+
if (task.getTimeStampParser().equals("auto_java")) {
|
76
|
+
for (int i = 0; i < formatList.size(); i++) {
|
77
|
+
String format = formatList.get(i);
|
78
|
+
if (!format.contains("%")) {
|
79
|
+
continue;
|
80
|
+
}
|
81
|
+
String javaFormat = TimestampFormatConverter.toJavaFormat(format);
|
82
|
+
if (javaFormat == null) {
|
83
|
+
logger.info(String.format("%s: Failed to convert ruby parser to java parser: \"%s\", Use ruby parser as is", name, format));
|
84
|
+
} else {
|
85
|
+
logger.debug(String.format("%s: Convert ruby parser \"%s\" to java parser \"%s\"", name, format, javaFormat));
|
86
|
+
newFormatList.set(i, javaFormat);
|
87
|
+
}
|
88
|
+
}
|
89
|
+
}
|
90
|
+
return new TimestampParser(task.getJRuby(), newFormatList, timezone);
|
73
91
|
}
|
74
92
|
|
75
93
|
private void buildTimestampFormatterMap()
|
@@ -0,0 +1,147 @@
|
|
1
|
+
package org.embulk.filter.timestamp_format;
|
2
|
+
|
3
|
+
// Convert JRuby Time Format into Java (Joda-Time) Format
|
4
|
+
// Intended for the parser only (for formatting, the JRuby format is too rich to be expressed as a Java format)
|
5
|
+
|
6
|
+
import java.util.HashMap;
|
7
|
+
import java.util.regex.Matcher;
|
8
|
+
import java.util.regex.Pattern;
|
9
|
+
|
10
|
+
public class TimestampFormatConverter
|
11
|
+
{
|
12
|
+
public static final HashMap<String, String> RUBY_TO_JAVA_FORMAT_TABLE = new HashMap<>();
|
13
|
+
private static final Pattern IDENTIFIER_PATTERN;
|
14
|
+
private static final Pattern NON_IDENTIFIER_PATTERN;
|
15
|
+
|
16
|
+
static
|
17
|
+
{
|
18
|
+
// %A<Friday> EEEE<Friday>
|
19
|
+
// %B<May> MMMM<May>
|
20
|
+
// %C<20> CC<20>
|
21
|
+
// %D<05/13/16> MM/dd/yy<05/13/16>
|
22
|
+
// %F<2016-05-13> yyyy-MM-dd<2016-05-13>
|
23
|
+
// %H<09> HH<09>
|
24
|
+
// %I<09> hh<09>
|
25
|
+
// %L<123> SSS<123>
|
26
|
+
// %M<02> mm<02>
|
27
|
+
// %N<123456789> nnnnnnnnn<123456789>
|
28
|
+
// %P<am> a<AM>
|
29
|
+
// %R<09:02> HH:mm<09:02>
|
30
|
+
// %S<39> ss<39>
|
31
|
+
// %T<09:02:39> HH:mm:ss<09:02:39>
|
32
|
+
// %U<19> w<19>
|
33
|
+
// %V<19> w<19>
|
34
|
+
// %W<19> w<19>
|
35
|
+
// %X<09:02:39> HH:mm:ss<09:02:39>
|
36
|
+
// %Y<2016> yyyy<2016>
|
37
|
+
// %Z<UTC> z<UTC>
|
38
|
+
// %a<Fri> EEE<Fri>
|
39
|
+
// %b<May> MMM<May>
|
40
|
+
// %c<Fri May 13 09:02:39 2016> EEE MMM dd HH:mm:ss yyyy<Fri May 13 09:02:39 2016>
|
41
|
+
// %d<13> dd<13>
|
42
|
+
// %e<13> dd<13>
|
43
|
+
// %h<May> MMM<May>
|
44
|
+
// %j<134> DDD<134>
|
45
|
+
// %k< 9> HH<09>
|
46
|
+
// %m<05> MM<05>
|
47
|
+
// %p<AM> a<AM>
|
48
|
+
// %r<09:02:39 AM> hh:mm:ss a<09:02:39 AM>
|
49
|
+
// %u<5> e<5>
|
50
|
+
// %v<13-MAY-2016> dd-MMM-yyyy<13-May-2016>
|
51
|
+
// %w<5> e<5>
|
52
|
+
// %x<05/13/16> MM/dd/yy<05/13/16>
|
53
|
+
// %y<16> yy<16>
|
54
|
+
// %z<+0000> Z<+0000>
|
55
|
+
// %:z<+00:00> Z<+0000>
|
56
|
+
// %::z<+00:00:00> Z<+0000>
|
57
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("A", "EEEE");
|
58
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("a", "EEE");
|
59
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("B", "MMMM");
|
60
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("b", "MMM");
|
61
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("C", "CC");
|
62
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("c", "EEE MMM dd HH:mm:ss yyyy");
|
63
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("D", "MM/dd/yy");
|
64
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("d", "dd");
|
65
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("e", "dd");
|
66
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("F", "yyyy-MM-dd");
|
67
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("H", "HH");
|
68
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("h", "MMM");
|
69
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("I", "hh");
|
70
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("j", "DDD");
|
71
|
+
//RUBY_TO_JAVA_FORMAT_TABLE.put("k", "HH"); // " 9" fails with HH
|
72
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("L", "SSS");
|
73
|
+
//RUBY_TO_JAVA_FORMAT_TABLE.put("l", "hh"); // " 9" fails with hh
|
74
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("M", "mm");
|
75
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("m", "MM");
|
76
|
+
//RUBY_TO_JAVA_FORMAT_TABLE.put("n", "");
|
77
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("N", "nnnnnnnnn");
|
78
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("P", "a");
|
79
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("p", "a");
|
80
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("R", "HH:mm");
|
81
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("r", "hh:mm:ss a");
|
82
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("S", "ss");
|
83
|
+
//RUBY_TO_JAVA_FORMAT_TABLE.put("s", "")); // N/A
|
84
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("T", "HH:mm:ss");
|
85
|
+
//RUBY_TO_JAVA_FORMAT_TABLE.put("t", "");
|
86
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("U", "w");
|
87
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("u", "e");
|
88
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("v", "dd-MMM-yyyy");
|
89
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("V", "w");
|
90
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("W", "w");
|
91
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("w", "e");
|
92
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("X", "HH:mm:ss");
|
93
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("x", "MM/dd/yy");
|
94
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("Y", "yyyy");
|
95
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("y", "yy");
|
96
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("Z", "z");
|
97
|
+
RUBY_TO_JAVA_FORMAT_TABLE.put("z", "Z");
|
98
|
+
//RUBY_TO_JAVA_FORMAT_TABLE.put("%", "");
|
99
|
+
|
100
|
+
String[] array = RUBY_TO_JAVA_FORMAT_TABLE.keySet().toArray(new String[0]);
|
101
|
+
StringBuilder keyPatternBuilder = new StringBuilder(array[0]);
|
102
|
+
for (int i = 1; i < array.length; i++) {
|
103
|
+
keyPatternBuilder.append(array[i]);
|
104
|
+
}
|
105
|
+
IDENTIFIER_PATTERN = Pattern.compile(new StringBuilder()
|
106
|
+
.append("%[-_^#0-9:]*([")
|
107
|
+
.append(keyPatternBuilder.toString())
|
108
|
+
.append("])")
|
109
|
+
.toString());
|
110
|
+
|
111
|
+
NON_IDENTIFIER_PATTERN = Pattern.compile("(^|\\s)([^%\\s]\\S*)");
|
112
|
+
}
|
113
|
+
|
114
|
+
// @return returns null if appropriate java format is not available
|
115
|
+
public static String toJavaFormat(String rubyFormat)
|
116
|
+
{
|
117
|
+
String quotedFormat = quoteFormat(rubyFormat);
|
118
|
+
Matcher match = IDENTIFIER_PATTERN.matcher(quotedFormat);
|
119
|
+
StringBuffer buf = new StringBuffer();
|
120
|
+
while (match.find()) {
|
121
|
+
String key = match.group(1);
|
122
|
+
String replacement = RUBY_TO_JAVA_FORMAT_TABLE.get(key);
|
123
|
+
match.appendReplacement(buf, replacement);
|
124
|
+
}
|
125
|
+
match.appendTail(buf);
|
126
|
+
String javaFormat = buf.toString();
|
127
|
+
|
128
|
+
if (javaFormat.contains("%")) {
|
129
|
+
return null; // give up to use java format
|
130
|
+
}
|
131
|
+
else {
|
132
|
+
return javaFormat;
|
133
|
+
}
|
134
|
+
}
|
135
|
+
|
136
|
+
private static String quoteFormat(String rubyFormat)
|
137
|
+
{
|
138
|
+
Matcher match = NON_IDENTIFIER_PATTERN.matcher(rubyFormat);
|
139
|
+
StringBuffer buf = new StringBuffer();
|
140
|
+
while (match.find()) {
|
141
|
+
String replacement = new StringBuilder().append(match.group(1)).append("'").append(match.group(2)).append("'").toString();
|
142
|
+
match.appendReplacement(buf, replacement);
|
143
|
+
}
|
144
|
+
match.appendTail(buf);
|
145
|
+
return buf.toString();
|
146
|
+
}
|
147
|
+
}
|
@@ -19,7 +19,6 @@ import org.embulk.spi.PageOutput;
|
|
19
19
|
import org.embulk.spi.PageReader;
|
20
20
|
import org.embulk.spi.Schema;
|
21
21
|
|
22
|
-
import org.embulk.spi.time.Timestamp;
|
23
22
|
import org.embulk.spi.type.BooleanType;
|
24
23
|
import org.embulk.spi.type.JsonType;
|
25
24
|
import org.embulk.spi.type.TimestampType;
|
@@ -66,6 +65,10 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
66
65
|
@ConfigDefault("false")
|
67
66
|
Boolean getStopOnInvalidRecord();
|
68
67
|
|
68
|
+
@Config("timestamp_parser")
|
69
|
+
@ConfigDefault("\"auto\"") // or auto_java
|
70
|
+
String getTimeStampParser();
|
71
|
+
|
69
72
|
@Config("default_from_timestamp_unit")
|
70
73
|
@ConfigDefault("\"second\"")
|
71
74
|
TimestampUnit getDefaultFromTimestampUnit();
|
@@ -16,8 +16,6 @@ import org.jruby.embed.ScriptingContainer;
|
|
16
16
|
import org.jruby.util.RubyDateFormat;
|
17
17
|
|
18
18
|
import java.util.Locale;
|
19
|
-
import java.util.regex.Matcher;
|
20
|
-
import java.util.regex.Pattern;
|
21
19
|
|
22
20
|
import org.joda.time.format.DateTimeFormat;
|
23
21
|
import org.joda.time.format.DateTimeFormatter;
|
@@ -21,14 +21,20 @@ public class TimestampUnitDeserializer
|
|
21
21
|
builder.put("MilliSecond", TimestampUnit.MilliSecond);
|
22
22
|
builder.put("millisecond", TimestampUnit.MilliSecond);
|
23
23
|
builder.put("milli_second", TimestampUnit.MilliSecond);
|
24
|
+
builder.put("milli", TimestampUnit.MilliSecond);
|
25
|
+
builder.put("msec", TimestampUnit.MilliSecond);
|
24
26
|
builder.put("ms", TimestampUnit.MilliSecond);
|
25
27
|
builder.put("MicroSecond", TimestampUnit.MicroSecond);
|
26
28
|
builder.put("microsecond", TimestampUnit.MicroSecond);
|
27
29
|
builder.put("micro_second", TimestampUnit.MicroSecond);
|
30
|
+
builder.put("micro", TimestampUnit.MicroSecond);
|
31
|
+
builder.put("usec", TimestampUnit.MicroSecond);
|
28
32
|
builder.put("us", TimestampUnit.MicroSecond);
|
29
33
|
builder.put("NanoSecond", TimestampUnit.NanoSecond);
|
30
34
|
builder.put("nanosecond", TimestampUnit.NanoSecond);
|
31
35
|
builder.put("nano_second", TimestampUnit.NanoSecond);
|
36
|
+
builder.put("nano", TimestampUnit.NanoSecond);
|
37
|
+
builder.put("nsec", TimestampUnit.NanoSecond);
|
32
38
|
builder.put("ns", TimestampUnit.NanoSecond);
|
33
39
|
stringToTimestampUnitMap = builder.build();
|
34
40
|
}
|
@@ -0,0 +1,114 @@
|
|
1
|
+
package org.embulk.filter.timestamp_format;
|
2
|
+
|
3
|
+
import org.embulk.EmbulkTestRuntime;
|
4
|
+
|
5
|
+
import org.embulk.spi.time.Timestamp;
|
6
|
+
import org.joda.time.DateTimeZone;
|
7
|
+
import org.jruby.embed.ScriptingContainer;
|
8
|
+
|
9
|
+
import org.junit.Before;
|
10
|
+
import org.junit.Rule;
|
11
|
+
import org.junit.Test;
|
12
|
+
|
13
|
+
import java.util.Arrays;
|
14
|
+
import java.util.Map;
|
15
|
+
|
16
|
+
import static org.junit.Assert.assertEquals;
|
17
|
+
import static org.junit.Assert.fail;
|
18
|
+
|
19
|
+
public class TestTimestampFormatConverter
|
20
|
+
{
|
21
|
+
@Rule
|
22
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
23
|
+
public ScriptingContainer jruby;
|
24
|
+
public DateTimeZone zone;
|
25
|
+
public Timestamp timestamp;
|
26
|
+
|
27
|
+
@Before
|
28
|
+
public void createResource()
|
29
|
+
{
|
30
|
+
jruby = new ScriptingContainer();
|
31
|
+
zone = DateTimeZone.UTC;
|
32
|
+
timestamp = Timestamp.ofEpochSecond(1463130159, 123456789);
|
33
|
+
}
|
34
|
+
|
35
|
+
@Test
|
36
|
+
public void testRUBY_TO_JAVA_FORMAT_TABLE()
|
37
|
+
{
|
38
|
+
for(Map.Entry<String, String> entry : TimestampFormatConverter.RUBY_TO_JAVA_FORMAT_TABLE.entrySet()) {
|
39
|
+
String rubyFormat = "%" + entry.getKey();
|
40
|
+
String javaFormat = entry.getValue();
|
41
|
+
|
42
|
+
TimestampFormatter rubyFormatter = new TimestampFormatter(jruby, rubyFormat, zone);
|
43
|
+
TimestampFormatter javaFormatter = new TimestampFormatter(jruby, javaFormat, zone);
|
44
|
+
String rubyFormatted = rubyFormatter.format(timestamp);
|
45
|
+
String javaFormatted = javaFormatter.format(timestamp);
|
46
|
+
// System.out.println(String.format("%s<%s> %s<%s>", rubyFormat, rubyFormatted, javaFormat, javaFormatted));
|
47
|
+
|
48
|
+
TimestampParser rubyParser = new TimestampParser(jruby, Arrays.asList("." + rubyFormat), zone);
|
49
|
+
TimestampParser javaParser = new TimestampParser(jruby, Arrays.asList("." + javaFormat), zone);
|
50
|
+
Timestamp rubyParsed = rubyParser.parse("." + rubyFormatted);
|
51
|
+
try {
|
52
|
+
Timestamp javaParsed = javaParser.parse("." + rubyFormatted);
|
53
|
+
}
|
54
|
+
catch (IllegalArgumentException ex) {
|
55
|
+
fail(String.format("Parse \"%s\" with java format \"%s\" failed (corresponding ruby format \"%s\")", rubyFormatted, javaFormat, rubyFormat));
|
56
|
+
}
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
@Test
|
61
|
+
public void testToJavaFormat()
|
62
|
+
{
|
63
|
+
for(Map.Entry<String, String> entry : TimestampFormatConverter.RUBY_TO_JAVA_FORMAT_TABLE.entrySet()) {
|
64
|
+
String rubyFormat = "%-2" + entry.getKey();
|
65
|
+
String javaFormat = entry.getValue();
|
66
|
+
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
67
|
+
}
|
68
|
+
}
|
69
|
+
|
70
|
+
@Test
|
71
|
+
public void testToJavaFormats()
|
72
|
+
{
|
73
|
+
{
|
74
|
+
String rubyFormat = "%Y-%m-%d %H:%M:%S.%6N %:z";
|
75
|
+
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z";
|
76
|
+
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
77
|
+
|
78
|
+
TimestampParser parser = new TimestampParser(jruby, Arrays.asList(javaFormat), zone);
|
79
|
+
try {
|
80
|
+
parser.parse("2016-05-12 20:14:13.123456789 +09:00");
|
81
|
+
}
|
82
|
+
catch (IllegalArgumentException ex) {
|
83
|
+
fail();
|
84
|
+
}
|
85
|
+
}
|
86
|
+
{
|
87
|
+
String rubyFormat = "%Y-%m-%d %H:%M:%S.%6N UTC";
|
88
|
+
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn 'UTC'";
|
89
|
+
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
90
|
+
|
91
|
+
TimestampParser parser = new TimestampParser(jruby, Arrays.asList(javaFormat), zone);
|
92
|
+
try {
|
93
|
+
parser.parse("2016-05-12 20:14:13.123456789 UTC");
|
94
|
+
}
|
95
|
+
catch (IllegalArgumentException ex) {
|
96
|
+
fail();
|
97
|
+
}
|
98
|
+
}
|
99
|
+
{
|
100
|
+
String rubyFormat = "%Y-%m-%d %H:%M:%S.%6N +00:00";
|
101
|
+
String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn '+00:00'";
|
102
|
+
assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat));
|
103
|
+
|
104
|
+
TimestampParser parser = new TimestampParser(jruby, Arrays.asList(javaFormat), zone);
|
105
|
+
try {
|
106
|
+
parser.parse("2016-05-12 20:14:13.123456789 +00:00");
|
107
|
+
}
|
108
|
+
catch (IllegalArgumentException ex) {
|
109
|
+
fail();
|
110
|
+
}
|
111
|
+
}
|
112
|
+
}
|
113
|
+
}
|
114
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-timestamp_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -60,6 +60,7 @@ files:
|
|
60
60
|
- config/checkstyle/checkstyle.xml
|
61
61
|
- example/double.csv
|
62
62
|
- example/double.yml
|
63
|
+
- example/empty.yml
|
63
64
|
- example/example.jsonl
|
64
65
|
- example/example.yml
|
65
66
|
- example/example2.csv
|
@@ -74,6 +75,7 @@ files:
|
|
74
75
|
- example/long.yml
|
75
76
|
- example/string.csv
|
76
77
|
- example/string.yml
|
78
|
+
- example/string_auto_java.yml
|
77
79
|
- example/string_java.yml
|
78
80
|
- example/string_nano.yml
|
79
81
|
- example/timestamp.csv
|
@@ -87,6 +89,7 @@ files:
|
|
87
89
|
- src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java
|
88
90
|
- src/main/java/org/embulk/filter/timestamp_format/JsonCaster.java
|
89
91
|
- src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java
|
92
|
+
- src/main/java/org/embulk/filter/timestamp_format/TimestampFormatConverter.java
|
90
93
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampFormatFilterPlugin.java
|
91
94
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java
|
92
95
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java
|
@@ -96,8 +99,9 @@ files:
|
|
96
99
|
- src/main/java/org/embulk/filter/timestamp_format/cast/LongCast.java
|
97
100
|
- src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java
|
98
101
|
- src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java
|
102
|
+
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java
|
99
103
|
- src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java
|
100
|
-
- classpath/embulk-filter-timestamp_format-0.
|
104
|
+
- classpath/embulk-filter-timestamp_format-0.2.0.jar
|
101
105
|
homepage: https://github.com/sonots/embulk-filter-timestamp_format
|
102
106
|
licenses:
|
103
107
|
- MIT
|