embulk-filter-timestamp_format 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +3 -2
- data/build.gradle +1 -7
- data/src/main/java/org/embulk/filter/TimestampFormatFilterPlugin.java +53 -42
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java +12 -9
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java +15 -9
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3a7dcaa4b130acfa1790e07f7c132d63eb677cc5
|
4
|
+
data.tar.gz: 412c0aa7676be1171a43e8a903cf428389d54bb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4af5ea7bba8fc5f6f67a342aa2170d77f5cce3d6c0ad92e0ee90bd48e587e0172b227736f5c4361251589475b398e3943d334c12dd442f3c762b3f235a712925
|
7
|
+
data.tar.gz: 3b5fdde8febd1afb7dc5d9242ef07f6f15d2a4f003f5a1a55716fa6a4e589e9a6dfaee847cf81176d18a7ded3ff9becbfd0e5bdbcbaa3f50976854a0dde3e3a9
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -8,11 +8,11 @@ A filter plugin for Embulk to change timestamp format
|
|
8
8
|
|
9
9
|
- **columns**: columns to retain (array of hash)
|
10
10
|
- **name**: name of column, must be a string column (required)
|
11
|
-
- **from_format**: specify the format of the input timestamp (
|
11
|
+
- **from_format**: specify the format of the input timestamp (array of strings, default is default_from_format)
|
12
12
|
- **from_timezone**: specify the timezone of the input timestamp (string, default is default_from_timezone)
|
13
13
|
- **to_format**: specify the format of the output timestamp (string, default is default_to_format)
|
14
14
|
- **to_timezone**: specify the timezone of the output timestamp (string, default is default_to_timezone)
|
15
|
-
- **default_from_format**: default timestamp format for the input timestamp columns (
|
15
|
+
- **default_from_format**: default timestamp format for the input timestamp columns (array of strings, default is `["%Y-%m-%d %H:%M:%S.%N %z"]`)
|
16
16
|
- **default_from_timezone**: default timezone for the input timestamp columns (string, default is `UTC`)
|
17
17
|
- **default_to_format**: default timestamp format for the output timestamp columns (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
|
18
18
|
- **default_to_timezone**: default timezone for the output timestamp columns (string, default is `UTC`)
|
@@ -54,6 +54,7 @@ Output will be as:
|
|
54
54
|
|
55
55
|
## ToDo
|
56
56
|
|
57
|
+
* Currently, input must be a String column and output will be a String column. Timestamp columns (input / output) should also be supported
|
57
58
|
* Write test
|
58
59
|
|
59
60
|
## Development
|
data/build.gradle
CHANGED
@@ -13,7 +13,7 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.1.0"
|
16
|
+
version = "0.1.1"
|
17
17
|
sourceCompatibility = 1.7
|
18
18
|
targetCompatibility = 1.7
|
19
19
|
|
@@ -70,12 +70,6 @@ Gem::Specification.new do |spec|
|
|
70
70
|
spec.test_files = spec.files.grep(%r"^(test|spec)/")
|
71
71
|
spec.require_paths = ["lib"]
|
72
72
|
|
73
|
-
if spec.respond_to?(:metadata)
|
74
|
-
spec.metadata['allowed_push_host'] = "https://rubygems.dena.jp"
|
75
|
-
else
|
76
|
-
raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
|
77
|
-
end
|
78
|
-
|
79
73
|
spec.add_development_dependency 'bundler', ['~> 1.0']
|
80
74
|
spec.add_development_dependency 'rake', ['>= 10.0']
|
81
75
|
spec.add_development_dependency 'embulk-parser-jsonl'
|
@@ -1,18 +1,17 @@
|
|
1
1
|
package org.embulk.filter;
|
2
2
|
|
3
|
-
import com.google.common.base.Optional;
|
4
3
|
import com.google.common.base.Throwables;
|
5
4
|
|
6
5
|
import org.embulk.config.Config;
|
7
|
-
import org.embulk.config.ConfigInject;
|
8
6
|
import org.embulk.config.ConfigDefault;
|
9
7
|
import org.embulk.config.ConfigException;
|
8
|
+
import org.embulk.config.ConfigInject;
|
10
9
|
import org.embulk.config.ConfigSource;
|
11
10
|
import org.embulk.config.Task;
|
12
11
|
import org.embulk.config.TaskSource;
|
13
12
|
|
14
|
-
import org.embulk.filter.timestamp_format.TimestampParser;
|
15
13
|
import org.embulk.filter.timestamp_format.TimestampFormatter;
|
14
|
+
import org.embulk.filter.timestamp_format.TimestampParser;
|
16
15
|
|
17
16
|
import org.embulk.spi.Column;
|
18
17
|
import org.embulk.spi.ColumnVisitor;
|
@@ -23,21 +22,20 @@ import org.embulk.spi.PageBuilder;
|
|
23
22
|
import org.embulk.spi.PageOutput;
|
24
23
|
import org.embulk.spi.PageReader;
|
25
24
|
import org.embulk.spi.Schema;
|
26
|
-
import org.embulk.spi.json.JsonParser;
|
27
25
|
import org.embulk.spi.time.Timestamp;
|
28
26
|
import org.embulk.spi.time.TimestampParseException;
|
29
27
|
|
30
|
-
import org.jruby.embed.ScriptingContainer;
|
31
28
|
import org.joda.time.DateTimeZone;
|
29
|
+
import org.jruby.embed.ScriptingContainer;
|
32
30
|
import org.msgpack.value.ArrayValue;
|
33
31
|
import org.msgpack.value.MapValue;
|
34
32
|
import org.msgpack.value.Value;
|
35
33
|
import org.msgpack.value.ValueFactory;
|
36
34
|
import org.slf4j.Logger;
|
37
35
|
|
36
|
+
import java.util.ArrayList;
|
38
37
|
import java.util.HashMap;
|
39
38
|
import java.util.List;
|
40
|
-
import java.util.ArrayList;
|
41
39
|
import java.util.Map;
|
42
40
|
import java.util.Objects;
|
43
41
|
|
@@ -84,6 +82,13 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
84
82
|
throw new ConfigException("\"columns\" must be specified.");
|
85
83
|
}
|
86
84
|
|
85
|
+
for (ColumnConfig columnConfig : columns) {
|
86
|
+
String name = columnConfig.getName();
|
87
|
+
if (!name.startsWith("$.")) {
|
88
|
+
inputSchema.lookupColumn(name); // throw Column 'name' is not found
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
87
92
|
control.run(task.dump(), inputSchema);
|
88
93
|
}
|
89
94
|
|
@@ -122,40 +127,13 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
122
127
|
}
|
123
128
|
|
124
129
|
return new PageOutput() {
|
125
|
-
private PageReader pageReader = new PageReader(inputSchema);
|
126
|
-
private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
127
|
-
private ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
|
128
|
-
|
129
|
-
@Override
|
130
|
-
public void finish()
|
131
|
-
{
|
132
|
-
pageBuilder.finish();
|
133
|
-
}
|
134
|
-
|
135
|
-
@Override
|
136
|
-
public void close()
|
137
|
-
{
|
138
|
-
pageBuilder.close();
|
139
|
-
}
|
140
|
-
|
141
|
-
@Override
|
142
|
-
public void add(Page page)
|
143
|
-
{
|
144
|
-
pageReader.setPage(page);
|
145
|
-
|
146
|
-
while (pageReader.nextRecord()) {
|
147
|
-
outputSchema.visitColumns(visitor);
|
148
|
-
pageBuilder.addRecord();
|
149
|
-
}
|
150
|
-
}
|
151
|
-
|
152
130
|
public Value formatTimestampStringRecursively(PluginTask task, String name, Value value)
|
153
131
|
throws TimestampParseException
|
154
132
|
{
|
155
133
|
if (value.isArrayValue()) {
|
156
134
|
ArrayValue arrayValue = value.asArrayValue();
|
157
135
|
int size = arrayValue.size();
|
158
|
-
Value
|
136
|
+
Value[] newValue = new Value[size];
|
159
137
|
for (int i = 0; i < size; i++) {
|
160
138
|
String k = new StringBuilder(name).append("[").append(Integer.toString(i)).append("]").toString();
|
161
139
|
Value v = arrayValue.get(i);
|
@@ -166,7 +144,7 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
166
144
|
else if (value.isMapValue()) {
|
167
145
|
MapValue mapValue = value.asMapValue();
|
168
146
|
int size = mapValue.size() * 2;
|
169
|
-
Value
|
147
|
+
Value[] newValue = new Value[size];
|
170
148
|
int i = 0;
|
171
149
|
for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
|
172
150
|
Value k = entry.getKey();
|
@@ -179,7 +157,7 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
179
157
|
return ValueFactory.newMap(newValue, true);
|
180
158
|
}
|
181
159
|
else if (value.isStringValue()) {
|
182
|
-
String stringValue = value.asStringValue().asString()
|
160
|
+
String stringValue = value.asStringValue().asString();
|
183
161
|
String newValue = formatTimestampString(task, name, stringValue);
|
184
162
|
return (Objects.equals(newValue, stringValue)) ? value : ValueFactory.newString(newValue);
|
185
163
|
}
|
@@ -203,18 +181,47 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
203
181
|
catch (TimestampParseException ex) {
|
204
182
|
if (task.getStopOnInvalidRecord()) {
|
205
183
|
throw Throwables.propagate(ex);
|
206
|
-
}
|
184
|
+
}
|
185
|
+
else {
|
207
186
|
logger.warn("invalid value \"{}\":\"{}\"", name, value);
|
208
187
|
return value;
|
209
188
|
}
|
210
189
|
}
|
211
190
|
}
|
212
191
|
|
192
|
+
private PageReader pageReader = new PageReader(inputSchema);
|
193
|
+
private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
194
|
+
private ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
|
195
|
+
|
196
|
+
@Override
|
197
|
+
public void finish()
|
198
|
+
{
|
199
|
+
pageBuilder.finish();
|
200
|
+
}
|
201
|
+
|
202
|
+
@Override
|
203
|
+
public void close()
|
204
|
+
{
|
205
|
+
pageBuilder.close();
|
206
|
+
}
|
207
|
+
|
208
|
+
@Override
|
209
|
+
public void add(Page page)
|
210
|
+
{
|
211
|
+
pageReader.setPage(page);
|
212
|
+
|
213
|
+
while (pageReader.nextRecord()) {
|
214
|
+
outputSchema.visitColumns(visitor);
|
215
|
+
pageBuilder.addRecord();
|
216
|
+
}
|
217
|
+
}
|
218
|
+
|
213
219
|
class ColumnVisitorImpl implements ColumnVisitor
|
214
220
|
{
|
215
221
|
private final PageBuilder pageBuilder;
|
216
222
|
|
217
|
-
ColumnVisitorImpl(PageBuilder pageBuilder)
|
223
|
+
ColumnVisitorImpl(PageBuilder pageBuilder)
|
224
|
+
{
|
218
225
|
this.pageBuilder = pageBuilder;
|
219
226
|
}
|
220
227
|
|
@@ -223,7 +230,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
223
230
|
{
|
224
231
|
if (pageReader.isNull(column)) {
|
225
232
|
pageBuilder.setNull(column);
|
226
|
-
}
|
233
|
+
}
|
234
|
+
else {
|
227
235
|
pageBuilder.setBoolean(column, pageReader.getBoolean(column));
|
228
236
|
}
|
229
237
|
}
|
@@ -233,7 +241,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
233
241
|
{
|
234
242
|
if (pageReader.isNull(column)) {
|
235
243
|
pageBuilder.setNull(column);
|
236
|
-
}
|
244
|
+
}
|
245
|
+
else {
|
237
246
|
pageBuilder.setLong(column, pageReader.getLong(column));
|
238
247
|
}
|
239
248
|
}
|
@@ -243,7 +252,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
243
252
|
{
|
244
253
|
if (pageReader.isNull(column)) {
|
245
254
|
pageBuilder.setNull(column);
|
246
|
-
}
|
255
|
+
}
|
256
|
+
else {
|
247
257
|
pageBuilder.setDouble(column, pageReader.getDouble(column));
|
248
258
|
}
|
249
259
|
}
|
@@ -279,7 +289,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
279
289
|
{
|
280
290
|
if (pageReader.isNull(column)) {
|
281
291
|
pageBuilder.setNull(column);
|
282
|
-
}
|
292
|
+
}
|
293
|
+
else {
|
283
294
|
pageBuilder.setTimestamp(column, pageReader.getTimestamp(column));
|
284
295
|
}
|
285
296
|
}
|
@@ -1,19 +1,22 @@
|
|
1
1
|
package org.embulk.filter.timestamp_format;
|
2
2
|
|
3
|
-
import java.util.Locale;
|
4
|
-
import org.jruby.embed.ScriptingContainer;
|
5
|
-
import org.joda.time.DateTime;
|
6
|
-
import org.joda.time.DateTimeZone;
|
7
3
|
import com.google.common.base.Optional;
|
8
|
-
|
9
|
-
import org.jruby.util.RubyDateFormat;
|
4
|
+
|
10
5
|
import org.embulk.config.Config;
|
11
6
|
import org.embulk.config.ConfigDefault;
|
12
|
-
import org.embulk.spi.util.LineEncoder;
|
13
|
-
import org.embulk.spi.time.Timestamp;
|
14
7
|
|
15
8
|
import org.embulk.filter.TimestampFormatFilterPlugin.PluginTask;
|
16
9
|
|
10
|
+
import org.embulk.spi.time.Timestamp;
|
11
|
+
import org.embulk.spi.util.LineEncoder;
|
12
|
+
|
13
|
+
import org.joda.time.DateTime;
|
14
|
+
import org.joda.time.DateTimeZone;
|
15
|
+
import org.jruby.embed.ScriptingContainer;
|
16
|
+
import org.jruby.util.RubyDateFormat;
|
17
|
+
|
18
|
+
import java.util.Locale;
|
19
|
+
|
17
20
|
public class TimestampFormatter
|
18
21
|
{
|
19
22
|
public interface Task
|
@@ -72,7 +75,7 @@ public class TimestampFormatter
|
|
72
75
|
public String format(Timestamp value)
|
73
76
|
{
|
74
77
|
// TODO optimize by using reused StringBuilder
|
75
|
-
toDateFormat.setDateTime(new DateTime(value.getEpochSecond()*1000, toTimeZone));
|
78
|
+
toDateFormat.setDateTime(new DateTime(value.getEpochSecond() * 1000, toTimeZone));
|
76
79
|
toDateFormat.setNSec(value.getNano());
|
77
80
|
return toDateFormat.format(null);
|
78
81
|
}
|
@@ -1,19 +1,24 @@
|
|
1
1
|
package org.embulk.filter.timestamp_format;
|
2
2
|
|
3
|
-
import org.jruby.embed.ScriptingContainer;
|
4
|
-
import org.joda.time.DateTimeZone;
|
5
3
|
import com.google.common.base.Optional;
|
4
|
+
|
6
5
|
import org.embulk.config.Config;
|
7
6
|
import org.embulk.config.ConfigDefault;
|
8
|
-
|
9
|
-
import org.embulk.
|
10
|
-
|
7
|
+
|
8
|
+
import org.embulk.filter.TimestampFormatFilterPlugin.PluginTask;
|
9
|
+
|
11
10
|
import org.embulk.spi.time.JRubyTimeParserHelper;
|
12
11
|
import org.embulk.spi.time.JRubyTimeParserHelperFactory;
|
12
|
+
import org.embulk.spi.time.Timestamp;
|
13
|
+
import org.embulk.spi.time.TimestampParseException;
|
14
|
+
|
15
|
+
import static org.embulk.spi.time.TimestampFormat.parseDateTimeZone;
|
16
|
+
|
17
|
+
import org.joda.time.DateTimeZone;
|
18
|
+
import org.jruby.embed.ScriptingContainer;
|
13
19
|
|
14
|
-
import org.embulk.filter.TimestampFormatFilterPlugin.PluginTask;
|
15
|
-
import java.util.List;
|
16
20
|
import java.util.ArrayList;
|
21
|
+
import java.util.List;
|
17
22
|
|
18
23
|
public class TimestampParser
|
19
24
|
{
|
@@ -81,7 +86,8 @@ public class TimestampParser
|
|
81
86
|
helper = h;
|
82
87
|
try {
|
83
88
|
localUsec = helper.strptimeUsec(text);
|
84
|
-
}
|
89
|
+
}
|
90
|
+
catch (TimestampParseException ex) {
|
85
91
|
exception = ex;
|
86
92
|
}
|
87
93
|
}
|
@@ -101,7 +107,7 @@ public class TimestampParser
|
|
101
107
|
|
102
108
|
long localSec = localUsec / 1000000;
|
103
109
|
long usec = localUsec % 1000000;
|
104
|
-
long sec = timeZone.convertLocalToUTC(localSec*1000, false) / 1000;
|
110
|
+
long sec = timeZone.convertLocalToUTC(localSec * 1000, false) / 1000;
|
105
111
|
|
106
112
|
return Timestamp.ofEpochSecond(sec, usec * 1000);
|
107
113
|
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-timestamp_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
@@ -77,12 +77,11 @@ files:
|
|
77
77
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java
|
78
78
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java
|
79
79
|
- src/test/java/org/embulk/filter/TestTimestampFormatFilterPlugin.java
|
80
|
-
- classpath/embulk-filter-timestamp_format-0.1.0.jar
|
80
|
+
- classpath/embulk-filter-timestamp_format-0.1.1.jar
|
81
81
|
homepage: https://github.com/sonots/embulk-filter-timestamp_format
|
82
82
|
licenses:
|
83
83
|
- MIT
|
84
|
-
metadata:
|
85
|
-
allowed_push_host: https://rubygems.dena.jp
|
84
|
+
metadata: {}
|
86
85
|
post_install_message:
|
87
86
|
rdoc_options: []
|
88
87
|
require_paths:
|