embulk-filter-timestamp_format 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +3 -2
- data/build.gradle +1 -7
- data/src/main/java/org/embulk/filter/TimestampFormatFilterPlugin.java +53 -42
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java +12 -9
- data/src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java +15 -9
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3a7dcaa4b130acfa1790e07f7c132d63eb677cc5
|
4
|
+
data.tar.gz: 412c0aa7676be1171a43e8a903cf428389d54bb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4af5ea7bba8fc5f6f67a342aa2170d77f5cce3d6c0ad92e0ee90bd48e587e0172b227736f5c4361251589475b398e3943d334c12dd442f3c762b3f235a712925
|
7
|
+
data.tar.gz: 3b5fdde8febd1afb7dc5d9242ef07f6f15d2a4f003f5a1a55716fa6a4e589e9a6dfaee847cf81176d18a7ded3ff9becbfd0e5bdbcbaa3f50976854a0dde3e3a9
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -8,11 +8,11 @@ A filter plugin for Embulk to change timestamp format
|
|
8
8
|
|
9
9
|
- **columns**: columns to retain (array of hash)
|
10
10
|
- **name**: name of column, must be a string column (required)
|
11
|
-
- **from_format**: specify the format of the input timestamp (
|
11
|
+
- **from_format**: specify the format of the input timestamp (array of strings, default is default_from_format)
|
12
12
|
- **from_timezone**: specify the timezone of the input timestamp (string, default is default_from_timezone)
|
13
13
|
- **to_format**: specify the format of the output timestamp (string, default is default_to_format)
|
14
14
|
- **to_timezone**: specify the timezone of the output timestamp (string, default is default_to_timezone)
|
15
|
-
- **default_from_format**: default timestamp format for the input timestamp columns (
|
15
|
+
- **default_from_format**: default timestamp format for the input timestamp columns (array of strings, default is `["%Y-%m-%d %H:%M:%S.%N %z"]`)
|
16
16
|
- **default_from_timezone**: default timezone for the input timestamp columns (string, default is `UTC`)
|
17
17
|
- **default_to_format**: default timestamp format for the output timestamp columns (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
|
18
18
|
- **default_to_timezone**: default timezone for the output timestamp columns (string, default is `UTC`)
|
@@ -54,6 +54,7 @@ Output will be as:
|
|
54
54
|
|
55
55
|
## ToDo
|
56
56
|
|
57
|
+
* Currently, the input must be a String column and the output will be a String column. Timestamp columns should also be supported (input / output)
|
57
58
|
* Write test
|
58
59
|
|
59
60
|
## Development
|
data/build.gradle
CHANGED
@@ -13,7 +13,7 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.1.0"
|
16
|
+
version = "0.1.1"
|
17
17
|
sourceCompatibility = 1.7
|
18
18
|
targetCompatibility = 1.7
|
19
19
|
|
@@ -70,12 +70,6 @@ Gem::Specification.new do |spec|
|
|
70
70
|
spec.test_files = spec.files.grep(%r"^(test|spec)/")
|
71
71
|
spec.require_paths = ["lib"]
|
72
72
|
|
73
|
-
if spec.respond_to?(:metadata)
|
74
|
-
spec.metadata['allowed_push_host'] = "https://rubygems.dena.jp"
|
75
|
-
else
|
76
|
-
raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
|
77
|
-
end
|
78
|
-
|
79
73
|
spec.add_development_dependency 'bundler', ['~> 1.0']
|
80
74
|
spec.add_development_dependency 'rake', ['>= 10.0']
|
81
75
|
spec.add_development_dependency 'embulk-parser-jsonl'
|
@@ -1,18 +1,17 @@
|
|
1
1
|
package org.embulk.filter;
|
2
2
|
|
3
|
-
import com.google.common.base.Optional;
|
4
3
|
import com.google.common.base.Throwables;
|
5
4
|
|
6
5
|
import org.embulk.config.Config;
|
7
|
-
import org.embulk.config.ConfigInject;
|
8
6
|
import org.embulk.config.ConfigDefault;
|
9
7
|
import org.embulk.config.ConfigException;
|
8
|
+
import org.embulk.config.ConfigInject;
|
10
9
|
import org.embulk.config.ConfigSource;
|
11
10
|
import org.embulk.config.Task;
|
12
11
|
import org.embulk.config.TaskSource;
|
13
12
|
|
14
|
-
import org.embulk.filter.timestamp_format.TimestampParser;
|
15
13
|
import org.embulk.filter.timestamp_format.TimestampFormatter;
|
14
|
+
import org.embulk.filter.timestamp_format.TimestampParser;
|
16
15
|
|
17
16
|
import org.embulk.spi.Column;
|
18
17
|
import org.embulk.spi.ColumnVisitor;
|
@@ -23,21 +22,20 @@ import org.embulk.spi.PageBuilder;
|
|
23
22
|
import org.embulk.spi.PageOutput;
|
24
23
|
import org.embulk.spi.PageReader;
|
25
24
|
import org.embulk.spi.Schema;
|
26
|
-
import org.embulk.spi.json.JsonParser;
|
27
25
|
import org.embulk.spi.time.Timestamp;
|
28
26
|
import org.embulk.spi.time.TimestampParseException;
|
29
27
|
|
30
|
-
import org.jruby.embed.ScriptingContainer;
|
31
28
|
import org.joda.time.DateTimeZone;
|
29
|
+
import org.jruby.embed.ScriptingContainer;
|
32
30
|
import org.msgpack.value.ArrayValue;
|
33
31
|
import org.msgpack.value.MapValue;
|
34
32
|
import org.msgpack.value.Value;
|
35
33
|
import org.msgpack.value.ValueFactory;
|
36
34
|
import org.slf4j.Logger;
|
37
35
|
|
36
|
+
import java.util.ArrayList;
|
38
37
|
import java.util.HashMap;
|
39
38
|
import java.util.List;
|
40
|
-
import java.util.ArrayList;
|
41
39
|
import java.util.Map;
|
42
40
|
import java.util.Objects;
|
43
41
|
|
@@ -84,6 +82,13 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
84
82
|
throw new ConfigException("\"columns\" must be specified.");
|
85
83
|
}
|
86
84
|
|
85
|
+
for (ColumnConfig columnConfig : columns) {
|
86
|
+
String name = columnConfig.getName();
|
87
|
+
if (!name.startsWith("$.")) {
|
88
|
+
inputSchema.lookupColumn(name); // throw Column 'name' is not found
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
87
92
|
control.run(task.dump(), inputSchema);
|
88
93
|
}
|
89
94
|
|
@@ -122,40 +127,13 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
122
127
|
}
|
123
128
|
|
124
129
|
return new PageOutput() {
|
125
|
-
private PageReader pageReader = new PageReader(inputSchema);
|
126
|
-
private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
127
|
-
private ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
|
128
|
-
|
129
|
-
@Override
|
130
|
-
public void finish()
|
131
|
-
{
|
132
|
-
pageBuilder.finish();
|
133
|
-
}
|
134
|
-
|
135
|
-
@Override
|
136
|
-
public void close()
|
137
|
-
{
|
138
|
-
pageBuilder.close();
|
139
|
-
}
|
140
|
-
|
141
|
-
@Override
|
142
|
-
public void add(Page page)
|
143
|
-
{
|
144
|
-
pageReader.setPage(page);
|
145
|
-
|
146
|
-
while (pageReader.nextRecord()) {
|
147
|
-
outputSchema.visitColumns(visitor);
|
148
|
-
pageBuilder.addRecord();
|
149
|
-
}
|
150
|
-
}
|
151
|
-
|
152
130
|
public Value formatTimestampStringRecursively(PluginTask task, String name, Value value)
|
153
131
|
throws TimestampParseException
|
154
132
|
{
|
155
133
|
if (value.isArrayValue()) {
|
156
134
|
ArrayValue arrayValue = value.asArrayValue();
|
157
135
|
int size = arrayValue.size();
|
158
|
-
Value
|
136
|
+
Value[] newValue = new Value[size];
|
159
137
|
for (int i = 0; i < size; i++) {
|
160
138
|
String k = new StringBuilder(name).append("[").append(Integer.toString(i)).append("]").toString();
|
161
139
|
Value v = arrayValue.get(i);
|
@@ -166,7 +144,7 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
166
144
|
else if (value.isMapValue()) {
|
167
145
|
MapValue mapValue = value.asMapValue();
|
168
146
|
int size = mapValue.size() * 2;
|
169
|
-
Value
|
147
|
+
Value[] newValue = new Value[size];
|
170
148
|
int i = 0;
|
171
149
|
for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
|
172
150
|
Value k = entry.getKey();
|
@@ -179,7 +157,7 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
179
157
|
return ValueFactory.newMap(newValue, true);
|
180
158
|
}
|
181
159
|
else if (value.isStringValue()) {
|
182
|
-
String stringValue = value.asStringValue().asString()
|
160
|
+
String stringValue = value.asStringValue().asString();
|
183
161
|
String newValue = formatTimestampString(task, name, stringValue);
|
184
162
|
return (Objects.equals(newValue, stringValue)) ? value : ValueFactory.newString(newValue);
|
185
163
|
}
|
@@ -203,18 +181,47 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
203
181
|
catch (TimestampParseException ex) {
|
204
182
|
if (task.getStopOnInvalidRecord()) {
|
205
183
|
throw Throwables.propagate(ex);
|
206
|
-
}
|
184
|
+
}
|
185
|
+
else {
|
207
186
|
logger.warn("invalid value \"{}\":\"{}\"", name, value);
|
208
187
|
return value;
|
209
188
|
}
|
210
189
|
}
|
211
190
|
}
|
212
191
|
|
192
|
+
private PageReader pageReader = new PageReader(inputSchema);
|
193
|
+
private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
194
|
+
private ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
|
195
|
+
|
196
|
+
@Override
|
197
|
+
public void finish()
|
198
|
+
{
|
199
|
+
pageBuilder.finish();
|
200
|
+
}
|
201
|
+
|
202
|
+
@Override
|
203
|
+
public void close()
|
204
|
+
{
|
205
|
+
pageBuilder.close();
|
206
|
+
}
|
207
|
+
|
208
|
+
@Override
|
209
|
+
public void add(Page page)
|
210
|
+
{
|
211
|
+
pageReader.setPage(page);
|
212
|
+
|
213
|
+
while (pageReader.nextRecord()) {
|
214
|
+
outputSchema.visitColumns(visitor);
|
215
|
+
pageBuilder.addRecord();
|
216
|
+
}
|
217
|
+
}
|
218
|
+
|
213
219
|
class ColumnVisitorImpl implements ColumnVisitor
|
214
220
|
{
|
215
221
|
private final PageBuilder pageBuilder;
|
216
222
|
|
217
|
-
ColumnVisitorImpl(PageBuilder pageBuilder)
|
223
|
+
ColumnVisitorImpl(PageBuilder pageBuilder)
|
224
|
+
{
|
218
225
|
this.pageBuilder = pageBuilder;
|
219
226
|
}
|
220
227
|
|
@@ -223,7 +230,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
223
230
|
{
|
224
231
|
if (pageReader.isNull(column)) {
|
225
232
|
pageBuilder.setNull(column);
|
226
|
-
}
|
233
|
+
}
|
234
|
+
else {
|
227
235
|
pageBuilder.setBoolean(column, pageReader.getBoolean(column));
|
228
236
|
}
|
229
237
|
}
|
@@ -233,7 +241,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
233
241
|
{
|
234
242
|
if (pageReader.isNull(column)) {
|
235
243
|
pageBuilder.setNull(column);
|
236
|
-
}
|
244
|
+
}
|
245
|
+
else {
|
237
246
|
pageBuilder.setLong(column, pageReader.getLong(column));
|
238
247
|
}
|
239
248
|
}
|
@@ -243,7 +252,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
243
252
|
{
|
244
253
|
if (pageReader.isNull(column)) {
|
245
254
|
pageBuilder.setNull(column);
|
246
|
-
}
|
255
|
+
}
|
256
|
+
else {
|
247
257
|
pageBuilder.setDouble(column, pageReader.getDouble(column));
|
248
258
|
}
|
249
259
|
}
|
@@ -279,7 +289,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
|
|
279
289
|
{
|
280
290
|
if (pageReader.isNull(column)) {
|
281
291
|
pageBuilder.setNull(column);
|
282
|
-
}
|
292
|
+
}
|
293
|
+
else {
|
283
294
|
pageBuilder.setTimestamp(column, pageReader.getTimestamp(column));
|
284
295
|
}
|
285
296
|
}
|
@@ -1,19 +1,22 @@
|
|
1
1
|
package org.embulk.filter.timestamp_format;
|
2
2
|
|
3
|
-
import java.util.Locale;
|
4
|
-
import org.jruby.embed.ScriptingContainer;
|
5
|
-
import org.joda.time.DateTime;
|
6
|
-
import org.joda.time.DateTimeZone;
|
7
3
|
import com.google.common.base.Optional;
|
8
|
-
|
9
|
-
import org.jruby.util.RubyDateFormat;
|
4
|
+
|
10
5
|
import org.embulk.config.Config;
|
11
6
|
import org.embulk.config.ConfigDefault;
|
12
|
-
import org.embulk.spi.util.LineEncoder;
|
13
|
-
import org.embulk.spi.time.Timestamp;
|
14
7
|
|
15
8
|
import org.embulk.filter.TimestampFormatFilterPlugin.PluginTask;
|
16
9
|
|
10
|
+
import org.embulk.spi.time.Timestamp;
|
11
|
+
import org.embulk.spi.util.LineEncoder;
|
12
|
+
|
13
|
+
import org.joda.time.DateTime;
|
14
|
+
import org.joda.time.DateTimeZone;
|
15
|
+
import org.jruby.embed.ScriptingContainer;
|
16
|
+
import org.jruby.util.RubyDateFormat;
|
17
|
+
|
18
|
+
import java.util.Locale;
|
19
|
+
|
17
20
|
public class TimestampFormatter
|
18
21
|
{
|
19
22
|
public interface Task
|
@@ -72,7 +75,7 @@ public class TimestampFormatter
|
|
72
75
|
public String format(Timestamp value)
|
73
76
|
{
|
74
77
|
// TODO optimize by using reused StringBuilder
|
75
|
-
toDateFormat.setDateTime(new DateTime(value.getEpochSecond()*1000, toTimeZone));
|
78
|
+
toDateFormat.setDateTime(new DateTime(value.getEpochSecond() * 1000, toTimeZone));
|
76
79
|
toDateFormat.setNSec(value.getNano());
|
77
80
|
return toDateFormat.format(null);
|
78
81
|
}
|
@@ -1,19 +1,24 @@
|
|
1
1
|
package org.embulk.filter.timestamp_format;
|
2
2
|
|
3
|
-
import org.jruby.embed.ScriptingContainer;
|
4
|
-
import org.joda.time.DateTimeZone;
|
5
3
|
import com.google.common.base.Optional;
|
4
|
+
|
6
5
|
import org.embulk.config.Config;
|
7
6
|
import org.embulk.config.ConfigDefault;
|
8
|
-
|
9
|
-
import org.embulk.
|
10
|
-
|
7
|
+
|
8
|
+
import org.embulk.filter.TimestampFormatFilterPlugin.PluginTask;
|
9
|
+
|
11
10
|
import org.embulk.spi.time.JRubyTimeParserHelper;
|
12
11
|
import org.embulk.spi.time.JRubyTimeParserHelperFactory;
|
12
|
+
import org.embulk.spi.time.Timestamp;
|
13
|
+
import org.embulk.spi.time.TimestampParseException;
|
14
|
+
|
15
|
+
import static org.embulk.spi.time.TimestampFormat.parseDateTimeZone;
|
16
|
+
|
17
|
+
import org.joda.time.DateTimeZone;
|
18
|
+
import org.jruby.embed.ScriptingContainer;
|
13
19
|
|
14
|
-
import org.embulk.filter.TimestampFormatFilterPlugin.PluginTask;
|
15
|
-
import java.util.List;
|
16
20
|
import java.util.ArrayList;
|
21
|
+
import java.util.List;
|
17
22
|
|
18
23
|
public class TimestampParser
|
19
24
|
{
|
@@ -81,7 +86,8 @@ public class TimestampParser
|
|
81
86
|
helper = h;
|
82
87
|
try {
|
83
88
|
localUsec = helper.strptimeUsec(text);
|
84
|
-
}
|
89
|
+
}
|
90
|
+
catch (TimestampParseException ex) {
|
85
91
|
exception = ex;
|
86
92
|
}
|
87
93
|
}
|
@@ -101,7 +107,7 @@ public class TimestampParser
|
|
101
107
|
|
102
108
|
long localSec = localUsec / 1000000;
|
103
109
|
long usec = localUsec % 1000000;
|
104
|
-
long sec = timeZone.convertLocalToUTC(localSec*1000, false) / 1000;
|
110
|
+
long sec = timeZone.convertLocalToUTC(localSec * 1000, false) / 1000;
|
105
111
|
|
106
112
|
return Timestamp.ofEpochSecond(sec, usec * 1000);
|
107
113
|
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-timestamp_format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
@@ -77,12 +77,11 @@ files:
|
|
77
77
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java
|
78
78
|
- src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java
|
79
79
|
- src/test/java/org/embulk/filter/TestTimestampFormatFilterPlugin.java
|
80
|
-
- classpath/embulk-filter-timestamp_format-0.1.0.jar
|
80
|
+
- classpath/embulk-filter-timestamp_format-0.1.1.jar
|
81
81
|
homepage: https://github.com/sonots/embulk-filter-timestamp_format
|
82
82
|
licenses:
|
83
83
|
- MIT
|
84
|
-
metadata:
|
85
|
-
allowed_push_host: https://rubygems.dena.jp
|
84
|
+
metadata: {}
|
86
85
|
post_install_message:
|
87
86
|
rdoc_options: []
|
88
87
|
require_paths:
|