embulk-filter-timestamp_format 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5bbc918d5bf6ffd22de31d35b00909dcb7e29b8a
4
- data.tar.gz: 23ac08f9781e5c0accdd06c58034801f738ab4b0
3
+ metadata.gz: 3a7dcaa4b130acfa1790e07f7c132d63eb677cc5
4
+ data.tar.gz: 412c0aa7676be1171a43e8a903cf428389d54bb6
5
5
  SHA512:
6
- metadata.gz: c1db3ebc3893536c371f7765151d9ea673a21f05bd655a311a0a935b2f0da61cbb66bb18bd33dd59a7e63a12754be0f74991a431696ef091e48f68f2a2ff1b9d
7
- data.tar.gz: e26a3e99225d16ed059f0201ecb7a1ecf009fb1544b0137ea32d2d7d49a6f89815bbc08e608b73f503fcbd21e353ea12f9a62aa5d5fee161af8ad37806b172f4
6
+ metadata.gz: 4af5ea7bba8fc5f6f67a342aa2170d77f5cce3d6c0ad92e0ee90bd48e587e0172b227736f5c4361251589475b398e3943d334c12dd442f3c762b3f235a712925
7
+ data.tar.gz: 3b5fdde8febd1afb7dc5d9242ef07f6f15d2a4f003f5a1a55716fa6a4e589e9a6dfaee847cf81176d18a7ded3ff9becbfd0e5bdbcbaa3f50976854a0dde3e3a9
data/CHANGELOG.md CHANGED
@@ -0,0 +1,9 @@
1
+ # 0.1.1 (2016-04-26)
2
+
3
+ Enhancements:
4
+
5
+ * Check whether specified columns exist
6
+
7
+ # 0.1.0 (2016-04-26)
8
+
9
+ initial version
data/README.md CHANGED
@@ -8,11 +8,11 @@ A filter plugin for Embulk to change timestamp format
8
8
 
9
9
  - **columns**: columns to retain (array of hash)
10
10
  - **name**: name of column, must be a string column (required)
11
- - **from_format**: specify the format of the input timestamp (string or an array, default is default_from_format)
11
+ - **from_format**: specify the format of the input timestamp (array of strings, default is default_from_format)
12
12
  - **from_timezone**: specify the timezone of the input timestamp (string, default is default_from_timezone)
13
13
  - **to_format**: specify the format of the output timestamp (string, default is default_to_format)
14
14
  - **to_timezone**: specify the timezone of the output timestamp (string, default is default_to_timezone)
15
- - **default_from_format**: default timestamp format for the input timestamp columns (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
15
+ - **default_from_format**: default timestamp format for the input timestamp columns (array of strings, default is `["%Y-%m-%d %H:%M:%S.%N %z"]`)
16
16
  - **default_from_timezone**: default timezone for the input timestamp columns (string, default is `UTC`)
17
17
  - **default_to_format**: default timestamp format for the output timestamp columns (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
18
18
  - **default_to_timezone**: default timezone for the output timestamp columns (string, default is `UTC`)
@@ -54,6 +54,7 @@ Output will be as:
54
54
 
55
55
  ## ToDo
56
56
 
57
+ * Currently, input must be a String column and output will be a String column. Support for Timestamp columns (input / output) still needs to be added
57
58
  * Write test
58
59
 
59
60
  ## Development
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.0"
16
+ version = "0.1.1"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
@@ -70,12 +70,6 @@ Gem::Specification.new do |spec|
70
70
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
71
71
  spec.require_paths = ["lib"]
72
72
 
73
- if spec.respond_to?(:metadata)
74
- spec.metadata['allowed_push_host'] = "https://rubygems.dena.jp"
75
- else
76
- raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
77
- end
78
-
79
73
  spec.add_development_dependency 'bundler', ['~> 1.0']
80
74
  spec.add_development_dependency 'rake', ['>= 10.0']
81
75
  spec.add_development_dependency 'embulk-parser-jsonl'
@@ -1,18 +1,17 @@
1
1
  package org.embulk.filter;
2
2
 
3
- import com.google.common.base.Optional;
4
3
  import com.google.common.base.Throwables;
5
4
 
6
5
  import org.embulk.config.Config;
7
- import org.embulk.config.ConfigInject;
8
6
  import org.embulk.config.ConfigDefault;
9
7
  import org.embulk.config.ConfigException;
8
+ import org.embulk.config.ConfigInject;
10
9
  import org.embulk.config.ConfigSource;
11
10
  import org.embulk.config.Task;
12
11
  import org.embulk.config.TaskSource;
13
12
 
14
- import org.embulk.filter.timestamp_format.TimestampParser;
15
13
  import org.embulk.filter.timestamp_format.TimestampFormatter;
14
+ import org.embulk.filter.timestamp_format.TimestampParser;
16
15
 
17
16
  import org.embulk.spi.Column;
18
17
  import org.embulk.spi.ColumnVisitor;
@@ -23,21 +22,20 @@ import org.embulk.spi.PageBuilder;
23
22
  import org.embulk.spi.PageOutput;
24
23
  import org.embulk.spi.PageReader;
25
24
  import org.embulk.spi.Schema;
26
- import org.embulk.spi.json.JsonParser;
27
25
  import org.embulk.spi.time.Timestamp;
28
26
  import org.embulk.spi.time.TimestampParseException;
29
27
 
30
- import org.jruby.embed.ScriptingContainer;
31
28
  import org.joda.time.DateTimeZone;
29
+ import org.jruby.embed.ScriptingContainer;
32
30
  import org.msgpack.value.ArrayValue;
33
31
  import org.msgpack.value.MapValue;
34
32
  import org.msgpack.value.Value;
35
33
  import org.msgpack.value.ValueFactory;
36
34
  import org.slf4j.Logger;
37
35
 
36
+ import java.util.ArrayList;
38
37
  import java.util.HashMap;
39
38
  import java.util.List;
40
- import java.util.ArrayList;
41
39
  import java.util.Map;
42
40
  import java.util.Objects;
43
41
 
@@ -84,6 +82,13 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
84
82
  throw new ConfigException("\"columns\" must be specified.");
85
83
  }
86
84
 
85
+ for (ColumnConfig columnConfig : columns) {
86
+ String name = columnConfig.getName();
87
+ if (!name.startsWith("$.")) {
88
+ inputSchema.lookupColumn(name); // throw Column 'name' is not found
89
+ }
90
+ }
91
+
87
92
  control.run(task.dump(), inputSchema);
88
93
  }
89
94
 
@@ -122,40 +127,13 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
122
127
  }
123
128
 
124
129
  return new PageOutput() {
125
- private PageReader pageReader = new PageReader(inputSchema);
126
- private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
127
- private ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
128
-
129
- @Override
130
- public void finish()
131
- {
132
- pageBuilder.finish();
133
- }
134
-
135
- @Override
136
- public void close()
137
- {
138
- pageBuilder.close();
139
- }
140
-
141
- @Override
142
- public void add(Page page)
143
- {
144
- pageReader.setPage(page);
145
-
146
- while (pageReader.nextRecord()) {
147
- outputSchema.visitColumns(visitor);
148
- pageBuilder.addRecord();
149
- }
150
- }
151
-
152
130
  public Value formatTimestampStringRecursively(PluginTask task, String name, Value value)
153
131
  throws TimestampParseException
154
132
  {
155
133
  if (value.isArrayValue()) {
156
134
  ArrayValue arrayValue = value.asArrayValue();
157
135
  int size = arrayValue.size();
158
- Value newValue[] = new Value[size];
136
+ Value[] newValue = new Value[size];
159
137
  for (int i = 0; i < size; i++) {
160
138
  String k = new StringBuilder(name).append("[").append(Integer.toString(i)).append("]").toString();
161
139
  Value v = arrayValue.get(i);
@@ -166,7 +144,7 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
166
144
  else if (value.isMapValue()) {
167
145
  MapValue mapValue = value.asMapValue();
168
146
  int size = mapValue.size() * 2;
169
- Value newValue[] = new Value[size];
147
+ Value[] newValue = new Value[size];
170
148
  int i = 0;
171
149
  for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
172
150
  Value k = entry.getKey();
@@ -179,7 +157,7 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
179
157
  return ValueFactory.newMap(newValue, true);
180
158
  }
181
159
  else if (value.isStringValue()) {
182
- String stringValue = value.asStringValue().asString() ;
160
+ String stringValue = value.asStringValue().asString();
183
161
  String newValue = formatTimestampString(task, name, stringValue);
184
162
  return (Objects.equals(newValue, stringValue)) ? value : ValueFactory.newString(newValue);
185
163
  }
@@ -203,18 +181,47 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
203
181
  catch (TimestampParseException ex) {
204
182
  if (task.getStopOnInvalidRecord()) {
205
183
  throw Throwables.propagate(ex);
206
- } else {
184
+ }
185
+ else {
207
186
  logger.warn("invalid value \"{}\":\"{}\"", name, value);
208
187
  return value;
209
188
  }
210
189
  }
211
190
  }
212
191
 
192
+ private PageReader pageReader = new PageReader(inputSchema);
193
+ private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
194
+ private ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
195
+
196
+ @Override
197
+ public void finish()
198
+ {
199
+ pageBuilder.finish();
200
+ }
201
+
202
+ @Override
203
+ public void close()
204
+ {
205
+ pageBuilder.close();
206
+ }
207
+
208
+ @Override
209
+ public void add(Page page)
210
+ {
211
+ pageReader.setPage(page);
212
+
213
+ while (pageReader.nextRecord()) {
214
+ outputSchema.visitColumns(visitor);
215
+ pageBuilder.addRecord();
216
+ }
217
+ }
218
+
213
219
  class ColumnVisitorImpl implements ColumnVisitor
214
220
  {
215
221
  private final PageBuilder pageBuilder;
216
222
 
217
- ColumnVisitorImpl(PageBuilder pageBuilder) {
223
+ ColumnVisitorImpl(PageBuilder pageBuilder)
224
+ {
218
225
  this.pageBuilder = pageBuilder;
219
226
  }
220
227
 
@@ -223,7 +230,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
223
230
  {
224
231
  if (pageReader.isNull(column)) {
225
232
  pageBuilder.setNull(column);
226
- } else {
233
+ }
234
+ else {
227
235
  pageBuilder.setBoolean(column, pageReader.getBoolean(column));
228
236
  }
229
237
  }
@@ -233,7 +241,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
233
241
  {
234
242
  if (pageReader.isNull(column)) {
235
243
  pageBuilder.setNull(column);
236
- } else {
244
+ }
245
+ else {
237
246
  pageBuilder.setLong(column, pageReader.getLong(column));
238
247
  }
239
248
  }
@@ -243,7 +252,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
243
252
  {
244
253
  if (pageReader.isNull(column)) {
245
254
  pageBuilder.setNull(column);
246
- } else {
255
+ }
256
+ else {
247
257
  pageBuilder.setDouble(column, pageReader.getDouble(column));
248
258
  }
249
259
  }
@@ -279,7 +289,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
279
289
  {
280
290
  if (pageReader.isNull(column)) {
281
291
  pageBuilder.setNull(column);
282
- } else {
292
+ }
293
+ else {
283
294
  pageBuilder.setTimestamp(column, pageReader.getTimestamp(column));
284
295
  }
285
296
  }
@@ -1,19 +1,22 @@
1
1
  package org.embulk.filter.timestamp_format;
2
2
 
3
- import java.util.Locale;
4
- import org.jruby.embed.ScriptingContainer;
5
- import org.joda.time.DateTime;
6
- import org.joda.time.DateTimeZone;
7
3
  import com.google.common.base.Optional;
8
- import org.jruby.embed.ScriptingContainer;
9
- import org.jruby.util.RubyDateFormat;
4
+
10
5
  import org.embulk.config.Config;
11
6
  import org.embulk.config.ConfigDefault;
12
- import org.embulk.spi.util.LineEncoder;
13
- import org.embulk.spi.time.Timestamp;
14
7
 
15
8
  import org.embulk.filter.TimestampFormatFilterPlugin.PluginTask;
16
9
 
10
+ import org.embulk.spi.time.Timestamp;
11
+ import org.embulk.spi.util.LineEncoder;
12
+
13
+ import org.joda.time.DateTime;
14
+ import org.joda.time.DateTimeZone;
15
+ import org.jruby.embed.ScriptingContainer;
16
+ import org.jruby.util.RubyDateFormat;
17
+
18
+ import java.util.Locale;
19
+
17
20
  public class TimestampFormatter
18
21
  {
19
22
  public interface Task
@@ -72,7 +75,7 @@ public class TimestampFormatter
72
75
  public String format(Timestamp value)
73
76
  {
74
77
  // TODO optimize by using reused StringBuilder
75
- toDateFormat.setDateTime(new DateTime(value.getEpochSecond()*1000, toTimeZone));
78
+ toDateFormat.setDateTime(new DateTime(value.getEpochSecond() * 1000, toTimeZone));
76
79
  toDateFormat.setNSec(value.getNano());
77
80
  return toDateFormat.format(null);
78
81
  }
@@ -1,19 +1,24 @@
1
1
  package org.embulk.filter.timestamp_format;
2
2
 
3
- import org.jruby.embed.ScriptingContainer;
4
- import org.joda.time.DateTimeZone;
5
3
  import com.google.common.base.Optional;
4
+
6
5
  import org.embulk.config.Config;
7
6
  import org.embulk.config.ConfigDefault;
8
- import static org.embulk.spi.time.TimestampFormat.parseDateTimeZone;
9
- import org.embulk.spi.time.Timestamp;
10
- import org.embulk.spi.time.TimestampParseException;
7
+
8
+ import org.embulk.filter.TimestampFormatFilterPlugin.PluginTask;
9
+
11
10
  import org.embulk.spi.time.JRubyTimeParserHelper;
12
11
  import org.embulk.spi.time.JRubyTimeParserHelperFactory;
12
+ import org.embulk.spi.time.Timestamp;
13
+ import org.embulk.spi.time.TimestampParseException;
14
+
15
+ import static org.embulk.spi.time.TimestampFormat.parseDateTimeZone;
16
+
17
+ import org.joda.time.DateTimeZone;
18
+ import org.jruby.embed.ScriptingContainer;
13
19
 
14
- import org.embulk.filter.TimestampFormatFilterPlugin.PluginTask;
15
- import java.util.List;
16
20
  import java.util.ArrayList;
21
+ import java.util.List;
17
22
 
18
23
  public class TimestampParser
19
24
  {
@@ -81,7 +86,8 @@ public class TimestampParser
81
86
  helper = h;
82
87
  try {
83
88
  localUsec = helper.strptimeUsec(text);
84
- } catch (TimestampParseException ex) {
89
+ }
90
+ catch (TimestampParseException ex) {
85
91
  exception = ex;
86
92
  }
87
93
  }
@@ -101,7 +107,7 @@ public class TimestampParser
101
107
 
102
108
  long localSec = localUsec / 1000000;
103
109
  long usec = localUsec % 1000000;
104
- long sec = timeZone.convertLocalToUTC(localSec*1000, false) / 1000;
110
+ long sec = timeZone.convertLocalToUTC(localSec * 1000, false) / 1000;
105
111
 
106
112
  return Timestamp.ofEpochSecond(sec, usec * 1000);
107
113
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-timestamp_format
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
@@ -77,12 +77,11 @@ files:
77
77
  - src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java
78
78
  - src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java
79
79
  - src/test/java/org/embulk/filter/TestTimestampFormatFilterPlugin.java
80
- - classpath/embulk-filter-timestamp_format-0.1.0.jar
80
+ - classpath/embulk-filter-timestamp_format-0.1.1.jar
81
81
  homepage: https://github.com/sonots/embulk-filter-timestamp_format
82
82
  licenses:
83
83
  - MIT
84
- metadata:
85
- allowed_push_host: https://rubygems.dena.jp
84
+ metadata: {}
86
85
  post_install_message:
87
86
  rdoc_options: []
88
87
  require_paths: