embulk-filter-timestamp_format 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5bbc918d5bf6ffd22de31d35b00909dcb7e29b8a
4
- data.tar.gz: 23ac08f9781e5c0accdd06c58034801f738ab4b0
3
+ metadata.gz: 3a7dcaa4b130acfa1790e07f7c132d63eb677cc5
4
+ data.tar.gz: 412c0aa7676be1171a43e8a903cf428389d54bb6
5
5
  SHA512:
6
- metadata.gz: c1db3ebc3893536c371f7765151d9ea673a21f05bd655a311a0a935b2f0da61cbb66bb18bd33dd59a7e63a12754be0f74991a431696ef091e48f68f2a2ff1b9d
7
- data.tar.gz: e26a3e99225d16ed059f0201ecb7a1ecf009fb1544b0137ea32d2d7d49a6f89815bbc08e608b73f503fcbd21e353ea12f9a62aa5d5fee161af8ad37806b172f4
6
+ metadata.gz: 4af5ea7bba8fc5f6f67a342aa2170d77f5cce3d6c0ad92e0ee90bd48e587e0172b227736f5c4361251589475b398e3943d334c12dd442f3c762b3f235a712925
7
+ data.tar.gz: 3b5fdde8febd1afb7dc5d9242ef07f6f15d2a4f003f5a1a55716fa6a4e589e9a6dfaee847cf81176d18a7ded3ff9becbfd0e5bdbcbaa3f50976854a0dde3e3a9
data/CHANGELOG.md CHANGED
@@ -0,0 +1,9 @@
1
+ # 0.1.1 (2016-04-26)
2
+
3
+ Enhancements:
4
+
5
+ * Check whether specified columns exist
6
+
7
+ # 0.1.0 (2016-04-26)
8
+
9
+ initial version
data/README.md CHANGED
@@ -8,11 +8,11 @@ A filter plugin for Embulk to change timestamp format
8
8
 
9
9
  - **columns**: columns to retain (array of hash)
10
10
  - **name**: name of column, must be a string column (required)
11
- - **from_format**: specify the format of the input timestamp (string or an array, default is default_from_format)
11
+ - **from_format**: specify the format of the input timestamp (array of strings, default is default_from_format)
12
12
  - **from_timezone**: specify the timezone of the input timestamp (string, default is default_from_timezone)
13
13
  - **to_format**: specify the format of the output timestamp (string, default is default_to_format)
14
14
  - **to_timezone**: specify the timezone of the output timestamp (string, default is default_to_timezone)
15
- - **default_from_format**: default timestamp format for the input timestamp columns (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
15
+ - **default_from_format**: default timestamp format for the input timestamp columns (array of strings, default is `["%Y-%m-%d %H:%M:%S.%N %z"]`)
16
16
  - **default_from_timezone**: default timezone for the input timestamp columns (string, default is `UTC`)
17
17
  - **default_to_format**: default timestamp format for the output timestamp columns (string, default is `%Y-%m-%d %H:%M:%S.%N %z`)
18
18
  - **default_to_timezone**: default timezone for the output timestamp columns (string, default is `UTC`)
@@ -54,6 +54,7 @@ Output will be as:
54
54
 
55
55
  ## ToDo
56
56
 
57
+ * Currently, input must be a String column and output will be a String column; add support for Timestamp columns (input / output)
57
58
  * Write test
58
59
 
59
60
  ## Development
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.0"
16
+ version = "0.1.1"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
@@ -70,12 +70,6 @@ Gem::Specification.new do |spec|
70
70
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
71
71
  spec.require_paths = ["lib"]
72
72
 
73
- if spec.respond_to?(:metadata)
74
- spec.metadata['allowed_push_host'] = "https://rubygems.dena.jp"
75
- else
76
- raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
77
- end
78
-
79
73
  spec.add_development_dependency 'bundler', ['~> 1.0']
80
74
  spec.add_development_dependency 'rake', ['>= 10.0']
81
75
  spec.add_development_dependency 'embulk-parser-jsonl'
@@ -1,18 +1,17 @@
1
1
  package org.embulk.filter;
2
2
 
3
- import com.google.common.base.Optional;
4
3
  import com.google.common.base.Throwables;
5
4
 
6
5
  import org.embulk.config.Config;
7
- import org.embulk.config.ConfigInject;
8
6
  import org.embulk.config.ConfigDefault;
9
7
  import org.embulk.config.ConfigException;
8
+ import org.embulk.config.ConfigInject;
10
9
  import org.embulk.config.ConfigSource;
11
10
  import org.embulk.config.Task;
12
11
  import org.embulk.config.TaskSource;
13
12
 
14
- import org.embulk.filter.timestamp_format.TimestampParser;
15
13
  import org.embulk.filter.timestamp_format.TimestampFormatter;
14
+ import org.embulk.filter.timestamp_format.TimestampParser;
16
15
 
17
16
  import org.embulk.spi.Column;
18
17
  import org.embulk.spi.ColumnVisitor;
@@ -23,21 +22,20 @@ import org.embulk.spi.PageBuilder;
23
22
  import org.embulk.spi.PageOutput;
24
23
  import org.embulk.spi.PageReader;
25
24
  import org.embulk.spi.Schema;
26
- import org.embulk.spi.json.JsonParser;
27
25
  import org.embulk.spi.time.Timestamp;
28
26
  import org.embulk.spi.time.TimestampParseException;
29
27
 
30
- import org.jruby.embed.ScriptingContainer;
31
28
  import org.joda.time.DateTimeZone;
29
+ import org.jruby.embed.ScriptingContainer;
32
30
  import org.msgpack.value.ArrayValue;
33
31
  import org.msgpack.value.MapValue;
34
32
  import org.msgpack.value.Value;
35
33
  import org.msgpack.value.ValueFactory;
36
34
  import org.slf4j.Logger;
37
35
 
36
+ import java.util.ArrayList;
38
37
  import java.util.HashMap;
39
38
  import java.util.List;
40
- import java.util.ArrayList;
41
39
  import java.util.Map;
42
40
  import java.util.Objects;
43
41
 
@@ -84,6 +82,13 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
84
82
  throw new ConfigException("\"columns\" must be specified.");
85
83
  }
86
84
 
85
+ for (ColumnConfig columnConfig : columns) {
86
+ String name = columnConfig.getName();
87
+ if (!name.startsWith("$.")) {
88
+ inputSchema.lookupColumn(name); // throw Column 'name' is not found
89
+ }
90
+ }
91
+
87
92
  control.run(task.dump(), inputSchema);
88
93
  }
89
94
 
@@ -122,40 +127,13 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
122
127
  }
123
128
 
124
129
  return new PageOutput() {
125
- private PageReader pageReader = new PageReader(inputSchema);
126
- private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
127
- private ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
128
-
129
- @Override
130
- public void finish()
131
- {
132
- pageBuilder.finish();
133
- }
134
-
135
- @Override
136
- public void close()
137
- {
138
- pageBuilder.close();
139
- }
140
-
141
- @Override
142
- public void add(Page page)
143
- {
144
- pageReader.setPage(page);
145
-
146
- while (pageReader.nextRecord()) {
147
- outputSchema.visitColumns(visitor);
148
- pageBuilder.addRecord();
149
- }
150
- }
151
-
152
130
  public Value formatTimestampStringRecursively(PluginTask task, String name, Value value)
153
131
  throws TimestampParseException
154
132
  {
155
133
  if (value.isArrayValue()) {
156
134
  ArrayValue arrayValue = value.asArrayValue();
157
135
  int size = arrayValue.size();
158
- Value newValue[] = new Value[size];
136
+ Value[] newValue = new Value[size];
159
137
  for (int i = 0; i < size; i++) {
160
138
  String k = new StringBuilder(name).append("[").append(Integer.toString(i)).append("]").toString();
161
139
  Value v = arrayValue.get(i);
@@ -166,7 +144,7 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
166
144
  else if (value.isMapValue()) {
167
145
  MapValue mapValue = value.asMapValue();
168
146
  int size = mapValue.size() * 2;
169
- Value newValue[] = new Value[size];
147
+ Value[] newValue = new Value[size];
170
148
  int i = 0;
171
149
  for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
172
150
  Value k = entry.getKey();
@@ -179,7 +157,7 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
179
157
  return ValueFactory.newMap(newValue, true);
180
158
  }
181
159
  else if (value.isStringValue()) {
182
- String stringValue = value.asStringValue().asString() ;
160
+ String stringValue = value.asStringValue().asString();
183
161
  String newValue = formatTimestampString(task, name, stringValue);
184
162
  return (Objects.equals(newValue, stringValue)) ? value : ValueFactory.newString(newValue);
185
163
  }
@@ -203,18 +181,47 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
203
181
  catch (TimestampParseException ex) {
204
182
  if (task.getStopOnInvalidRecord()) {
205
183
  throw Throwables.propagate(ex);
206
- } else {
184
+ }
185
+ else {
207
186
  logger.warn("invalid value \"{}\":\"{}\"", name, value);
208
187
  return value;
209
188
  }
210
189
  }
211
190
  }
212
191
 
192
+ private PageReader pageReader = new PageReader(inputSchema);
193
+ private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
194
+ private ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
195
+
196
+ @Override
197
+ public void finish()
198
+ {
199
+ pageBuilder.finish();
200
+ }
201
+
202
+ @Override
203
+ public void close()
204
+ {
205
+ pageBuilder.close();
206
+ }
207
+
208
+ @Override
209
+ public void add(Page page)
210
+ {
211
+ pageReader.setPage(page);
212
+
213
+ while (pageReader.nextRecord()) {
214
+ outputSchema.visitColumns(visitor);
215
+ pageBuilder.addRecord();
216
+ }
217
+ }
218
+
213
219
  class ColumnVisitorImpl implements ColumnVisitor
214
220
  {
215
221
  private final PageBuilder pageBuilder;
216
222
 
217
- ColumnVisitorImpl(PageBuilder pageBuilder) {
223
+ ColumnVisitorImpl(PageBuilder pageBuilder)
224
+ {
218
225
  this.pageBuilder = pageBuilder;
219
226
  }
220
227
 
@@ -223,7 +230,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
223
230
  {
224
231
  if (pageReader.isNull(column)) {
225
232
  pageBuilder.setNull(column);
226
- } else {
233
+ }
234
+ else {
227
235
  pageBuilder.setBoolean(column, pageReader.getBoolean(column));
228
236
  }
229
237
  }
@@ -233,7 +241,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
233
241
  {
234
242
  if (pageReader.isNull(column)) {
235
243
  pageBuilder.setNull(column);
236
- } else {
244
+ }
245
+ else {
237
246
  pageBuilder.setLong(column, pageReader.getLong(column));
238
247
  }
239
248
  }
@@ -243,7 +252,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
243
252
  {
244
253
  if (pageReader.isNull(column)) {
245
254
  pageBuilder.setNull(column);
246
- } else {
255
+ }
256
+ else {
247
257
  pageBuilder.setDouble(column, pageReader.getDouble(column));
248
258
  }
249
259
  }
@@ -279,7 +289,8 @@ public class TimestampFormatFilterPlugin implements FilterPlugin
279
289
  {
280
290
  if (pageReader.isNull(column)) {
281
291
  pageBuilder.setNull(column);
282
- } else {
292
+ }
293
+ else {
283
294
  pageBuilder.setTimestamp(column, pageReader.getTimestamp(column));
284
295
  }
285
296
  }
@@ -1,19 +1,22 @@
1
1
  package org.embulk.filter.timestamp_format;
2
2
 
3
- import java.util.Locale;
4
- import org.jruby.embed.ScriptingContainer;
5
- import org.joda.time.DateTime;
6
- import org.joda.time.DateTimeZone;
7
3
  import com.google.common.base.Optional;
8
- import org.jruby.embed.ScriptingContainer;
9
- import org.jruby.util.RubyDateFormat;
4
+
10
5
  import org.embulk.config.Config;
11
6
  import org.embulk.config.ConfigDefault;
12
- import org.embulk.spi.util.LineEncoder;
13
- import org.embulk.spi.time.Timestamp;
14
7
 
15
8
  import org.embulk.filter.TimestampFormatFilterPlugin.PluginTask;
16
9
 
10
+ import org.embulk.spi.time.Timestamp;
11
+ import org.embulk.spi.util.LineEncoder;
12
+
13
+ import org.joda.time.DateTime;
14
+ import org.joda.time.DateTimeZone;
15
+ import org.jruby.embed.ScriptingContainer;
16
+ import org.jruby.util.RubyDateFormat;
17
+
18
+ import java.util.Locale;
19
+
17
20
  public class TimestampFormatter
18
21
  {
19
22
  public interface Task
@@ -72,7 +75,7 @@ public class TimestampFormatter
72
75
  public String format(Timestamp value)
73
76
  {
74
77
  // TODO optimize by using reused StringBuilder
75
- toDateFormat.setDateTime(new DateTime(value.getEpochSecond()*1000, toTimeZone));
78
+ toDateFormat.setDateTime(new DateTime(value.getEpochSecond() * 1000, toTimeZone));
76
79
  toDateFormat.setNSec(value.getNano());
77
80
  return toDateFormat.format(null);
78
81
  }
@@ -1,19 +1,24 @@
1
1
  package org.embulk.filter.timestamp_format;
2
2
 
3
- import org.jruby.embed.ScriptingContainer;
4
- import org.joda.time.DateTimeZone;
5
3
  import com.google.common.base.Optional;
4
+
6
5
  import org.embulk.config.Config;
7
6
  import org.embulk.config.ConfigDefault;
8
- import static org.embulk.spi.time.TimestampFormat.parseDateTimeZone;
9
- import org.embulk.spi.time.Timestamp;
10
- import org.embulk.spi.time.TimestampParseException;
7
+
8
+ import org.embulk.filter.TimestampFormatFilterPlugin.PluginTask;
9
+
11
10
  import org.embulk.spi.time.JRubyTimeParserHelper;
12
11
  import org.embulk.spi.time.JRubyTimeParserHelperFactory;
12
+ import org.embulk.spi.time.Timestamp;
13
+ import org.embulk.spi.time.TimestampParseException;
14
+
15
+ import static org.embulk.spi.time.TimestampFormat.parseDateTimeZone;
16
+
17
+ import org.joda.time.DateTimeZone;
18
+ import org.jruby.embed.ScriptingContainer;
13
19
 
14
- import org.embulk.filter.TimestampFormatFilterPlugin.PluginTask;
15
- import java.util.List;
16
20
  import java.util.ArrayList;
21
+ import java.util.List;
17
22
 
18
23
  public class TimestampParser
19
24
  {
@@ -81,7 +86,8 @@ public class TimestampParser
81
86
  helper = h;
82
87
  try {
83
88
  localUsec = helper.strptimeUsec(text);
84
- } catch (TimestampParseException ex) {
89
+ }
90
+ catch (TimestampParseException ex) {
85
91
  exception = ex;
86
92
  }
87
93
  }
@@ -101,7 +107,7 @@ public class TimestampParser
101
107
 
102
108
  long localSec = localUsec / 1000000;
103
109
  long usec = localUsec % 1000000;
104
- long sec = timeZone.convertLocalToUTC(localSec*1000, false) / 1000;
110
+ long sec = timeZone.convertLocalToUTC(localSec * 1000, false) / 1000;
105
111
 
106
112
  return Timestamp.ofEpochSecond(sec, usec * 1000);
107
113
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-timestamp_format
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
@@ -77,12 +77,11 @@ files:
77
77
  - src/main/java/org/embulk/filter/timestamp_format/TimestampFormatter.java
78
78
  - src/main/java/org/embulk/filter/timestamp_format/TimestampParser.java
79
79
  - src/test/java/org/embulk/filter/TestTimestampFormatFilterPlugin.java
80
- - classpath/embulk-filter-timestamp_format-0.1.0.jar
80
+ - classpath/embulk-filter-timestamp_format-0.1.1.jar
81
81
  homepage: https://github.com/sonots/embulk-filter-timestamp_format
82
82
  licenses:
83
83
  - MIT
84
- metadata:
85
- allowed_push_host: https://rubygems.dena.jp
84
+ metadata: {}
86
85
  post_install_message:
87
86
  rdoc_options: []
88
87
  require_paths: