embulk 0.7.1 → 0.7.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +2 -3
- data/embulk-docs/src/built-in.rst +2 -0
- data/embulk-docs/src/release.rst +2 -0
- data/embulk-docs/src/release/release-0.6.26.rst +17 -0
- data/embulk-docs/src/release/release-0.7.2.rst +25 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +8 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +3 -1
- data/lib/embulk.rb +2 -0
- data/lib/embulk/command/embulk_run.rb +2 -2
- data/lib/embulk/error.rb +20 -11
- data/lib/embulk/exec.rb +8 -0
- data/lib/embulk/plugin.rb +1 -0
- data/lib/embulk/version.rb +1 -1
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 69167ac2d89cb01cce7fe04d3380290604141720
|
4
|
+
data.tar.gz: cfcc577be3131bd9792d420425fd10dedf2df974
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ebe4adc87fd92f411e19db78e3ea58569c7902bf8e7f9dee97b797d5d7b3f502c056804d8e4d8a9989b6e61dc8e36476588fbf279d8232d48a4ea7842fa3411b
|
7
|
+
data.tar.gz: b2721835cd90a56cd9c97644a1af1429225542a8c0f1845014ce833e4f548dd48fed4511329f3f196da00d85783f8bb8bab73cf1e1f5effb6ff680fe5a13cacf
|
data/build.gradle
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
package org.embulk.spi.time;
|
2
2
|
|
3
|
-
import org.embulk.
|
3
|
+
import org.embulk.spi.DataException;
|
4
4
|
|
5
5
|
public class TimestampParseException
|
6
|
-
extends
|
7
|
-
implements UserDataException
|
6
|
+
extends DataException
|
8
7
|
{
|
9
8
|
public TimestampParseException(String message)
|
10
9
|
{
|
@@ -180,6 +180,8 @@ Options
|
|
180
180
|
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
181
181
|
| max\_quoted\_size\_limit | integer | Maximum number of bytes of a quoted value. If a value exceeds the limit, the row will be skipped | ``131072`` by default |
|
182
182
|
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
183
|
+
| stop\_on\_invalid\_record | boolean | Stop bulk load transaction if a file includes invalid record (such as invalid timestamp) | ``false`` by default |
|
184
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
183
185
|
| default\_timezone | string | Time zone of timestamp columns if the value itself doesn't include time zone description (eg. Asia/Tokyo) | ``UTC`` by default |
|
184
186
|
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
185
187
|
| newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
Release 0.6.26
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Built-in plugins
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Added ``stop_on_invalid_record`` option to ``parser-csv`` plugin. This option stops bulk loading if it finds a broken record, such as an invalid timestamp or an invalid integer format, rather than skipping it.
|
8
|
+
|
9
|
+
|
10
|
+
Ruby Plugin API
|
11
|
+
------------------
|
12
|
+
|
13
|
+
* Added ``Embulk::Exec.preview?`` which returns true if plugins are running in preview.
|
14
|
+
|
15
|
+
Release Date
|
16
|
+
------------------
|
17
|
+
2015-08-20
|
@@ -0,0 +1,25 @@
|
|
1
|
+
Release 0.7.2
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Built-in plugins
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Added ``stop_on_invalid_record`` option to ``parser-csv`` plugin. This option stops bulk loading if it finds a broken record, such as an invalid timestamp or an invalid integer format, rather than skipping it.
|
8
|
+
|
9
|
+
|
10
|
+
Ruby Plugin API
|
11
|
+
------------------
|
12
|
+
|
13
|
+
* Added ``Embulk::Exec.preview?`` which returns true if plugins are running in preview.
|
14
|
+
* Fixed ``cannot be cast to org.jruby.RubyException`` when ConfigException is raised.
|
15
|
+
|
16
|
+
|
17
|
+
General Changes
|
18
|
+
------------------
|
19
|
+
|
20
|
+
* Fixed ``embulk selfupdate`` subcommand (@cosmo0920++)
|
21
|
+
|
22
|
+
|
23
|
+
Release Date
|
24
|
+
------------------
|
25
|
+
2015-08-20
|
@@ -90,6 +90,10 @@ public class CsvParserPlugin
|
|
90
90
|
@Config("allow_extra_columns")
|
91
91
|
@ConfigDefault("false")
|
92
92
|
boolean getAllowExtraColumns();
|
93
|
+
|
94
|
+
@Config("stop_on_invalid_record")
|
95
|
+
@ConfigDefault("false")
|
96
|
+
boolean getStopOnInvalidRecord();
|
93
97
|
}
|
94
98
|
|
95
99
|
public static class QuoteCharacter
|
@@ -230,6 +234,7 @@ public class CsvParserPlugin
|
|
230
234
|
final String nullStringOrNull = task.getNullString().orNull();
|
231
235
|
final boolean allowOptionalColumns = task.getAllowOptionalColumns();
|
232
236
|
final boolean allowExtraColumns = task.getAllowExtraColumns();
|
237
|
+
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
233
238
|
int skipHeaderLines = task.getSkipHeaderLines();
|
234
239
|
|
235
240
|
try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
@@ -353,6 +358,9 @@ public class CsvParserPlugin
|
|
353
358
|
} catch (CsvTokenizer.InvalidFormatException | CsvTokenizer.InvalidValueException | CsvRecordValidateException e) {
|
354
359
|
long lineNumber = tokenizer.getCurrentLineNumber();
|
355
360
|
String skippedLine = tokenizer.skipCurrentLine();
|
361
|
+
if (stopOnInvalidRecord) {
|
362
|
+
throw new DataException(String.format("Invalid record at line %d: %s", lineNumber, skippedLine), e);
|
363
|
+
}
|
356
364
|
log.warn(String.format("Skipped line %d (%s): %s", lineNumber, e.getMessage(), skippedLine));
|
357
365
|
//exec.notice().skippedLine(skippedLine);
|
358
366
|
|
@@ -70,7 +70,9 @@ public class CsvTokenizer
|
|
70
70
|
// recover lines of quoted value
|
71
71
|
skippedLine = quotedValueLines.remove(0); // TODO optimize performance
|
72
72
|
unreadLines.addAll(quotedValueLines);
|
73
|
-
|
73
|
+
if (line != null) {
|
74
|
+
unreadLines.add(line);
|
75
|
+
}
|
74
76
|
lineNumber -= quotedValueLines.size();
|
75
77
|
quotedValueLines.clear();
|
76
78
|
}
|
data/lib/embulk.rb
CHANGED
@@ -173,7 +173,7 @@ examples:
|
|
173
173
|
|
174
174
|
when :selfupdate
|
175
175
|
op.on('-f', "Skip corruption check", TrueClass) do |b|
|
176
|
-
|
176
|
+
options[:force] = true
|
177
177
|
end
|
178
178
|
args = 0..0
|
179
179
|
|
@@ -253,7 +253,7 @@ examples:
|
|
253
253
|
|
254
254
|
when :selfupdate
|
255
255
|
require 'embulk/command/embulk_selfupdate'
|
256
|
-
Embulk.selfupdate(
|
256
|
+
Embulk.selfupdate(options)
|
257
257
|
|
258
258
|
else
|
259
259
|
require 'json'
|
data/lib/embulk/error.rb
CHANGED
@@ -1,17 +1,26 @@
|
|
1
1
|
|
2
2
|
module Embulk
|
3
|
-
|
4
|
-
|
3
|
+
# ConfigError is not a ::StandardError but is a java.lang.RuntimeException.
|
4
|
+
# "rescue => e" can rescue ConfigError.
|
5
|
+
class ConfigError < Java::Config::ConfigException
|
6
|
+
def initialize(message=nil)
|
7
|
+
if message
|
8
|
+
super(message.to_s)
|
9
|
+
else
|
10
|
+
super()
|
11
|
+
end
|
12
|
+
end
|
5
13
|
end
|
6
14
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
15
|
+
# DataError is not a ::StandardError but is a java.lang.RuntimeException.
|
16
|
+
# "rescue => e" can rescue DataError.
|
17
|
+
class DataError < Java::SPI::DataException
|
18
|
+
def initialize(message=nil)
|
19
|
+
if message
|
20
|
+
super(message.to_s)
|
21
|
+
else
|
22
|
+
super()
|
23
|
+
end
|
24
|
+
end
|
16
25
|
end
|
17
26
|
end
|
data/lib/embulk/exec.rb
ADDED
data/lib/embulk/plugin.rb
CHANGED
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-08-
|
11
|
+
date: 2015-08-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jruby-jars
|
@@ -121,8 +121,8 @@ files:
|
|
121
121
|
- classpath/bval-jsr303-0.5.jar
|
122
122
|
- classpath/commons-beanutils-core-1.8.3.jar
|
123
123
|
- classpath/commons-lang3-3.1.jar
|
124
|
-
- classpath/embulk-core-0.7.
|
125
|
-
- classpath/embulk-standards-0.7.
|
124
|
+
- classpath/embulk-core-0.7.2.jar
|
125
|
+
- classpath/embulk-standards-0.7.2.jar
|
126
126
|
- classpath/guava-18.0.jar
|
127
127
|
- classpath/guice-4.0.jar
|
128
128
|
- classpath/guice-multibindings-4.0.jar
|
@@ -409,6 +409,7 @@ files:
|
|
409
409
|
- embulk-docs/src/release/release-0.6.23.rst
|
410
410
|
- embulk-docs/src/release/release-0.6.24.rst
|
411
411
|
- embulk-docs/src/release/release-0.6.25.rst
|
412
|
+
- embulk-docs/src/release/release-0.6.26.rst
|
412
413
|
- embulk-docs/src/release/release-0.6.3.rst
|
413
414
|
- embulk-docs/src/release/release-0.6.4.rst
|
414
415
|
- embulk-docs/src/release/release-0.6.5.rst
|
@@ -418,6 +419,7 @@ files:
|
|
418
419
|
- embulk-docs/src/release/release-0.6.9.rst
|
419
420
|
- embulk-docs/src/release/release-0.7.0.rst
|
420
421
|
- embulk-docs/src/release/release-0.7.1.rst
|
422
|
+
- embulk-docs/src/release/release-0.7.2.rst
|
421
423
|
- embulk-standards/build.gradle
|
422
424
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
423
425
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -493,6 +495,7 @@ files:
|
|
493
495
|
- lib/embulk/decoder_plugin.rb
|
494
496
|
- lib/embulk/encoder_plugin.rb
|
495
497
|
- lib/embulk/error.rb
|
498
|
+
- lib/embulk/exec.rb
|
496
499
|
- lib/embulk/executor_plugin.rb
|
497
500
|
- lib/embulk/file_input.rb
|
498
501
|
- lib/embulk/file_input_plugin.rb
|