embulk 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +2 -3
- data/embulk-docs/src/built-in.rst +2 -0
- data/embulk-docs/src/release.rst +2 -0
- data/embulk-docs/src/release/release-0.6.26.rst +17 -0
- data/embulk-docs/src/release/release-0.7.2.rst +25 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +8 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +3 -1
- data/lib/embulk.rb +2 -0
- data/lib/embulk/command/embulk_run.rb +2 -2
- data/lib/embulk/error.rb +20 -11
- data/lib/embulk/exec.rb +8 -0
- data/lib/embulk/plugin.rb +1 -0
- data/lib/embulk/version.rb +1 -1
- metadata +7 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 69167ac2d89cb01cce7fe04d3380290604141720
|
|
4
|
+
data.tar.gz: cfcc577be3131bd9792d420425fd10dedf2df974
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ebe4adc87fd92f411e19db78e3ea58569c7902bf8e7f9dee97b797d5d7b3f502c056804d8e4d8a9989b6e61dc8e36476588fbf279d8232d48a4ea7842fa3411b
|
|
7
|
+
data.tar.gz: b2721835cd90a56cd9c97644a1af1429225542a8c0f1845014ce833e4f548dd48fed4511329f3f196da00d85783f8bb8bab73cf1e1f5effb6ff680fe5a13cacf
|
data/build.gradle
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
package org.embulk.spi.time;
|
|
2
2
|
|
|
3
|
-
import org.embulk.
|
|
3
|
+
import org.embulk.spi.DataException;
|
|
4
4
|
|
|
5
5
|
public class TimestampParseException
|
|
6
|
-
extends
|
|
7
|
-
implements UserDataException
|
|
6
|
+
extends DataException
|
|
8
7
|
{
|
|
9
8
|
public TimestampParseException(String message)
|
|
10
9
|
{
|
|
@@ -180,6 +180,8 @@ Options
|
|
|
180
180
|
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
|
181
181
|
| max\_quoted\_size\_limit | integer | Maximum number of bytes of a quoted value. If a value exceeds the limit, the row will be skipped | ``131072`` by default |
|
|
182
182
|
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
|
183
|
+
| stop\_on\_invalid\_record | boolean | Stop bulk load transaction if a file includes invalid record (such as invalid timestamp) | ``false`` by default |
|
|
184
|
+
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
|
183
185
|
| default\_timezone | string | Time zone of timestamp columns if the value itself doesn't include time zone description (eg. Asia/Tokyo) | ``UTC`` by default |
|
|
184
186
|
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
|
185
187
|
| newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
|
data/embulk-docs/src/release.rst
CHANGED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Release 0.6.26
|
|
2
|
+
==================================
|
|
3
|
+
|
|
4
|
+
Built-in plugins
|
|
5
|
+
------------------
|
|
6
|
+
|
|
7
|
+
* Added ``stop_on_invalid_record`` option to ``parser-csv`` plugin. This option stops bulkloading if it finds a broken record such as invalid timestamp or invalid integer format rather than skipping it.
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
Ruby Plugin API
|
|
11
|
+
------------------
|
|
12
|
+
|
|
13
|
+
* Added ``Embulk::Exec.preview?`` which returns true if plugins are running in preview.
|
|
14
|
+
|
|
15
|
+
Release Date
|
|
16
|
+
------------------
|
|
17
|
+
2015-08-20
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
Release 0.7.2
|
|
2
|
+
==================================
|
|
3
|
+
|
|
4
|
+
Built-in plugins
|
|
5
|
+
------------------
|
|
6
|
+
|
|
7
|
+
* Added ``stop_on_invalid_record`` option to ``parser-csv`` plugin. This option stops bulkloading if it finds a broken record such as invalid timestamp or invalid integer format rather than skipping it.
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
Ruby Plugin API
|
|
11
|
+
------------------
|
|
12
|
+
|
|
13
|
+
* Added ``Embulk::Exec.preview?`` which returns true if plugins are running in preview.
|
|
14
|
+
* Fixed ``cannot be cast to org.jruby.RubyException`` when ConfigException is raised.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
General Changes
|
|
18
|
+
------------------
|
|
19
|
+
|
|
20
|
+
* Fixed ``embulk selfupdate`` subcommand (@cosmo0920++)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
Release Date
|
|
24
|
+
------------------
|
|
25
|
+
2015-08-20
|
|
@@ -90,6 +90,10 @@ public class CsvParserPlugin
|
|
|
90
90
|
@Config("allow_extra_columns")
|
|
91
91
|
@ConfigDefault("false")
|
|
92
92
|
boolean getAllowExtraColumns();
|
|
93
|
+
|
|
94
|
+
@Config("stop_on_invalid_record")
|
|
95
|
+
@ConfigDefault("false")
|
|
96
|
+
boolean getStopOnInvalidRecord();
|
|
93
97
|
}
|
|
94
98
|
|
|
95
99
|
public static class QuoteCharacter
|
|
@@ -230,6 +234,7 @@ public class CsvParserPlugin
|
|
|
230
234
|
final String nullStringOrNull = task.getNullString().orNull();
|
|
231
235
|
final boolean allowOptionalColumns = task.getAllowOptionalColumns();
|
|
232
236
|
final boolean allowExtraColumns = task.getAllowExtraColumns();
|
|
237
|
+
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
|
233
238
|
int skipHeaderLines = task.getSkipHeaderLines();
|
|
234
239
|
|
|
235
240
|
try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
|
@@ -353,6 +358,9 @@ public class CsvParserPlugin
|
|
|
353
358
|
} catch (CsvTokenizer.InvalidFormatException | CsvTokenizer.InvalidValueException | CsvRecordValidateException e) {
|
|
354
359
|
long lineNumber = tokenizer.getCurrentLineNumber();
|
|
355
360
|
String skippedLine = tokenizer.skipCurrentLine();
|
|
361
|
+
if (stopOnInvalidRecord) {
|
|
362
|
+
throw new DataException(String.format("Invalid record at line %d: %s", lineNumber, skippedLine), e);
|
|
363
|
+
}
|
|
356
364
|
log.warn(String.format("Skipped line %d (%s): %s", lineNumber, e.getMessage(), skippedLine));
|
|
357
365
|
//exec.notice().skippedLine(skippedLine);
|
|
358
366
|
|
|
@@ -70,7 +70,9 @@ public class CsvTokenizer
|
|
|
70
70
|
// recover lines of quoted value
|
|
71
71
|
skippedLine = quotedValueLines.remove(0); // TODO optimize performance
|
|
72
72
|
unreadLines.addAll(quotedValueLines);
|
|
73
|
-
|
|
73
|
+
if (line != null) {
|
|
74
|
+
unreadLines.add(line);
|
|
75
|
+
}
|
|
74
76
|
lineNumber -= quotedValueLines.size();
|
|
75
77
|
quotedValueLines.clear();
|
|
76
78
|
}
|
data/lib/embulk.rb
CHANGED
|
@@ -173,7 +173,7 @@ examples:
|
|
|
173
173
|
|
|
174
174
|
when :selfupdate
|
|
175
175
|
op.on('-f', "Skip corruption check", TrueClass) do |b|
|
|
176
|
-
|
|
176
|
+
options[:force] = true
|
|
177
177
|
end
|
|
178
178
|
args = 0..0
|
|
179
179
|
|
|
@@ -253,7 +253,7 @@ examples:
|
|
|
253
253
|
|
|
254
254
|
when :selfupdate
|
|
255
255
|
require 'embulk/command/embulk_selfupdate'
|
|
256
|
-
Embulk.selfupdate(
|
|
256
|
+
Embulk.selfupdate(options)
|
|
257
257
|
|
|
258
258
|
else
|
|
259
259
|
require 'json'
|
data/lib/embulk/error.rb
CHANGED
|
@@ -1,17 +1,26 @@
|
|
|
1
1
|
|
|
2
2
|
module Embulk
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
# ConfigError is not a ::StandardError but is a java.lang.RuntimeException.
|
|
4
|
+
# "rescue => e" can rescue ConfigError.
|
|
5
|
+
class ConfigError < Java::Config::ConfigException
|
|
6
|
+
def initialize(message=nil)
|
|
7
|
+
if message
|
|
8
|
+
super(message.to_s)
|
|
9
|
+
else
|
|
10
|
+
super()
|
|
11
|
+
end
|
|
12
|
+
end
|
|
5
13
|
end
|
|
6
14
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
# DataError is not a ::StandardError but is a java.lang.RuntimeException.
|
|
16
|
+
# "rescue => e" can rescue DataError.
|
|
17
|
+
class DataError < Java::SPI::DataException
|
|
18
|
+
def initialize(message=nil)
|
|
19
|
+
if message
|
|
20
|
+
super(message.to_s)
|
|
21
|
+
else
|
|
22
|
+
super()
|
|
23
|
+
end
|
|
24
|
+
end
|
|
16
25
|
end
|
|
17
26
|
end
|
data/lib/embulk/exec.rb
ADDED
data/lib/embulk/plugin.rb
CHANGED
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: embulk
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.7.1
|
|
4
|
+
version: 0.7.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sadayuki Furuhashi
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-08-
|
|
11
|
+
date: 2015-08-20 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: jruby-jars
|
|
@@ -121,8 +121,8 @@ files:
|
|
|
121
121
|
- classpath/bval-jsr303-0.5.jar
|
|
122
122
|
- classpath/commons-beanutils-core-1.8.3.jar
|
|
123
123
|
- classpath/commons-lang3-3.1.jar
|
|
124
|
-
- classpath/embulk-core-0.7.1.jar
|
|
125
|
-
- classpath/embulk-standards-0.7.1.jar
|
|
124
|
+
- classpath/embulk-core-0.7.2.jar
|
|
125
|
+
- classpath/embulk-standards-0.7.2.jar
|
|
126
126
|
- classpath/guava-18.0.jar
|
|
127
127
|
- classpath/guice-4.0.jar
|
|
128
128
|
- classpath/guice-multibindings-4.0.jar
|
|
@@ -409,6 +409,7 @@ files:
|
|
|
409
409
|
- embulk-docs/src/release/release-0.6.23.rst
|
|
410
410
|
- embulk-docs/src/release/release-0.6.24.rst
|
|
411
411
|
- embulk-docs/src/release/release-0.6.25.rst
|
|
412
|
+
- embulk-docs/src/release/release-0.6.26.rst
|
|
412
413
|
- embulk-docs/src/release/release-0.6.3.rst
|
|
413
414
|
- embulk-docs/src/release/release-0.6.4.rst
|
|
414
415
|
- embulk-docs/src/release/release-0.6.5.rst
|
|
@@ -418,6 +419,7 @@ files:
|
|
|
418
419
|
- embulk-docs/src/release/release-0.6.9.rst
|
|
419
420
|
- embulk-docs/src/release/release-0.7.0.rst
|
|
420
421
|
- embulk-docs/src/release/release-0.7.1.rst
|
|
422
|
+
- embulk-docs/src/release/release-0.7.2.rst
|
|
421
423
|
- embulk-standards/build.gradle
|
|
422
424
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
|
423
425
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
|
@@ -493,6 +495,7 @@ files:
|
|
|
493
495
|
- lib/embulk/decoder_plugin.rb
|
|
494
496
|
- lib/embulk/encoder_plugin.rb
|
|
495
497
|
- lib/embulk/error.rb
|
|
498
|
+
- lib/embulk/exec.rb
|
|
496
499
|
- lib/embulk/executor_plugin.rb
|
|
497
500
|
- lib/embulk/file_input.rb
|
|
498
501
|
- lib/embulk/file_input_plugin.rb
|