embulk 0.6.1 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/build.gradle +1 -2
- data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +31 -48
- data/embulk-docs/push-gh-pages.sh +3 -3
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +1 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.2.rst +17 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +1 -1
- data/lib/embulk/guess/charset.rb +12 -5
- data/lib/embulk/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 148c75f1b85e1d85859d0be3293d85e5fc3f1bab
|
4
|
+
data.tar.gz: c07d2801fc7cc4b0caa60d2ff692553b29827cfe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6f499389219a19663e1e3789b68bf1eea7f650df7921347ec5d0641395a1a1b11a5087f571a9e0ac3536cc1156592cb15b1f5b4283e069f68d456449c90d4c69
|
7
|
+
data.tar.gz: 78eeb275c30689f63947961fd1d2f0b01d96abfef0c16b1e68370fdca0570c809f1e4cb38924e67c10f11e86759c88e706dcb32c45f55eaf10876055bdedf8ea
|
data/README.md
CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
30
30
|
Following 4 commands install embulk to your home directory:
|
31
31
|
|
32
32
|
```
|
33
|
-
curl --create-dirs -o ~/.embulk/bin/embulk -L
|
33
|
+
curl --create-dirs -o ~/.embulk/bin/embulk -L "http://dl.embulk.org/embulk-latest.jar"
|
34
34
|
chmod +x ~/.embulk/bin/embulk
|
35
35
|
echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
|
36
36
|
source ~/.bashrc
|
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
45
45
|
You can assume the jar file is a .bat file.
|
46
46
|
|
47
47
|
```
|
48
|
-
PowerShell -Command "& {Invoke-WebRequest
|
48
|
+
PowerShell -Command "& {Invoke-WebRequest http://dl.embulk.org/embulk-latest.jar -OutFile embulk.bat}"
|
49
49
|
```
|
50
50
|
|
51
51
|
Next step: [Trying the example](#trying-the-example)
|
data/build.gradle
CHANGED
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
12
12
|
|
13
13
|
allprojects {
|
14
14
|
group = 'org.embulk'
|
15
|
-
version = '0.6.
|
15
|
+
version = '0.6.2'
|
16
16
|
|
17
17
|
apply plugin: 'java'
|
18
18
|
apply plugin: 'maven-publish'
|
@@ -252,7 +252,6 @@ task set_version << {
|
|
252
252
|
|
253
253
|
List<String> docs = [
|
254
254
|
'README.md',
|
255
|
-
'embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst'
|
256
255
|
]
|
257
256
|
docs.each() { path ->
|
258
257
|
File doc = file(path)
|
@@ -327,28 +327,6 @@ public class BulkLoader
|
|
327
327
|
}
|
328
328
|
}
|
329
329
|
|
330
|
-
protected ExecutorPlugin newExecutorPlugin(BulkLoaderTask task)
|
331
|
-
{
|
332
|
-
return Exec.newPlugin(ExecutorPlugin.class,
|
333
|
-
task.getExecConfig().get(PluginType.class, "type", new PluginType("local")));
|
334
|
-
}
|
335
|
-
|
336
|
-
protected InputPlugin newInputPlugin(BulkLoaderTask task)
|
337
|
-
{
|
338
|
-
return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
|
339
|
-
}
|
340
|
-
|
341
|
-
protected List<FilterPlugin> newFilterPlugins(BulkLoaderTask task)
|
342
|
-
{
|
343
|
-
return Filters.newFilterPlugins(Exec.session(),
|
344
|
-
Filters.getPluginTypes(task.getFilterConfigs()));
|
345
|
-
}
|
346
|
-
|
347
|
-
protected OutputPlugin newOutputPlugin(BulkLoaderTask task)
|
348
|
-
{
|
349
|
-
return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
|
350
|
-
}
|
351
|
-
|
352
330
|
public ExecutionResult run(ExecSession exec, final ConfigSource config)
|
353
331
|
{
|
354
332
|
try {
|
@@ -400,32 +378,6 @@ public class BulkLoader
|
|
400
378
|
}
|
401
379
|
}
|
402
380
|
|
403
|
-
public void doCleanup(ConfigSource config, ResumeState resume)
|
404
|
-
{
|
405
|
-
BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
|
406
|
-
InputPlugin inputPlugin = newInputPlugin(task);
|
407
|
-
OutputPlugin outputPlugin = newOutputPlugin(task);
|
408
|
-
|
409
|
-
ImmutableList.Builder<CommitReport> successfulInputCommitReports = ImmutableList.builder();
|
410
|
-
ImmutableList.Builder<CommitReport> successfulOutputCommitReports = ImmutableList.builder();
|
411
|
-
for (Optional<CommitReport> inputCommitReport : resume.getInputCommitReports()) {
|
412
|
-
if (inputCommitReport.isPresent()) {
|
413
|
-
successfulInputCommitReports.add(inputCommitReport.get());
|
414
|
-
}
|
415
|
-
}
|
416
|
-
for (Optional<CommitReport> outputCommitReport : resume.getOutputCommitReports()) {
|
417
|
-
if (outputCommitReport.isPresent()) {
|
418
|
-
successfulOutputCommitReports.add(outputCommitReport.get());
|
419
|
-
}
|
420
|
-
}
|
421
|
-
|
422
|
-
inputPlugin.cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
|
423
|
-
resume.getInputCommitReports().size(), successfulInputCommitReports.build());
|
424
|
-
|
425
|
-
outputPlugin.cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
|
426
|
-
resume.getOutputCommitReports().size(), successfulOutputCommitReports.build());
|
427
|
-
}
|
428
|
-
|
429
381
|
private static class ProcessPluginSet
|
430
382
|
{
|
431
383
|
private final PluginType inputPluginType;
|
@@ -477,6 +429,37 @@ public class BulkLoader
|
|
477
429
|
}
|
478
430
|
}
|
479
431
|
|
432
|
+
public void doCleanup(ConfigSource config, ResumeState resume)
|
433
|
+
{
|
434
|
+
BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
|
435
|
+
ProcessPluginSet plugins = new ProcessPluginSet(task); // TODO don't create filter plugins
|
436
|
+
|
437
|
+
ImmutableList.Builder<CommitReport> successfulInputCommitReports = ImmutableList.builder();
|
438
|
+
ImmutableList.Builder<CommitReport> successfulOutputCommitReports = ImmutableList.builder();
|
439
|
+
for (Optional<CommitReport> inputCommitReport : resume.getInputCommitReports()) {
|
440
|
+
if (inputCommitReport.isPresent()) {
|
441
|
+
successfulInputCommitReports.add(inputCommitReport.get());
|
442
|
+
}
|
443
|
+
}
|
444
|
+
for (Optional<CommitReport> outputCommitReport : resume.getOutputCommitReports()) {
|
445
|
+
if (outputCommitReport.isPresent()) {
|
446
|
+
successfulOutputCommitReports.add(outputCommitReport.get());
|
447
|
+
}
|
448
|
+
}
|
449
|
+
|
450
|
+
plugins.getInputPlugin().cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
|
451
|
+
resume.getInputCommitReports().size(), successfulInputCommitReports.build());
|
452
|
+
|
453
|
+
plugins.getOutputPlugin().cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
|
454
|
+
resume.getOutputCommitReports().size(), successfulOutputCommitReports.build());
|
455
|
+
}
|
456
|
+
|
457
|
+
private ExecutorPlugin newExecutorPlugin(BulkLoaderTask task)
|
458
|
+
{
|
459
|
+
return Exec.newPlugin(ExecutorPlugin.class,
|
460
|
+
task.getExecConfig().get(PluginType.class, "type", new PluginType("local")));
|
461
|
+
}
|
462
|
+
|
480
463
|
private ExecutionResult doRun(ConfigSource config)
|
481
464
|
{
|
482
465
|
final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
|
@@ -16,7 +16,7 @@ function r() {
|
|
16
16
|
[ "$TRAVIS_BRANCH" != "master" -a "$TRAVIS_BRANCH" != "$(git describe --tags --always HEAD)" ] && exit 0
|
17
17
|
|
18
18
|
revision="$(git rev-parse HEAD)"
|
19
|
-
remote="
|
19
|
+
remote="https://github.com/embulk/embulk.github.io.git"
|
20
20
|
re ./gradlew site
|
21
21
|
|
22
22
|
r git fetch --unshallow || echo "using complete repository."
|
@@ -28,7 +28,7 @@ re cd gh_pages
|
|
28
28
|
re git remote add travis_push "$remote"
|
29
29
|
re git fetch travis_push
|
30
30
|
|
31
|
-
re git checkout -b gh-pages travis_push/
|
31
|
+
re git checkout -b gh-pages travis_push/master
|
32
32
|
re rm -rf docs
|
33
33
|
re cp -a ../embulk-docs/build/html docs
|
34
34
|
re git add --all docs
|
@@ -46,4 +46,4 @@ fi
|
|
46
46
|
re git config credential.helper "store --file=$HOME/.git_credentials"
|
47
47
|
echo "https://$GITHUB_TOKEN:@github.com" > "$HOME/.git_credentials"
|
48
48
|
trap "rm -rf $HOME/.git_credentials" EXIT
|
49
|
-
re git push travis_push gh-pages
|
49
|
+
re git push travis_push gh-pages:master
|
@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
|
|
56
56
|
|
57
57
|
.. code-block:: console
|
58
58
|
|
59
|
-
$ sudo wget
|
59
|
+
$ sudo wget http://dl.embulk.org/embulk-latest.jar -O /usr/local/bin/embulk
|
60
60
|
$ sudo chmod +x /usr/local/bin/embulk
|
61
61
|
|
62
62
|
Step 2. Install Elasticsearch plugin
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
Release 0.6.2
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Built-in plugins
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* ``guess-csv`` guesses charset as ``"MS932"`` instead of ``"Shift_JIS"`` because practically all documents encoded in ``Shift_JIS`` are created on Windows, and the ``Shift_JIS`` implemented by Microsoft corresponds to ``MS932`` in Java. (@kosaki55tea++, @nalsh++)
|
8
|
+
* ``parser-csv`` recovers from errors caused by invalid number formats and skips the row rather than failing the entire transaction (@hito4t++)
|
9
|
+
|
10
|
+
General Changes
|
11
|
+
------------------
|
12
|
+
|
13
|
+
* Changed download URL. We can now use a consistent URL to download the latest jar.
|
14
|
+
|
15
|
+
Release Date
|
16
|
+
------------------
|
17
|
+
2015-04-13
|
@@ -233,7 +233,7 @@ public class CsvParserPlugin
|
|
233
233
|
});
|
234
234
|
pageBuilder.addRecord();
|
235
235
|
|
236
|
-
} catch (CsvTokenizer.InvalidFormatException e) {
|
236
|
+
} catch (CsvTokenizer.InvalidFormatException | CsvRecordValidateException e) {
|
237
237
|
long lineNumber = tokenizer.getCurrentLineNumber();
|
238
238
|
String skippedLine = tokenizer.skipCurrentLine();
|
239
239
|
log.warn(String.format("Skipped (line %d): %s", lineNumber, skippedLine), e);
|
data/lib/embulk/guess/charset.rb
CHANGED
@@ -4,6 +4,16 @@ module Embulk
|
|
4
4
|
class CharsetGuessPlugin < GuessPlugin
|
5
5
|
Plugin.register_guess('charset', self)
|
6
6
|
|
7
|
+
STATIC_MAPPING = {
|
8
|
+
# ISO-8859-1 means ASCII which is a subset of UTF-8 in most of cases
|
9
|
+
# due to lack of sample data set.
|
10
|
+
"ISO-8859-1" => "UTF-8",
|
11
|
+
|
12
|
+
# Shift_JIS is used almost only by Windows that uses "CP932" in fact.
|
13
|
+
# And "CP932" called by Microsoft actually means "MS932" in Java.
|
14
|
+
"Shift_JIS" => "MS932",
|
15
|
+
}
|
16
|
+
|
7
17
|
def guess(config, sample_buffer)
|
8
18
|
# ICU4J
|
9
19
|
detector = com.ibm.icu.text.CharsetDetector.new
|
@@ -13,11 +23,8 @@ module Embulk
|
|
13
23
|
name = "UTF-8"
|
14
24
|
else
|
15
25
|
name = best_match.getName
|
16
|
-
if
|
17
|
-
|
18
|
-
# of UTF-8 in most of cases due to lack of
|
19
|
-
# sample data set
|
20
|
-
name = "UTF-8"
|
26
|
+
if mapped_name = STATIC_MAPPING[name]
|
27
|
+
name = mapped_name
|
21
28
|
end
|
22
29
|
end
|
23
30
|
return {"parser" => {"charset" => name}}
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -290,6 +290,7 @@ files:
|
|
290
290
|
- embulk-docs/src/release/release-0.5.5.rst
|
291
291
|
- embulk-docs/src/release/release-0.6.0.rst
|
292
292
|
- embulk-docs/src/release/release-0.6.1.rst
|
293
|
+
- embulk-docs/src/release/release-0.6.2.rst
|
293
294
|
- embulk-standards/build.gradle
|
294
295
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
295
296
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -395,8 +396,8 @@ files:
|
|
395
396
|
- classpath/bval-jsr303-0.5.jar
|
396
397
|
- classpath/commons-beanutils-core-1.8.3.jar
|
397
398
|
- classpath/commons-lang3-3.1.jar
|
398
|
-
- classpath/embulk-core-0.6.
|
399
|
-
- classpath/embulk-standards-0.6.
|
399
|
+
- classpath/embulk-core-0.6.2.jar
|
400
|
+
- classpath/embulk-standards-0.6.2.jar
|
400
401
|
- classpath/guava-18.0.jar
|
401
402
|
- classpath/guice-3.0.jar
|
402
403
|
- classpath/guice-multibindings-3.0.jar
|