embulk 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/build.gradle +1 -2
- data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +31 -48
- data/embulk-docs/push-gh-pages.sh +3 -3
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +1 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.2.rst +17 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +1 -1
- data/lib/embulk/guess/charset.rb +12 -5
- data/lib/embulk/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 148c75f1b85e1d85859d0be3293d85e5fc3f1bab
|
4
|
+
data.tar.gz: c07d2801fc7cc4b0caa60d2ff692553b29827cfe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6f499389219a19663e1e3789b68bf1eea7f650df7921347ec5d0641395a1a1b11a5087f571a9e0ac3536cc1156592cb15b1f5b4283e069f68d456449c90d4c69
|
7
|
+
data.tar.gz: 78eeb275c30689f63947961fd1d2f0b01d96abfef0c16b1e68370fdca0570c809f1e4cb38924e67c10f11e86759c88e706dcb32c45f55eaf10876055bdedf8ea
|
data/README.md
CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
30
30
|
Following 4 commands install embulk to your home directory:
|
31
31
|
|
32
32
|
```
|
33
|
-
curl --create-dirs -o ~/.embulk/bin/embulk -L
|
33
|
+
curl --create-dirs -o ~/.embulk/bin/embulk -L "http://dl.embulk.org/embulk-latest.jar"
|
34
34
|
chmod +x ~/.embulk/bin/embulk
|
35
35
|
echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
|
36
36
|
source ~/.bashrc
|
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
45
45
|
You can assume the jar file is a .bat file.
|
46
46
|
|
47
47
|
```
|
48
|
-
PowerShell -Command "& {Invoke-WebRequest
|
48
|
+
PowerShell -Command "& {Invoke-WebRequest http://dl.embulk.org/embulk-latest.jar -OutFile embulk.bat}"
|
49
49
|
```
|
50
50
|
|
51
51
|
Next step: [Trying the example](#trying-the-example)
|
data/build.gradle
CHANGED
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
12
12
|
|
13
13
|
allprojects {
|
14
14
|
group = 'org.embulk'
|
15
|
-
version = '0.6.
|
15
|
+
version = '0.6.2'
|
16
16
|
|
17
17
|
apply plugin: 'java'
|
18
18
|
apply plugin: 'maven-publish'
|
@@ -252,7 +252,6 @@ task set_version << {
|
|
252
252
|
|
253
253
|
List<String> docs = [
|
254
254
|
'README.md',
|
255
|
-
'embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst'
|
256
255
|
]
|
257
256
|
docs.each() { path ->
|
258
257
|
File doc = file(path)
|
@@ -327,28 +327,6 @@ public class BulkLoader
|
|
327
327
|
}
|
328
328
|
}
|
329
329
|
|
330
|
-
protected ExecutorPlugin newExecutorPlugin(BulkLoaderTask task)
|
331
|
-
{
|
332
|
-
return Exec.newPlugin(ExecutorPlugin.class,
|
333
|
-
task.getExecConfig().get(PluginType.class, "type", new PluginType("local")));
|
334
|
-
}
|
335
|
-
|
336
|
-
protected InputPlugin newInputPlugin(BulkLoaderTask task)
|
337
|
-
{
|
338
|
-
return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
|
339
|
-
}
|
340
|
-
|
341
|
-
protected List<FilterPlugin> newFilterPlugins(BulkLoaderTask task)
|
342
|
-
{
|
343
|
-
return Filters.newFilterPlugins(Exec.session(),
|
344
|
-
Filters.getPluginTypes(task.getFilterConfigs()));
|
345
|
-
}
|
346
|
-
|
347
|
-
protected OutputPlugin newOutputPlugin(BulkLoaderTask task)
|
348
|
-
{
|
349
|
-
return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
|
350
|
-
}
|
351
|
-
|
352
330
|
public ExecutionResult run(ExecSession exec, final ConfigSource config)
|
353
331
|
{
|
354
332
|
try {
|
@@ -400,32 +378,6 @@ public class BulkLoader
|
|
400
378
|
}
|
401
379
|
}
|
402
380
|
|
403
|
-
public void doCleanup(ConfigSource config, ResumeState resume)
|
404
|
-
{
|
405
|
-
BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
|
406
|
-
InputPlugin inputPlugin = newInputPlugin(task);
|
407
|
-
OutputPlugin outputPlugin = newOutputPlugin(task);
|
408
|
-
|
409
|
-
ImmutableList.Builder<CommitReport> successfulInputCommitReports = ImmutableList.builder();
|
410
|
-
ImmutableList.Builder<CommitReport> successfulOutputCommitReports = ImmutableList.builder();
|
411
|
-
for (Optional<CommitReport> inputCommitReport : resume.getInputCommitReports()) {
|
412
|
-
if (inputCommitReport.isPresent()) {
|
413
|
-
successfulInputCommitReports.add(inputCommitReport.get());
|
414
|
-
}
|
415
|
-
}
|
416
|
-
for (Optional<CommitReport> outputCommitReport : resume.getOutputCommitReports()) {
|
417
|
-
if (outputCommitReport.isPresent()) {
|
418
|
-
successfulOutputCommitReports.add(outputCommitReport.get());
|
419
|
-
}
|
420
|
-
}
|
421
|
-
|
422
|
-
inputPlugin.cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
|
423
|
-
resume.getInputCommitReports().size(), successfulInputCommitReports.build());
|
424
|
-
|
425
|
-
outputPlugin.cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
|
426
|
-
resume.getOutputCommitReports().size(), successfulOutputCommitReports.build());
|
427
|
-
}
|
428
|
-
|
429
381
|
private static class ProcessPluginSet
|
430
382
|
{
|
431
383
|
private final PluginType inputPluginType;
|
@@ -477,6 +429,37 @@ public class BulkLoader
|
|
477
429
|
}
|
478
430
|
}
|
479
431
|
|
432
|
+
public void doCleanup(ConfigSource config, ResumeState resume)
|
433
|
+
{
|
434
|
+
BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
|
435
|
+
ProcessPluginSet plugins = new ProcessPluginSet(task); // TODO don't create filter plugins
|
436
|
+
|
437
|
+
ImmutableList.Builder<CommitReport> successfulInputCommitReports = ImmutableList.builder();
|
438
|
+
ImmutableList.Builder<CommitReport> successfulOutputCommitReports = ImmutableList.builder();
|
439
|
+
for (Optional<CommitReport> inputCommitReport : resume.getInputCommitReports()) {
|
440
|
+
if (inputCommitReport.isPresent()) {
|
441
|
+
successfulInputCommitReports.add(inputCommitReport.get());
|
442
|
+
}
|
443
|
+
}
|
444
|
+
for (Optional<CommitReport> outputCommitReport : resume.getOutputCommitReports()) {
|
445
|
+
if (outputCommitReport.isPresent()) {
|
446
|
+
successfulOutputCommitReports.add(outputCommitReport.get());
|
447
|
+
}
|
448
|
+
}
|
449
|
+
|
450
|
+
plugins.getInputPlugin().cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
|
451
|
+
resume.getInputCommitReports().size(), successfulInputCommitReports.build());
|
452
|
+
|
453
|
+
plugins.getOutputPlugin().cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
|
454
|
+
resume.getOutputCommitReports().size(), successfulOutputCommitReports.build());
|
455
|
+
}
|
456
|
+
|
457
|
+
private ExecutorPlugin newExecutorPlugin(BulkLoaderTask task)
|
458
|
+
{
|
459
|
+
return Exec.newPlugin(ExecutorPlugin.class,
|
460
|
+
task.getExecConfig().get(PluginType.class, "type", new PluginType("local")));
|
461
|
+
}
|
462
|
+
|
480
463
|
private ExecutionResult doRun(ConfigSource config)
|
481
464
|
{
|
482
465
|
final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
|
@@ -16,7 +16,7 @@ function r() {
|
|
16
16
|
[ "$TRAVIS_BRANCH" != "master" -a "$TRAVIS_BRANCH" != "$(git describe --tags --always HEAD)" ] && exit 0
|
17
17
|
|
18
18
|
revision="$(git rev-parse HEAD)"
|
19
|
-
remote="
|
19
|
+
remote="https://github.com/embulk/embulk.github.io.git"
|
20
20
|
re ./gradlew site
|
21
21
|
|
22
22
|
r git fetch --unshallow || echo "using complete repository."
|
@@ -28,7 +28,7 @@ re cd gh_pages
|
|
28
28
|
re git remote add travis_push "$remote"
|
29
29
|
re git fetch travis_push
|
30
30
|
|
31
|
-
re git checkout -b gh-pages travis_push/
|
31
|
+
re git checkout -b gh-pages travis_push/master
|
32
32
|
re rm -rf docs
|
33
33
|
re cp -a ../embulk-docs/build/html docs
|
34
34
|
re git add --all docs
|
@@ -46,4 +46,4 @@ fi
|
|
46
46
|
re git config credential.helper "store --file=$HOME/.git_credentials"
|
47
47
|
echo "https://$GITHUB_TOKEN:@github.com" > "$HOME/.git_credentials"
|
48
48
|
trap "rm -rf $HOME/.git_credentials" EXIT
|
49
|
-
re git push travis_push gh-pages
|
49
|
+
re git push travis_push gh-pages:master
|
@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
|
|
56
56
|
|
57
57
|
.. code-block:: console
|
58
58
|
|
59
|
-
$ sudo wget
|
59
|
+
$ sudo wget http://dl.embulk.org/embulk-latest.jar -O /usr/local/bin/embulk
|
60
60
|
$ sudo chmod +x /usr/local/bin/embulk
|
61
61
|
|
62
62
|
Step 2. Install Elasticsearch plugin
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
Release 0.6.2
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Built-in plugins
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* ``guess-csv`` guesses charset as ``"MS932"`` instead of ``"Shift_JIS"`` because, in practice, almost all documents encoded in ``Shift_JIS`` are created on Windows, and ``Shift_JIS`` as implemented by Microsoft corresponds to ``MS932`` in Java. (@kosaki55tea++, @nalsh++)
|
8
|
+
* ``parser-csv`` recovers from errors caused by invalid number formats and skips the row rather than failing the entire transaction (@hito4t++)
|
9
|
+
|
10
|
+
General Changes
|
11
|
+
------------------
|
12
|
+
|
13
|
+
* Changed download URL. We can now use a consistent URL to download the latest jar.
|
14
|
+
|
15
|
+
Release Date
|
16
|
+
------------------
|
17
|
+
2015-04-13
|
@@ -233,7 +233,7 @@ public class CsvParserPlugin
|
|
233
233
|
});
|
234
234
|
pageBuilder.addRecord();
|
235
235
|
|
236
|
-
} catch (CsvTokenizer.InvalidFormatException e) {
|
236
|
+
} catch (CsvTokenizer.InvalidFormatException | CsvRecordValidateException e) {
|
237
237
|
long lineNumber = tokenizer.getCurrentLineNumber();
|
238
238
|
String skippedLine = tokenizer.skipCurrentLine();
|
239
239
|
log.warn(String.format("Skipped (line %d): %s", lineNumber, skippedLine), e);
|
data/lib/embulk/guess/charset.rb
CHANGED
@@ -4,6 +4,16 @@ module Embulk
|
|
4
4
|
class CharsetGuessPlugin < GuessPlugin
|
5
5
|
Plugin.register_guess('charset', self)
|
6
6
|
|
7
|
+
STATIC_MAPPING = {
|
8
|
+
# ISO-8859-1 means ASCII which is a subset of UTF-8 in most of cases
|
9
|
+
# due to lack of sample data set.
|
10
|
+
"ISO-8859-1" => "UTF-8",
|
11
|
+
|
12
|
+
# Shift_JIS is used almost only by Windows that uses "CP932" in fact.
|
13
|
+
# And "CP932" called by Microsoft actually means "MS932" in Java.
|
14
|
+
"Shift_JIS" => "MS932",
|
15
|
+
}
|
16
|
+
|
7
17
|
def guess(config, sample_buffer)
|
8
18
|
# ICU4J
|
9
19
|
detector = com.ibm.icu.text.CharsetDetector.new
|
@@ -13,11 +23,8 @@ module Embulk
|
|
13
23
|
name = "UTF-8"
|
14
24
|
else
|
15
25
|
name = best_match.getName
|
16
|
-
if
|
17
|
-
|
18
|
-
# of UTF-8 in most of cases due to lack of
|
19
|
-
# sample data set
|
20
|
-
name = "UTF-8"
|
26
|
+
if mapped_name = STATIC_MAPPING[name]
|
27
|
+
name = mapped_name
|
21
28
|
end
|
22
29
|
end
|
23
30
|
return {"parser" => {"charset" => name}}
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -290,6 +290,7 @@ files:
|
|
290
290
|
- embulk-docs/src/release/release-0.5.5.rst
|
291
291
|
- embulk-docs/src/release/release-0.6.0.rst
|
292
292
|
- embulk-docs/src/release/release-0.6.1.rst
|
293
|
+
- embulk-docs/src/release/release-0.6.2.rst
|
293
294
|
- embulk-standards/build.gradle
|
294
295
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
295
296
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -395,8 +396,8 @@ files:
|
|
395
396
|
- classpath/bval-jsr303-0.5.jar
|
396
397
|
- classpath/commons-beanutils-core-1.8.3.jar
|
397
398
|
- classpath/commons-lang3-3.1.jar
|
398
|
-
- classpath/embulk-core-0.6.
|
399
|
-
- classpath/embulk-standards-0.6.
|
399
|
+
- classpath/embulk-core-0.6.2.jar
|
400
|
+
- classpath/embulk-standards-0.6.2.jar
|
400
401
|
- classpath/guava-18.0.jar
|
401
402
|
- classpath/guice-3.0.jar
|
402
403
|
- classpath/guice-multibindings-3.0.jar
|