embulk 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6de3d95e030e043dd82676af00373a34a2e50e47
4
- data.tar.gz: 554e7b89b160fe65d540eeffd1dd03403b08fdaf
3
+ metadata.gz: 148c75f1b85e1d85859d0be3293d85e5fc3f1bab
4
+ data.tar.gz: c07d2801fc7cc4b0caa60d2ff692553b29827cfe
5
5
  SHA512:
6
- metadata.gz: ad8d669c190e0d468c44803fbf5b4614302e19e582a1f66cf99540a4a00ae1baf9261ad568cd6116c8c22fbefe3a7d1d13585ab25e8cbc38ec79837ce823fd40
7
- data.tar.gz: 562d3cff28a2ba94de942a36e768e20f3205cf44a8b95551223702fce8fe1cfe974eb9ce6c6a8cb4b5416189f9b5dadf0b4c5e2657282c90b0932bf4d9cf955c
6
+ metadata.gz: 6f499389219a19663e1e3789b68bf1eea7f650df7921347ec5d0641395a1a1b11a5087f571a9e0ac3536cc1156592cb15b1f5b4283e069f68d456449c90d4c69
7
+ data.tar.gz: 78eeb275c30689f63947961fd1d2f0b01d96abfef0c16b1e68370fdca0570c809f1e4cb38924e67c10f11e86759c88e706dcb32c45f55eaf10876055bdedf8ea
data/README.md CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
30
30
  Following 4 commands install embulk to your home directory:
31
31
 
32
32
  ```
33
- curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.6.1.jar
33
+ curl --create-dirs -o ~/.embulk/bin/embulk -L "http://dl.embulk.org/embulk-latest.jar"
34
34
  chmod +x ~/.embulk/bin/embulk
35
35
  echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
36
36
  source ~/.bashrc
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
45
45
  You can assume the jar file is a .bat file.
46
46
 
47
47
  ```
48
- PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.6.1.jar -OutFile embulk.bat}"
48
+ PowerShell -Command "& {Invoke-WebRequest http://dl.embulk.org/embulk-latest.jar -OutFile embulk.bat}"
49
49
  ```
50
50
 
51
51
  Next step: [Trying the example](#trying-the-example)
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
12
12
 
13
13
  allprojects {
14
14
  group = 'org.embulk'
15
- version = '0.6.1'
15
+ version = '0.6.2'
16
16
 
17
17
  apply plugin: 'java'
18
18
  apply plugin: 'maven-publish'
@@ -252,7 +252,6 @@ task set_version << {
252
252
 
253
253
  List<String> docs = [
254
254
  'README.md',
255
- 'embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst'
256
255
  ]
257
256
  docs.each() { path ->
258
257
  File doc = file(path)
@@ -327,28 +327,6 @@ public class BulkLoader
327
327
  }
328
328
  }
329
329
 
330
- protected ExecutorPlugin newExecutorPlugin(BulkLoaderTask task)
331
- {
332
- return Exec.newPlugin(ExecutorPlugin.class,
333
- task.getExecConfig().get(PluginType.class, "type", new PluginType("local")));
334
- }
335
-
336
- protected InputPlugin newInputPlugin(BulkLoaderTask task)
337
- {
338
- return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
339
- }
340
-
341
- protected List<FilterPlugin> newFilterPlugins(BulkLoaderTask task)
342
- {
343
- return Filters.newFilterPlugins(Exec.session(),
344
- Filters.getPluginTypes(task.getFilterConfigs()));
345
- }
346
-
347
- protected OutputPlugin newOutputPlugin(BulkLoaderTask task)
348
- {
349
- return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
350
- }
351
-
352
330
  public ExecutionResult run(ExecSession exec, final ConfigSource config)
353
331
  {
354
332
  try {
@@ -400,32 +378,6 @@ public class BulkLoader
400
378
  }
401
379
  }
402
380
 
403
- public void doCleanup(ConfigSource config, ResumeState resume)
404
- {
405
- BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
406
- InputPlugin inputPlugin = newInputPlugin(task);
407
- OutputPlugin outputPlugin = newOutputPlugin(task);
408
-
409
- ImmutableList.Builder<CommitReport> successfulInputCommitReports = ImmutableList.builder();
410
- ImmutableList.Builder<CommitReport> successfulOutputCommitReports = ImmutableList.builder();
411
- for (Optional<CommitReport> inputCommitReport : resume.getInputCommitReports()) {
412
- if (inputCommitReport.isPresent()) {
413
- successfulInputCommitReports.add(inputCommitReport.get());
414
- }
415
- }
416
- for (Optional<CommitReport> outputCommitReport : resume.getOutputCommitReports()) {
417
- if (outputCommitReport.isPresent()) {
418
- successfulOutputCommitReports.add(outputCommitReport.get());
419
- }
420
- }
421
-
422
- inputPlugin.cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
423
- resume.getInputCommitReports().size(), successfulInputCommitReports.build());
424
-
425
- outputPlugin.cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
426
- resume.getOutputCommitReports().size(), successfulOutputCommitReports.build());
427
- }
428
-
429
381
  private static class ProcessPluginSet
430
382
  {
431
383
  private final PluginType inputPluginType;
@@ -477,6 +429,37 @@ public class BulkLoader
477
429
  }
478
430
  }
479
431
 
432
+ public void doCleanup(ConfigSource config, ResumeState resume)
433
+ {
434
+ BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
435
+ ProcessPluginSet plugins = new ProcessPluginSet(task); // TODO don't create filter plugins
436
+
437
+ ImmutableList.Builder<CommitReport> successfulInputCommitReports = ImmutableList.builder();
438
+ ImmutableList.Builder<CommitReport> successfulOutputCommitReports = ImmutableList.builder();
439
+ for (Optional<CommitReport> inputCommitReport : resume.getInputCommitReports()) {
440
+ if (inputCommitReport.isPresent()) {
441
+ successfulInputCommitReports.add(inputCommitReport.get());
442
+ }
443
+ }
444
+ for (Optional<CommitReport> outputCommitReport : resume.getOutputCommitReports()) {
445
+ if (outputCommitReport.isPresent()) {
446
+ successfulOutputCommitReports.add(outputCommitReport.get());
447
+ }
448
+ }
449
+
450
+ plugins.getInputPlugin().cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
451
+ resume.getInputCommitReports().size(), successfulInputCommitReports.build());
452
+
453
+ plugins.getOutputPlugin().cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
454
+ resume.getOutputCommitReports().size(), successfulOutputCommitReports.build());
455
+ }
456
+
457
+ private ExecutorPlugin newExecutorPlugin(BulkLoaderTask task)
458
+ {
459
+ return Exec.newPlugin(ExecutorPlugin.class,
460
+ task.getExecConfig().get(PluginType.class, "type", new PluginType("local")));
461
+ }
462
+
480
463
  private ExecutionResult doRun(ConfigSource config)
481
464
  {
482
465
  final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
@@ -16,7 +16,7 @@ function r() {
16
16
  [ "$TRAVIS_BRANCH" != "master" -a "$TRAVIS_BRANCH" != "$(git describe --tags --always HEAD)" ] && exit 0
17
17
 
18
18
  revision="$(git rev-parse HEAD)"
19
- remote="$(git config remote.origin.url | sed "s+^git:+https:+")"
19
+ remote="https://github.com/embulk/embulk.github.io.git"
20
20
  re ./gradlew site
21
21
 
22
22
  r git fetch --unshallow || echo "using complete repository."
@@ -28,7 +28,7 @@ re cd gh_pages
28
28
  re git remote add travis_push "$remote"
29
29
  re git fetch travis_push
30
30
 
31
- re git checkout -b gh-pages travis_push/gh-pages
31
+ re git checkout -b gh-pages travis_push/master
32
32
  re rm -rf docs
33
33
  re cp -a ../embulk-docs/build/html docs
34
34
  re git add --all docs
@@ -46,4 +46,4 @@ fi
46
46
  re git config credential.helper "store --file=$HOME/.git_credentials"
47
47
  echo "https://$GITHUB_TOKEN:@github.com" > "$HOME/.git_credentials"
48
48
  trap "rm -rf $HOME/.git_credentials" EXIT
49
- re git push travis_push gh-pages
49
+ re git push travis_push gh-pages:master
@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
56
56
 
57
57
  .. code-block:: console
58
58
 
59
- $ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.6.1.jar -O /usr/local/bin/embulk
59
+ $ sudo wget http://dl.embulk.org/embulk-latest.jar -O /usr/local/bin/embulk
60
60
  $ sudo chmod +x /usr/local/bin/embulk
61
61
 
62
62
  Step 2. Install Elasticsearch plugin
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.6.2
7
8
  release/release-0.6.1
8
9
  release/release-0.6.0
9
10
  release/release-0.5.5
@@ -0,0 +1,17 @@
1
+ Release 0.6.2
2
+ ==================================
3
+
4
+ Built-in plugins
5
+ ------------------
6
+
7
+ * ``guess-csv`` guesses charset as ``"MS932"`` instead of ``"Shift_JIS"`` because, in practice, almost all documents encoded in ``Shift_JIS`` are created on Windows, and ``Shift_JIS`` as implemented by Microsoft means ``MS932`` in Java. (@kosaki55tea++, @nalsh++)
8
+ * ``parser-csv`` recovers from errors caused by invalid number formats and skips the row rather than failing the entire transaction (@hito4t++)
9
+
10
+ General Changes
11
+ ------------------
12
+
13
+ * Changed download URL. We can use a consistent URL to download the latest jar.
14
+
15
+ Release Date
16
+ ------------------
17
+ 2015-04-13
@@ -233,7 +233,7 @@ public class CsvParserPlugin
233
233
  });
234
234
  pageBuilder.addRecord();
235
235
 
236
- } catch (CsvTokenizer.InvalidFormatException e) {
236
+ } catch (CsvTokenizer.InvalidFormatException | CsvRecordValidateException e) {
237
237
  long lineNumber = tokenizer.getCurrentLineNumber();
238
238
  String skippedLine = tokenizer.skipCurrentLine();
239
239
  log.warn(String.format("Skipped (line %d): %s", lineNumber, skippedLine), e);
@@ -4,6 +4,16 @@ module Embulk
4
4
  class CharsetGuessPlugin < GuessPlugin
5
5
  Plugin.register_guess('charset', self)
6
6
 
7
+ STATIC_MAPPING = {
8
+ # ISO-8859-1 means ASCII which is a subset of UTF-8 in most of cases
9
+ # due to lack of sample data set.
10
+ "ISO-8859-1" => "UTF-8",
11
+
12
+ # Shift_JIS is used almost only by Windows that uses "CP932" in fact.
13
+ # And "CP932" called by Microsoft actually means "MS932" in Java.
14
+ "Shift_JIS" => "MS932",
15
+ }
16
+
7
17
  def guess(config, sample_buffer)
8
18
  # ICU4J
9
19
  detector = com.ibm.icu.text.CharsetDetector.new
@@ -13,11 +23,8 @@ module Embulk
13
23
  name = "UTF-8"
14
24
  else
15
25
  name = best_match.getName
16
- if name == "ISO-8859-1"
17
- # ISO-8859-1 means ASCII which is a subset
18
- # of UTF-8 in most of cases due to lack of
19
- # sample data set
20
- name = "UTF-8"
26
+ if mapped_name = STATIC_MAPPING[name]
27
+ name = mapped_name
21
28
  end
22
29
  end
23
30
  return {"parser" => {"charset" => name}}
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.6.1'
2
+ VERSION = '0.6.2'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-09 00:00:00.000000000 Z
11
+ date: 2015-04-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -290,6 +290,7 @@ files:
290
290
  - embulk-docs/src/release/release-0.5.5.rst
291
291
  - embulk-docs/src/release/release-0.6.0.rst
292
292
  - embulk-docs/src/release/release-0.6.1.rst
293
+ - embulk-docs/src/release/release-0.6.2.rst
293
294
  - embulk-standards/build.gradle
294
295
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
295
296
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -395,8 +396,8 @@ files:
395
396
  - classpath/bval-jsr303-0.5.jar
396
397
  - classpath/commons-beanutils-core-1.8.3.jar
397
398
  - classpath/commons-lang3-3.1.jar
398
- - classpath/embulk-core-0.6.1.jar
399
- - classpath/embulk-standards-0.6.1.jar
399
+ - classpath/embulk-core-0.6.2.jar
400
+ - classpath/embulk-standards-0.6.2.jar
400
401
  - classpath/guava-18.0.jar
401
402
  - classpath/guice-3.0.jar
402
403
  - classpath/guice-multibindings-3.0.jar