embulk 0.6.1 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6de3d95e030e043dd82676af00373a34a2e50e47
4
- data.tar.gz: 554e7b89b160fe65d540eeffd1dd03403b08fdaf
3
+ metadata.gz: 148c75f1b85e1d85859d0be3293d85e5fc3f1bab
4
+ data.tar.gz: c07d2801fc7cc4b0caa60d2ff692553b29827cfe
5
5
  SHA512:
6
- metadata.gz: ad8d669c190e0d468c44803fbf5b4614302e19e582a1f66cf99540a4a00ae1baf9261ad568cd6116c8c22fbefe3a7d1d13585ab25e8cbc38ec79837ce823fd40
7
- data.tar.gz: 562d3cff28a2ba94de942a36e768e20f3205cf44a8b95551223702fce8fe1cfe974eb9ce6c6a8cb4b5416189f9b5dadf0b4c5e2657282c90b0932bf4d9cf955c
6
+ metadata.gz: 6f499389219a19663e1e3789b68bf1eea7f650df7921347ec5d0641395a1a1b11a5087f571a9e0ac3536cc1156592cb15b1f5b4283e069f68d456449c90d4c69
7
+ data.tar.gz: 78eeb275c30689f63947961fd1d2f0b01d96abfef0c16b1e68370fdca0570c809f1e4cb38924e67c10f11e86759c88e706dcb32c45f55eaf10876055bdedf8ea
data/README.md CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
30
30
  Following 4 commands install embulk to your home directory:
31
31
 
32
32
  ```
33
- curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.6.1.jar
33
+ curl --create-dirs -o ~/.embulk/bin/embulk -L "http://dl.embulk.org/embulk-latest.jar"
34
34
  chmod +x ~/.embulk/bin/embulk
35
35
  echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
36
36
  source ~/.bashrc
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
45
45
  You can assume the jar file is a .bat file.
46
46
 
47
47
  ```
48
- PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.6.1.jar -OutFile embulk.bat}"
48
+ PowerShell -Command "& {Invoke-WebRequest http://dl.embulk.org/embulk-latest.jar -OutFile embulk.bat}"
49
49
  ```
50
50
 
51
51
  Next step: [Trying the example](#trying-the-example)
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
12
12
 
13
13
  allprojects {
14
14
  group = 'org.embulk'
15
- version = '0.6.1'
15
+ version = '0.6.2'
16
16
 
17
17
  apply plugin: 'java'
18
18
  apply plugin: 'maven-publish'
@@ -252,7 +252,6 @@ task set_version << {
252
252
 
253
253
  List<String> docs = [
254
254
  'README.md',
255
- 'embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst'
256
255
  ]
257
256
  docs.each() { path ->
258
257
  File doc = file(path)
@@ -327,28 +327,6 @@ public class BulkLoader
327
327
  }
328
328
  }
329
329
 
330
- protected ExecutorPlugin newExecutorPlugin(BulkLoaderTask task)
331
- {
332
- return Exec.newPlugin(ExecutorPlugin.class,
333
- task.getExecConfig().get(PluginType.class, "type", new PluginType("local")));
334
- }
335
-
336
- protected InputPlugin newInputPlugin(BulkLoaderTask task)
337
- {
338
- return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
339
- }
340
-
341
- protected List<FilterPlugin> newFilterPlugins(BulkLoaderTask task)
342
- {
343
- return Filters.newFilterPlugins(Exec.session(),
344
- Filters.getPluginTypes(task.getFilterConfigs()));
345
- }
346
-
347
- protected OutputPlugin newOutputPlugin(BulkLoaderTask task)
348
- {
349
- return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
350
- }
351
-
352
330
  public ExecutionResult run(ExecSession exec, final ConfigSource config)
353
331
  {
354
332
  try {
@@ -400,32 +378,6 @@ public class BulkLoader
400
378
  }
401
379
  }
402
380
 
403
- public void doCleanup(ConfigSource config, ResumeState resume)
404
- {
405
- BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
406
- InputPlugin inputPlugin = newInputPlugin(task);
407
- OutputPlugin outputPlugin = newOutputPlugin(task);
408
-
409
- ImmutableList.Builder<CommitReport> successfulInputCommitReports = ImmutableList.builder();
410
- ImmutableList.Builder<CommitReport> successfulOutputCommitReports = ImmutableList.builder();
411
- for (Optional<CommitReport> inputCommitReport : resume.getInputCommitReports()) {
412
- if (inputCommitReport.isPresent()) {
413
- successfulInputCommitReports.add(inputCommitReport.get());
414
- }
415
- }
416
- for (Optional<CommitReport> outputCommitReport : resume.getOutputCommitReports()) {
417
- if (outputCommitReport.isPresent()) {
418
- successfulOutputCommitReports.add(outputCommitReport.get());
419
- }
420
- }
421
-
422
- inputPlugin.cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
423
- resume.getInputCommitReports().size(), successfulInputCommitReports.build());
424
-
425
- outputPlugin.cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
426
- resume.getOutputCommitReports().size(), successfulOutputCommitReports.build());
427
- }
428
-
429
381
  private static class ProcessPluginSet
430
382
  {
431
383
  private final PluginType inputPluginType;
@@ -477,6 +429,37 @@ public class BulkLoader
477
429
  }
478
430
  }
479
431
 
432
+ public void doCleanup(ConfigSource config, ResumeState resume)
433
+ {
434
+ BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
435
+ ProcessPluginSet plugins = new ProcessPluginSet(task); // TODO don't create filter plugins
436
+
437
+ ImmutableList.Builder<CommitReport> successfulInputCommitReports = ImmutableList.builder();
438
+ ImmutableList.Builder<CommitReport> successfulOutputCommitReports = ImmutableList.builder();
439
+ for (Optional<CommitReport> inputCommitReport : resume.getInputCommitReports()) {
440
+ if (inputCommitReport.isPresent()) {
441
+ successfulInputCommitReports.add(inputCommitReport.get());
442
+ }
443
+ }
444
+ for (Optional<CommitReport> outputCommitReport : resume.getOutputCommitReports()) {
445
+ if (outputCommitReport.isPresent()) {
446
+ successfulOutputCommitReports.add(outputCommitReport.get());
447
+ }
448
+ }
449
+
450
+ plugins.getInputPlugin().cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
451
+ resume.getInputCommitReports().size(), successfulInputCommitReports.build());
452
+
453
+ plugins.getOutputPlugin().cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
454
+ resume.getOutputCommitReports().size(), successfulOutputCommitReports.build());
455
+ }
456
+
457
+ private ExecutorPlugin newExecutorPlugin(BulkLoaderTask task)
458
+ {
459
+ return Exec.newPlugin(ExecutorPlugin.class,
460
+ task.getExecConfig().get(PluginType.class, "type", new PluginType("local")));
461
+ }
462
+
480
463
  private ExecutionResult doRun(ConfigSource config)
481
464
  {
482
465
  final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
@@ -16,7 +16,7 @@ function r() {
16
16
  [ "$TRAVIS_BRANCH" != "master" -a "$TRAVIS_BRANCH" != "$(git describe --tags --always HEAD)" ] && exit 0
17
17
 
18
18
  revision="$(git rev-parse HEAD)"
19
- remote="$(git config remote.origin.url | sed "s+^git:+https:+")"
19
+ remote="https://github.com/embulk/embulk.github.io.git"
20
20
  re ./gradlew site
21
21
 
22
22
  r git fetch --unshallow || echo "using complete repository."
@@ -28,7 +28,7 @@ re cd gh_pages
28
28
  re git remote add travis_push "$remote"
29
29
  re git fetch travis_push
30
30
 
31
- re git checkout -b gh-pages travis_push/gh-pages
31
+ re git checkout -b gh-pages travis_push/master
32
32
  re rm -rf docs
33
33
  re cp -a ../embulk-docs/build/html docs
34
34
  re git add --all docs
@@ -46,4 +46,4 @@ fi
46
46
  re git config credential.helper "store --file=$HOME/.git_credentials"
47
47
  echo "https://$GITHUB_TOKEN:@github.com" > "$HOME/.git_credentials"
48
48
  trap "rm -rf $HOME/.git_credentials" EXIT
49
- re git push travis_push gh-pages
49
+ re git push travis_push gh-pages:master
@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
56
56
 
57
57
  .. code-block:: console
58
58
 
59
- $ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.6.1.jar -O /usr/local/bin/embulk
59
+ $ sudo wget http://dl.embulk.org/embulk-latest.jar -O /usr/local/bin/embulk
60
60
  $ sudo chmod +x /usr/local/bin/embulk
61
61
 
62
62
  Step 2. Install Elasticsearch plugin
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.6.2
7
8
  release/release-0.6.1
8
9
  release/release-0.6.0
9
10
  release/release-0.5.5
@@ -0,0 +1,17 @@
1
+ Release 0.6.2
2
+ ==================================
3
+
4
+ Built-in plugins
5
+ ------------------
6
+
7
+ * ``guess-csv`` guesses charset as ``"MS932"`` instead of ``"Shift_JIS"`` because practically all documents encoded in ``Shift_JIS`` are created on Windows, and ``Shift_JIS`` as implemented by Microsoft means ``MS932`` in Java. (@kosaki55tea++, @nalsh++)
8
+ * ``parser-csv`` recovers from errors caused by invalid number formats and skips the row rather than failing the entire transaction (@hito4t++)
9
+
10
+ General Changes
11
+ ------------------
12
+
13
+ * Changed download URL. We can use a consistent URL to download the latest jar.
14
+
15
+ Release Date
16
+ ------------------
17
+ 2015-04-13
@@ -233,7 +233,7 @@ public class CsvParserPlugin
233
233
  });
234
234
  pageBuilder.addRecord();
235
235
 
236
- } catch (CsvTokenizer.InvalidFormatException e) {
236
+ } catch (CsvTokenizer.InvalidFormatException | CsvRecordValidateException e) {
237
237
  long lineNumber = tokenizer.getCurrentLineNumber();
238
238
  String skippedLine = tokenizer.skipCurrentLine();
239
239
  log.warn(String.format("Skipped (line %d): %s", lineNumber, skippedLine), e);
@@ -4,6 +4,16 @@ module Embulk
4
4
  class CharsetGuessPlugin < GuessPlugin
5
5
  Plugin.register_guess('charset', self)
6
6
 
7
+ STATIC_MAPPING = {
8
+ # ISO-8859-1 means ASCII which is a subset of UTF-8 in most of cases
9
+ # due to lack of sample data set.
10
+ "ISO-8859-1" => "UTF-8",
11
+
12
+ # Shift_JIS is used almost only by Windows that uses "CP932" in fact.
13
+ # And "CP932" called by Microsoft actually means "MS932" in Java.
14
+ "Shift_JIS" => "MS932",
15
+ }
16
+
7
17
  def guess(config, sample_buffer)
8
18
  # ICU4J
9
19
  detector = com.ibm.icu.text.CharsetDetector.new
@@ -13,11 +23,8 @@ module Embulk
13
23
  name = "UTF-8"
14
24
  else
15
25
  name = best_match.getName
16
- if name == "ISO-8859-1"
17
- # ISO-8859-1 means ASCII which is a subset
18
- # of UTF-8 in most of cases due to lack of
19
- # sample data set
20
- name = "UTF-8"
26
+ if mapped_name = STATIC_MAPPING[name]
27
+ name = mapped_name
21
28
  end
22
29
  end
23
30
  return {"parser" => {"charset" => name}}
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.6.1'
2
+ VERSION = '0.6.2'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-09 00:00:00.000000000 Z
11
+ date: 2015-04-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -290,6 +290,7 @@ files:
290
290
  - embulk-docs/src/release/release-0.5.5.rst
291
291
  - embulk-docs/src/release/release-0.6.0.rst
292
292
  - embulk-docs/src/release/release-0.6.1.rst
293
+ - embulk-docs/src/release/release-0.6.2.rst
293
294
  - embulk-standards/build.gradle
294
295
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
295
296
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -395,8 +396,8 @@ files:
395
396
  - classpath/bval-jsr303-0.5.jar
396
397
  - classpath/commons-beanutils-core-1.8.3.jar
397
398
  - classpath/commons-lang3-3.1.jar
398
- - classpath/embulk-core-0.6.1.jar
399
- - classpath/embulk-standards-0.6.1.jar
399
+ - classpath/embulk-core-0.6.2.jar
400
+ - classpath/embulk-standards-0.6.2.jar
400
401
  - classpath/guava-18.0.jar
401
402
  - classpath/guice-3.0.jar
402
403
  - classpath/guice-multibindings-3.0.jar