embulk 0.6.5 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8621bf795aec3de19d66acb38d35a988f1a1a8c7
4
- data.tar.gz: 765914d9e5386fd53451ec4a7e03a527a2b811b4
3
+ metadata.gz: 06d9d42d47c71ddd9c290abc3fd90eae09756157
4
+ data.tar.gz: 4a1104ff4fb9657b5a2305120d8ffe0f2992c9ff
5
5
  SHA512:
6
- metadata.gz: 8f693d267fbdc5a24d22e2a1c9a09a668746c4216cb5de63091e608652998bf37deae8080877d91c35abe03c05565a9d19f5eff218ebad5cca21e0de30c988e2
7
- data.tar.gz: 67279b67ab2dc0667a1b0122b32bb45f29de95d7818d364a36f37ad8845469969a9cdfec823bc83722084522c7459bcfef6a528ac6708864c2935bd99b1e3508
6
+ metadata.gz: 9df10a5c5d6a0c77b38352f96d5f1cedbd658c0ac5a41e1720a3c8baa9f65685c742a34d0e6569d6bbb3d2c2e43423b48e3cc8d167dd4f8982568ea92f29d4eb
7
+ data.tar.gz: 75887fa0b97bc9750cca1d4abdeb500d0f8e83fd0ede84f467e671c83754acf0e3abb1a82a18c106d9e1250587f5b286a1c598fadeb7249d0c4320df0279f47c
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Embulk is a plugin-based parallel bulk data loader that helps **data transfer** between various **storages**, **databases**, **NoSQL** and **cloud services**.
4
4
 
5
- You can release plugins to share your efforts of data cleaning, error handling, transaction control, and retrying. Packaging effrots into plugins **brings OSS-style development to the data scripts** which **was tend to be one-time adhoc scripts**.
5
+ You can release plugins to share your efforts of data cleaning, error handling, transaction control, and retrying. Packaging efforts into plugins **brings OSS-style development to the data scripts** which **tend to be one-time ad hoc scripts**.
6
6
 
7
7
  [Embulk, an open-source plugin-based parallel bulk data loader](http://www.slideshare.net/frsyuki/embuk-making-data-integration-works-relaxed) at Slideshare
8
8
 
data/build.gradle CHANGED
@@ -11,7 +11,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
11
11
 
12
12
  allprojects {
13
13
  group = 'org.embulk'
14
- version = '0.6.5'
14
+ version = '0.6.6'
15
15
 
16
16
  ext {
17
17
  jrubyVersion = '1.7.19'
@@ -150,6 +150,9 @@ public class GuessExecutor
150
150
  input.transaction(guessInputConfig, new InputPlugin.Control() {
151
151
  public List<CommitReport> run(TaskSource inputTaskSource, Schema schema, int taskCount)
152
152
  {
153
+ if (taskCount == 0) {
154
+ throw new NoSampleException("No input files to guess");
155
+ }
153
156
  // TODO repeat run with taskIndex++ if NoSampleException happens
154
157
  input.run(inputTaskSource, null, 0, new PageOutput() {
155
158
  @Override
@@ -57,6 +57,10 @@ public class SamplingParserPlugin
57
57
  runner.transaction(samplingInputConfig, new InputPlugin.Control() {
58
58
  public List<CommitReport> run(TaskSource taskSource, Schema schema, int taskCount)
59
59
  {
60
+ if (taskCount == 0) {
61
+ throw new NoSampleException("No input files to read sample data");
62
+ }
63
+ // TODO repeat run with taskIndex++ if NoSampleException happens
60
64
  runner.run(taskSource, schema, 0, new PageOutput() {
61
65
  @Override
62
66
  public void add(Page page)
@@ -103,7 +103,14 @@ public class InputStreamFileInput
103
103
  public void close()
104
104
  {
105
105
  try {
106
- provider.close();
106
+ try {
107
+ if (current != null) {
108
+ current.close();
109
+ current = null;
110
+ }
111
+ } finally {
112
+ provider.close();
113
+ }
107
114
  } catch (IOException ex) {
108
115
  throw new RuntimeException(ex);
109
116
  }
@@ -48,7 +48,7 @@ A configuration file consists of following sections:
48
48
 
49
49
  * **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
50
50
 
51
- * **formatter:** If the output is file-based, fromatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
51
+ * **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
52
52
 
53
53
  * **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip)
54
54
 
@@ -75,7 +75,7 @@ Step 3: Confirm it works
75
75
 
76
76
  The next step is to actually use the plugin.
77
77
 
78
- Let's supporse you have a configuration file named ``your-config.yml``. You can use the plugin using embulk with ``-L`` argument:
78
+ Let's suppose you have a configuration file named ``your-config.yml``. You can use the plugin using embulk with ``-L`` argument:
79
79
 
80
80
  ::
81
81
 
@@ -11,7 +11,7 @@ This article shows how to:
11
11
  * Visualize the data with Kibana interactively.
12
12
  * Schedule the data loading every hour using cron.
13
13
 
14
- This guide assumes you are using Ubuntu 12.0 Precise or Mac OS X.
14
+ This guide assumes you are using Ubuntu 12.04 Precise or Mac OS X.
15
15
 
16
16
  Setup Elasticsearch and Kibana 4
17
17
  ------------------
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.6.6
7
8
  release/release-0.6.5
8
9
  release/release-0.6.4
9
10
  release/release-0.6.3
@@ -0,0 +1,17 @@
1
+ Release 0.6.6
2
+ ==================================
3
+
4
+ General Changes
5
+ ------------------
6
+
7
+ * Fixed a problem where the timestamp format guess code could not guess the format if the string included day 30 or 31.
8
+ * ``guess`` and ``preview`` throw NoSampleException with appropriate error message if there are no input tasks.
9
+
10
+ Plugin API
11
+ ------------------
12
+
13
+ * ``spi.util.InputStreamFileInput#close`` closes currently opened InputStream.
14
+
15
+ Release Date
16
+ ------------------
17
+ 2015-05-04
@@ -36,7 +36,7 @@ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
36
36
 
37
37
  task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
38
38
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
39
- script "${project.name}-${project.version}.gem"
39
+ script "pkg/${project.name}-${project.version}.gem"
40
40
  }
41
41
 
42
42
  task "package"(dependsOn: ["gemspec", "classpath"]) << {
@@ -4,8 +4,8 @@ module Embulk::Guess
4
4
  YEAR = /[1-4][0-9]{3}/
5
5
  MONTH = /10|11|12|[0 ]?[0-9]/
6
6
  MONTH_NODELIM = /10|11|12|[0][0-9]/
7
- DAY = /[1-2][0-9]|[0 ]?[1-9]|30|31/
8
- DAY_NODELIM = /[1-2][0-9]|[0][1-9]|30|31/
7
+ DAY = /31|30|[1-2][0-9]|[0 ]?[1-9]/
8
+ DAY_NODELIM = /31|30|[1-2][0-9]|[0][1-9]/
9
9
  HOUR = /20|21|22|23|24|1[0-9]|[0 ]?[0-9]/
10
10
  HOUR_NODELIM = /20|21|22|23|24|1[0-9]|[0][0-9]/
11
11
  MINUTE = SECOND = /60|[1-5][0-9]|[0 ]?[0-9]/
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.6.5'
2
+ VERSION = '0.6.6'
3
3
  end
@@ -47,6 +47,10 @@ class TimeFormatGuessTest < ::Test::Unit::TestCase
47
47
  assert_guess "%d/%m/%Y", "21/01/2014"
48
48
  end
49
49
 
50
+ def test_format_borders
51
+ assert_guess "%Y-%m-%d %H:%M:%S.%N", "2014-12-31 23:59:59.999999999"
52
+ end
53
+
50
54
  def test_format_iso8601
51
55
  assert_guess "%Y-%m-%d", "1981-04-05"
52
56
  assert_guess "%Y-%m-%dT%H", "2007-04-06T13"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.5
4
+ version: 0.6.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-22 00:00:00.000000000 Z
11
+ date: 2015-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -295,6 +295,7 @@ files:
295
295
  - embulk-docs/src/release/release-0.6.3.rst
296
296
  - embulk-docs/src/release/release-0.6.4.rst
297
297
  - embulk-docs/src/release/release-0.6.5.rst
298
+ - embulk-docs/src/release/release-0.6.6.rst
298
299
  - embulk-standards/build.gradle
299
300
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
300
301
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -400,8 +401,8 @@ files:
400
401
  - classpath/bval-jsr303-0.5.jar
401
402
  - classpath/commons-beanutils-core-1.8.3.jar
402
403
  - classpath/commons-lang3-3.1.jar
403
- - classpath/embulk-core-0.6.5.jar
404
- - classpath/embulk-standards-0.6.5.jar
404
+ - classpath/embulk-core-0.6.6.jar
405
+ - classpath/embulk-standards-0.6.6.jar
405
406
  - classpath/guava-18.0.jar
406
407
  - classpath/guice-3.0.jar
407
408
  - classpath/guice-multibindings-3.0.jar