embulk 0.6.5 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +3 -0
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +4 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +8 -1
- data/embulk-docs/src/built-in.rst +1 -1
- data/embulk-docs/src/customization.rst +1 -1
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +1 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.6.rst +17 -0
- data/lib/embulk/data/new/java/build.gradle.erb +1 -1
- data/lib/embulk/guess/time_format_guess.rb +2 -2
- data/lib/embulk/version.rb +1 -1
- data/test/guess/test_time_format_guess.rb +4 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06d9d42d47c71ddd9c290abc3fd90eae09756157
|
4
|
+
data.tar.gz: 4a1104ff4fb9657b5a2305120d8ffe0f2992c9ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9df10a5c5d6a0c77b38352f96d5f1cedbd658c0ac5a41e1720a3c8baa9f65685c742a34d0e6569d6bbb3d2c2e43423b48e3cc8d167dd4f8982568ea92f29d4eb
|
7
|
+
data.tar.gz: 75887fa0b97bc9750cca1d4abdeb500d0f8e83fd0ede84f467e671c83754acf0e3abb1a82a18c106d9e1250587f5b286a1c598fadeb7249d0c4320df0279f47c
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Embulk is a plugin-based parallel bulk data loader that helps **data transfer** between various **storages**, **databases**, **NoSQL** and **cloud services**.
|
4
4
|
|
5
|
-
You can release plugins to share your efforts of data cleaning, error handling, transaction control, and retrying. Packaging
|
5
|
+
You can release plugins to share your efforts of data cleaning, error handling, transaction control, and retrying. Packaging efforts into plugins **brings OSS-style development to the data scripts**, which **tended to be one-time ad hoc scripts**.
|
6
6
|
|
7
7
|
[Embulk, an open-source plugin-based parallel bulk data loader](http://www.slideshare.net/frsyuki/embuk-making-data-integration-works-relaxed) at Slideshare
|
8
8
|
|
data/build.gradle
CHANGED
@@ -150,6 +150,9 @@ public class GuessExecutor
|
|
150
150
|
input.transaction(guessInputConfig, new InputPlugin.Control() {
|
151
151
|
public List<CommitReport> run(TaskSource inputTaskSource, Schema schema, int taskCount)
|
152
152
|
{
|
153
|
+
if (taskCount == 0) {
|
154
|
+
throw new NoSampleException("No input files to guess");
|
155
|
+
}
|
153
156
|
// TODO repeat run with taskIndex++ if NoSampleException happens
|
154
157
|
input.run(inputTaskSource, null, 0, new PageOutput() {
|
155
158
|
@Override
|
@@ -57,6 +57,10 @@ public class SamplingParserPlugin
|
|
57
57
|
runner.transaction(samplingInputConfig, new InputPlugin.Control() {
|
58
58
|
public List<CommitReport> run(TaskSource taskSource, Schema schema, int taskCount)
|
59
59
|
{
|
60
|
+
if (taskCount == 0) {
|
61
|
+
throw new NoSampleException("No input files to read sample data");
|
62
|
+
}
|
63
|
+
// TODO repeat run with taskIndex++ if NoSampleException happens
|
60
64
|
runner.run(taskSource, schema, 0, new PageOutput() {
|
61
65
|
@Override
|
62
66
|
public void add(Page page)
|
@@ -103,7 +103,14 @@ public class InputStreamFileInput
|
|
103
103
|
public void close()
|
104
104
|
{
|
105
105
|
try {
|
106
|
-
|
106
|
+
try {
|
107
|
+
if (current != null) {
|
108
|
+
current.close();
|
109
|
+
current = null;
|
110
|
+
}
|
111
|
+
} finally {
|
112
|
+
provider.close();
|
113
|
+
}
|
107
114
|
} catch (IOException ex) {
|
108
115
|
throw new RuntimeException(ex);
|
109
116
|
}
|
@@ -48,7 +48,7 @@ A configuration file consists of following sections:
|
|
48
48
|
|
49
49
|
* **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
|
50
50
|
|
51
|
-
* **formatter:** If the output is file-based,
|
51
|
+
* **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
|
52
52
|
|
53
53
|
* **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip)
|
54
54
|
|
@@ -75,7 +75,7 @@ Step 3: Confirm it works
|
|
75
75
|
|
76
76
|
The next step is to actually use the plugin.
|
77
77
|
|
78
|
-
Let's
|
78
|
+
Let's suppose you have a configuration file named ``your-config.yml``. You can use the plugin by running embulk with the ``-L`` argument:
|
79
79
|
|
80
80
|
::
|
81
81
|
|
@@ -11,7 +11,7 @@ This article shows how to:
|
|
11
11
|
* Visualize the data with Kibana interactively.
|
12
12
|
* Schedule the data loading every hour using cron.
|
13
13
|
|
14
|
-
This guide assumes you are using Ubuntu 12.
|
14
|
+
This guide assumes you are using Ubuntu 12.04 Precise or Mac OS X.
|
15
15
|
|
16
16
|
Setup Elasticsearch and Kibana 4
|
17
17
|
------------------
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
Release 0.6.6
|
2
|
+
==================================
|
3
|
+
|
4
|
+
General Changes
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Fixed a problem where the timestamp format guess code couldn't guess the format if the string included day 30 or 31.
|
8
|
+
* ``guess`` and ``preview`` throw NoSampleException with an appropriate error message if there are no input tasks.
|
9
|
+
|
10
|
+
Plugin API
|
11
|
+
------------------
|
12
|
+
|
13
|
+
* ``spi.util.InputStreamFileInput#close`` closes currently opened InputStream.
|
14
|
+
|
15
|
+
Release Date
|
16
|
+
------------------
|
17
|
+
2015-05-04
|
@@ -36,7 +36,7 @@ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
|
36
36
|
|
37
37
|
task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
|
38
38
|
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
|
39
|
-
script "
|
39
|
+
script "pkg/${project.name}-${project.version}.gem"
|
40
40
|
}
|
41
41
|
|
42
42
|
task "package"(dependsOn: ["gemspec", "classpath"]) << {
|
@@ -4,8 +4,8 @@ module Embulk::Guess
|
|
4
4
|
YEAR = /[1-4][0-9]{3}/
|
5
5
|
MONTH = /10|11|12|[0 ]?[0-9]/
|
6
6
|
MONTH_NODELIM = /10|11|12|[0][0-9]/
|
7
|
-
DAY = /[1-2][0-9]|[0 ]?[1-9]
|
8
|
-
DAY_NODELIM = /[1-2][0-9]|[0][1-9]
|
7
|
+
DAY = /31|30|[1-2][0-9]|[0 ]?[1-9]/
|
8
|
+
DAY_NODELIM = /31|30|[1-2][0-9]|[0][1-9]/
|
9
9
|
HOUR = /20|21|22|23|24|1[0-9]|[0 ]?[0-9]/
|
10
10
|
HOUR_NODELIM = /20|21|22|23|24|1[0-9]|[0][0-9]/
|
11
11
|
MINUTE = SECOND = /60|[1-5][0-9]|[0 ]?[0-9]/
|
data/lib/embulk/version.rb
CHANGED
@@ -47,6 +47,10 @@ class TimeFormatGuessTest < ::Test::Unit::TestCase
|
|
47
47
|
assert_guess "%d/%m/%Y", "21/01/2014"
|
48
48
|
end
|
49
49
|
|
50
|
+
def test_format_borders
|
51
|
+
assert_guess "%Y-%m-%d %H:%M:%S.%N", "2014-12-31 23:59:59.999999999"
|
52
|
+
end
|
53
|
+
|
50
54
|
def test_format_iso8601
|
51
55
|
assert_guess "%Y-%m-%d", "1981-04-05"
|
52
56
|
assert_guess "%Y-%m-%dT%H", "2007-04-06T13"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04
|
11
|
+
date: 2015-05-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -295,6 +295,7 @@ files:
|
|
295
295
|
- embulk-docs/src/release/release-0.6.3.rst
|
296
296
|
- embulk-docs/src/release/release-0.6.4.rst
|
297
297
|
- embulk-docs/src/release/release-0.6.5.rst
|
298
|
+
- embulk-docs/src/release/release-0.6.6.rst
|
298
299
|
- embulk-standards/build.gradle
|
299
300
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
300
301
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -400,8 +401,8 @@ files:
|
|
400
401
|
- classpath/bval-jsr303-0.5.jar
|
401
402
|
- classpath/commons-beanutils-core-1.8.3.jar
|
402
403
|
- classpath/commons-lang3-3.1.jar
|
403
|
-
- classpath/embulk-core-0.6.
|
404
|
-
- classpath/embulk-standards-0.6.
|
404
|
+
- classpath/embulk-core-0.6.6.jar
|
405
|
+
- classpath/embulk-standards-0.6.6.jar
|
405
406
|
- classpath/guava-18.0.jar
|
406
407
|
- classpath/guice-3.0.jar
|
407
408
|
- classpath/guice-multibindings-3.0.jar
|