embulk 0.6.5 → 0.6.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +3 -0
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +4 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +8 -1
- data/embulk-docs/src/built-in.rst +1 -1
- data/embulk-docs/src/customization.rst +1 -1
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +1 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.6.rst +17 -0
- data/lib/embulk/data/new/java/build.gradle.erb +1 -1
- data/lib/embulk/guess/time_format_guess.rb +2 -2
- data/lib/embulk/version.rb +1 -1
- data/test/guess/test_time_format_guess.rb +4 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06d9d42d47c71ddd9c290abc3fd90eae09756157
|
4
|
+
data.tar.gz: 4a1104ff4fb9657b5a2305120d8ffe0f2992c9ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9df10a5c5d6a0c77b38352f96d5f1cedbd658c0ac5a41e1720a3c8baa9f65685c742a34d0e6569d6bbb3d2c2e43423b48e3cc8d167dd4f8982568ea92f29d4eb
|
7
|
+
data.tar.gz: 75887fa0b97bc9750cca1d4abdeb500d0f8e83fd0ede84f467e671c83754acf0e3abb1a82a18c106d9e1250587f5b286a1c598fadeb7249d0c4320df0279f47c
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Embulk is a plugin-based parallel bulk data loader that helps **data transfer** between various **storages**, **databases**, **NoSQL** and **cloud services**.
|
4
4
|
|
5
|
-
You can release plugins to share your efforts of data cleaning, error handling, transaction control, and retrying. Packaging
|
5
|
+
You can release plugins to share your efforts of data cleaning, error handling, transaction control, and retrying. Packaging efforts into plugins **brings OSS-style development to the data scripts** which **tended to be one-time ad hoc scripts**.
|
6
6
|
|
7
7
|
[Embulk, an open-source plugin-based parallel bulk data loader](http://www.slideshare.net/frsyuki/embuk-making-data-integration-works-relaxed) at Slideshare
|
8
8
|
|
data/build.gradle
CHANGED
@@ -150,6 +150,9 @@ public class GuessExecutor
|
|
150
150
|
input.transaction(guessInputConfig, new InputPlugin.Control() {
|
151
151
|
public List<CommitReport> run(TaskSource inputTaskSource, Schema schema, int taskCount)
|
152
152
|
{
|
153
|
+
if (taskCount == 0) {
|
154
|
+
throw new NoSampleException("No input files to guess");
|
155
|
+
}
|
153
156
|
// TODO repeat run with taskIndex++ if NoSampleException happens
|
154
157
|
input.run(inputTaskSource, null, 0, new PageOutput() {
|
155
158
|
@Override
|
@@ -57,6 +57,10 @@ public class SamplingParserPlugin
|
|
57
57
|
runner.transaction(samplingInputConfig, new InputPlugin.Control() {
|
58
58
|
public List<CommitReport> run(TaskSource taskSource, Schema schema, int taskCount)
|
59
59
|
{
|
60
|
+
if (taskCount == 0) {
|
61
|
+
throw new NoSampleException("No input files to read sample data");
|
62
|
+
}
|
63
|
+
// TODO repeat run with taskIndex++ if NoSampleException happens
|
60
64
|
runner.run(taskSource, schema, 0, new PageOutput() {
|
61
65
|
@Override
|
62
66
|
public void add(Page page)
|
@@ -103,7 +103,14 @@ public class InputStreamFileInput
|
|
103
103
|
public void close()
|
104
104
|
{
|
105
105
|
try {
|
106
|
-
|
106
|
+
try {
|
107
|
+
if (current != null) {
|
108
|
+
current.close();
|
109
|
+
current = null;
|
110
|
+
}
|
111
|
+
} finally {
|
112
|
+
provider.close();
|
113
|
+
}
|
107
114
|
} catch (IOException ex) {
|
108
115
|
throw new RuntimeException(ex);
|
109
116
|
}
|
@@ -48,7 +48,7 @@ A configuration file consists of following sections:
|
|
48
48
|
|
49
49
|
* **out:** Output plugin options. An output plugin is either record-based (`Oracle <https://github.com/embulk/embulk-output-jdbc>`_, `Elasticsearch <https://github.com/muga/embulk-output-elasticsearch>`_, etc) or file-based (`Google Cloud Storage <https://github.com/hakobera/embulk-output-gcs>`_, `Command <https://github.com/embulk/embulk-output-command>`_, etc)
|
50
50
|
|
51
|
-
* **formatter:** If the output is file-based,
|
51
|
+
* **formatter:** If the output is file-based, formatter plugin formats a file format (such as built-in csv, `JSON <https://github.com/takei-yuya/embulk-formatter-jsonl>`_)
|
52
52
|
|
53
53
|
* **encoder:** If the output is file-based, encoder plugin encodes compression or encryption (such as built-in gzip)
|
54
54
|
|
@@ -75,7 +75,7 @@ Step 3: Confirm it works
|
|
75
75
|
|
76
76
|
The next step is to actually use the plugin.
|
77
77
|
|
78
|
-
Let's
|
78
|
+
Let's suppose you have a configuration file named ``your-config.yml``. You can use the plugin by running embulk with the ``-L`` argument:
|
79
79
|
|
80
80
|
::
|
81
81
|
|
@@ -11,7 +11,7 @@ This article shows how to:
|
|
11
11
|
* Visualize the data with Kibana interactively.
|
12
12
|
* Schedule the data loading every hour using cron.
|
13
13
|
|
14
|
-
This guide assumes you are using Ubuntu 12.
|
14
|
+
This guide assumes you are using Ubuntu 12.04 Precise or Mac OS X.
|
15
15
|
|
16
16
|
Setup Elasticsearch and Kibana 4
|
17
17
|
------------------
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
Release 0.6.6
|
2
|
+
==================================
|
3
|
+
|
4
|
+
General Changes
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Fixed a problem where the timestamp format guess code couldn't guess the format if the string included day 30 or 31.
|
8
|
+
* ``guess`` and ``preview`` throw NoSampleException with appropriate error message if there are no input tasks.
|
9
|
+
|
10
|
+
Plugin API
|
11
|
+
------------------
|
12
|
+
|
13
|
+
* ``spi.util.InputStreamFileInput#close`` closes currently opened InputStream.
|
14
|
+
|
15
|
+
Release Date
|
16
|
+
------------------
|
17
|
+
2015-05-04
|
@@ -36,7 +36,7 @@ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
|
36
36
|
|
37
37
|
task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
|
38
38
|
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
|
39
|
-
script "
|
39
|
+
script "pkg/${project.name}-${project.version}.gem"
|
40
40
|
}
|
41
41
|
|
42
42
|
task "package"(dependsOn: ["gemspec", "classpath"]) << {
|
@@ -4,8 +4,8 @@ module Embulk::Guess
|
|
4
4
|
YEAR = /[1-4][0-9]{3}/
|
5
5
|
MONTH = /10|11|12|[0 ]?[0-9]/
|
6
6
|
MONTH_NODELIM = /10|11|12|[0][0-9]/
|
7
|
-
DAY = /[1-2][0-9]|[0 ]?[1-9]
|
8
|
-
DAY_NODELIM = /[1-2][0-9]|[0][1-9]
|
7
|
+
DAY = /31|30|[1-2][0-9]|[0 ]?[1-9]/
|
8
|
+
DAY_NODELIM = /31|30|[1-2][0-9]|[0][1-9]/
|
9
9
|
HOUR = /20|21|22|23|24|1[0-9]|[0 ]?[0-9]/
|
10
10
|
HOUR_NODELIM = /20|21|22|23|24|1[0-9]|[0][0-9]/
|
11
11
|
MINUTE = SECOND = /60|[1-5][0-9]|[0 ]?[0-9]/
|
data/lib/embulk/version.rb
CHANGED
@@ -47,6 +47,10 @@ class TimeFormatGuessTest < ::Test::Unit::TestCase
|
|
47
47
|
assert_guess "%d/%m/%Y", "21/01/2014"
|
48
48
|
end
|
49
49
|
|
50
|
+
def test_format_borders
|
51
|
+
assert_guess "%Y-%m-%d %H:%M:%S.%N", "2014-12-31 23:59:59.999999999"
|
52
|
+
end
|
53
|
+
|
50
54
|
def test_format_iso8601
|
51
55
|
assert_guess "%Y-%m-%d", "1981-04-05"
|
52
56
|
assert_guess "%Y-%m-%dT%H", "2007-04-06T13"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04
|
11
|
+
date: 2015-05-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -295,6 +295,7 @@ files:
|
|
295
295
|
- embulk-docs/src/release/release-0.6.3.rst
|
296
296
|
- embulk-docs/src/release/release-0.6.4.rst
|
297
297
|
- embulk-docs/src/release/release-0.6.5.rst
|
298
|
+
- embulk-docs/src/release/release-0.6.6.rst
|
298
299
|
- embulk-standards/build.gradle
|
299
300
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
300
301
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -400,8 +401,8 @@ files:
|
|
400
401
|
- classpath/bval-jsr303-0.5.jar
|
401
402
|
- classpath/commons-beanutils-core-1.8.3.jar
|
402
403
|
- classpath/commons-lang3-3.1.jar
|
403
|
-
- classpath/embulk-core-0.6.
|
404
|
-
- classpath/embulk-standards-0.6.
|
404
|
+
- classpath/embulk-core-0.6.6.jar
|
405
|
+
- classpath/embulk-standards-0.6.6.jar
|
405
406
|
- classpath/guava-18.0.jar
|
406
407
|
- classpath/guice-3.0.jar
|
407
408
|
- classpath/guice-multibindings-3.0.jar
|