embulk 0.5.2 → 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/build.gradle +8 -3
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +13 -12
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +1 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.5.3.rst +22 -0
- data/lib/embulk/command/embulk_run.rb +4 -4
- data/lib/embulk/guess/schema_guess.rb +11 -2
- data/lib/embulk/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0cd34264ca6a673f948b77e42cb3049e184a4f40
|
4
|
+
data.tar.gz: 8d56e898df4411110bc5c42c6817803d93a732fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 13ad1c043fff795f33199985f955d67238d4d54297516cd3d3ac8abcd1ea90e9cc43569e2745918c6090ce4373ae01a92f2cd26f35fddd37a93aa5d70c8be273
|
7
|
+
data.tar.gz: f02d113e3eddb344f9080d9dc5163c61e6718455b4d404607eb0a1bd62ac94da547697dd2be5796c6a44404a92ef01c845553f7da7a672b005dc434c5b3c5c13
|
data/README.md
CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
30
30
|
Following 4 commands install embulk to your home directory:
|
31
31
|
|
32
32
|
```
|
33
|
-
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.2.jar
|
33
|
+
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.3.jar
|
34
34
|
chmod +x ~/.embulk/bin/embulk
|
35
35
|
echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
|
36
36
|
source ~/.bashrc
|
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
45
45
|
You can assume the jar file is a .bat file.
|
46
46
|
|
47
47
|
```
|
48
|
-
PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.2.jar -OutFile embulk.bat}"
|
48
|
+
PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.3.jar -OutFile embulk.bat}"
|
49
49
|
```
|
50
50
|
|
51
51
|
Next step: [Trying examples](#trying-examples)
|
data/build.gradle
CHANGED
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
12
12
|
|
13
13
|
allprojects {
|
14
14
|
group = 'org.embulk'
|
15
|
-
version = '0.5.2'
|
15
|
+
version = '0.5.3'
|
16
16
|
|
17
17
|
apply plugin: 'java'
|
18
18
|
apply plugin: 'maven-publish'
|
@@ -105,7 +105,11 @@ subprojects {
|
|
105
105
|
}
|
106
106
|
}
|
107
107
|
|
108
|
-
// add javadoc/source jar tasks as artifacts to be released
|
108
|
+
// add tests/javadoc/source jar tasks as artifacts to be released
|
109
|
+
task testsJar(type: Jar, dependsOn: classes) {
|
110
|
+
classifier = 'tests'
|
111
|
+
from sourceSets.test.output
|
112
|
+
}
|
109
113
|
task sourcesJar(type: Jar, dependsOn: classes) {
|
110
114
|
classifier = 'sources'
|
111
115
|
from sourceSets.main.allSource
|
@@ -115,7 +119,7 @@ subprojects {
|
|
115
119
|
from javadoc.destinationDir
|
116
120
|
}
|
117
121
|
artifacts {
|
118
|
-
archives sourcesJar, javadocJar
|
122
|
+
archives testsJar, sourcesJar, javadocJar
|
119
123
|
}
|
120
124
|
}
|
121
125
|
|
@@ -124,6 +128,7 @@ subprojects {
|
|
124
128
|
if (release_projects.contains(project)) {
|
125
129
|
bintrayMavenRelease(MavenPublication) {
|
126
130
|
from components.java
|
131
|
+
artifact testsJar
|
127
132
|
artifact sourcesJar
|
128
133
|
artifact javadocJar
|
129
134
|
}
|
@@ -51,6 +51,7 @@ public class SamplingParserPlugin
|
|
51
51
|
// override in.parser.type so that FileInputRunner creates GuessParserPlugin
|
52
52
|
ConfigSource samplingInputConfig = inputConfig.deepCopy();
|
53
53
|
samplingInputConfig.getNestedOrSetEmpty("parser").set("type", "system_sampling");
|
54
|
+
samplingInputConfig.set("decoders", null);
|
54
55
|
|
55
56
|
try {
|
56
57
|
runner.transaction(samplingInputConfig, new InputPlugin.Control() {
|
@@ -78,23 +79,23 @@ public class SamplingParserPlugin
|
|
78
79
|
|
79
80
|
private static Buffer getSample(FileInput fileInput, int maxSampleSize)
|
80
81
|
{
|
82
|
+
if (!fileInput.nextFile()) {
|
83
|
+
// no input files
|
84
|
+
return Buffer.EMPTY;
|
85
|
+
}
|
86
|
+
|
81
87
|
Buffer sample = Buffer.allocate(maxSampleSize);
|
82
88
|
int sampleSize = 0;
|
83
89
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
sample.setBytes(sampleSize, buffer, 0, size);
|
92
|
-
sampleSize += size;
|
93
|
-
}
|
94
|
-
buffer.release();
|
90
|
+
for (Buffer buffer : each(fileInput)) {
|
91
|
+
int size = Math.min(buffer.limit(), sample.capacity() - sampleSize);
|
92
|
+
sample.setBytes(sampleSize, buffer, 0, size);
|
93
|
+
sampleSize += size;
|
94
|
+
buffer.release();
|
95
|
+
if (sampleSize >= maxSampleSize) {
|
96
|
+
break;
|
95
97
|
}
|
96
98
|
}
|
97
|
-
|
98
99
|
sample.limit(sampleSize);
|
99
100
|
return sample;
|
100
101
|
}
|
@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
|
|
56
56
|
|
57
57
|
.. code-block:: console
|
58
58
|
|
59
|
-
$ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.2.jar -O /usr/local/bin/embulk
|
59
|
+
$ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.3.jar -O /usr/local/bin/embulk
|
60
60
|
$ sudo chmod +x /usr/local/bin/embulk
|
61
61
|
|
62
62
|
Step 2. Install Elasticsearch plugin
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,22 @@
|
|
1
|
+
Release 0.5.3
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Built-in plugins
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* ``guess`` guesses boolean types (@hata++)
|
8
|
+
|
9
|
+
|
10
|
+
General Changes
|
11
|
+
------------------
|
12
|
+
|
13
|
+
* ``guess`` stops reading the file after the first 32KB and ignores the remaining data. The execution time of ``guess`` improves significantly, especially when the first file is large.
|
14
|
+
|
15
|
+
* ``guess`` ignores decoders when it reads sample data. This change fixes the problem where guess fails if config file includes ``decoder-gzip`` (@hata++).
|
16
|
+
|
17
|
+
* Releases ``embulk-core-VERSION-tests.jar`` in addition to ``embulk-core-VERSION-sources.jar``.
|
18
|
+
|
19
|
+
|
20
|
+
Release Date
|
21
|
+
------------------
|
22
|
+
2015-03-17
|
@@ -58,7 +58,7 @@ module Embulk
|
|
58
58
|
|
59
59
|
when :run
|
60
60
|
op.banner = "Usage: run <config.yml>"
|
61
|
-
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, or trace)') do |level|
|
61
|
+
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, debug or trace)') do |level|
|
62
62
|
options[:logLevel] = level
|
63
63
|
end
|
64
64
|
op.on('-I', '--load-path PATH', 'Add ruby script directory path ($LOAD_PATH)') do |load_path|
|
@@ -77,7 +77,7 @@ module Embulk
|
|
77
77
|
|
78
78
|
when :cleanup
|
79
79
|
op.banner = "Usage: cleanup <config.yml>"
|
80
|
-
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, or trace)') do |level|
|
80
|
+
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, debug or trace)') do |level|
|
81
81
|
options[:logLevel] = level
|
82
82
|
end
|
83
83
|
op.on('-I', '--load-path PATH', 'Add ruby script directory path ($LOAD_PATH)') do |load_path|
|
@@ -93,7 +93,7 @@ module Embulk
|
|
93
93
|
|
94
94
|
when :preview
|
95
95
|
op.banner = "Usage: preview <config.yml>"
|
96
|
-
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, or trace)') do |level|
|
96
|
+
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, debug or trace)') do |level|
|
97
97
|
options[:logLevel] = level
|
98
98
|
end
|
99
99
|
op.on('-I', '--load-path PATH', 'Add ruby script directory path ($LOAD_PATH)') do |load_path|
|
@@ -109,7 +109,7 @@ module Embulk
|
|
109
109
|
|
110
110
|
when :guess
|
111
111
|
op.banner = "Usage: guess <partial-config.yml>"
|
112
|
-
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, or trace)') do |level|
|
112
|
+
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, debug or trace)') do |level|
|
113
113
|
options[:logLevel] = level
|
114
114
|
end
|
115
115
|
op.on('-o', '--output PATH', 'Path to a file to write the guessed configuration') do |path|
|
@@ -45,7 +45,7 @@ module Embulk::Guess
|
|
45
45
|
private
|
46
46
|
|
47
47
|
def guess_type(str)
|
48
|
-
if TRUE_STRINGS[str]
|
48
|
+
if TRUE_STRINGS[str] || FALSE_STRINGS[str]
|
49
49
|
return "boolean"
|
50
50
|
end
|
51
51
|
|
@@ -78,7 +78,7 @@ module Embulk::Guess
|
|
78
78
|
end
|
79
79
|
|
80
80
|
# taken from CsvParserPlugin.TRUE_STRINGS
|
81
|
-
TRUE_STRINGS = Hash[
|
81
|
+
TRUE_STRINGS = Hash[%w[
|
82
82
|
true True TRUE
|
83
83
|
yes Yes YES
|
84
84
|
y Y
|
@@ -86,6 +86,15 @@ module Embulk::Guess
|
|
86
86
|
1
|
87
87
|
].map {|k| [k, true] }]
|
88
88
|
|
89
|
+
# When a string matches one of the false strings, the lookup returns 'true'
|
90
|
+
FALSE_STRINGS = Hash[%w[
|
91
|
+
false False FALSE
|
92
|
+
no No NO
|
93
|
+
n N
|
94
|
+
off Off OFF
|
95
|
+
0
|
96
|
+
].map {|k| [k, true] }]
|
97
|
+
|
89
98
|
TYPE_COALESCE = Hash[{
|
90
99
|
long: :double,
|
91
100
|
boolean: :long,
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.2
|
4
|
+
version: 0.5.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -274,6 +274,7 @@ files:
|
|
274
274
|
- embulk-docs/src/release/release-0.5.0.rst
|
275
275
|
- embulk-docs/src/release/release-0.5.1.rst
|
276
276
|
- embulk-docs/src/release/release-0.5.2.rst
|
277
|
+
- embulk-docs/src/release/release-0.5.3.rst
|
277
278
|
- embulk-standards/build.gradle
|
278
279
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
279
280
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -378,8 +379,8 @@ files:
|
|
378
379
|
- classpath/bval-jsr303-0.5.jar
|
379
380
|
- classpath/commons-beanutils-core-1.8.3.jar
|
380
381
|
- classpath/commons-lang3-3.1.jar
|
381
|
-
- classpath/embulk-core-0.5.2.jar
|
382
|
-
- classpath/embulk-standards-0.5.2.jar
|
382
|
+
- classpath/embulk-core-0.5.3.jar
|
383
|
+
- classpath/embulk-standards-0.5.3.jar
|
383
384
|
- classpath/guava-18.0.jar
|
384
385
|
- classpath/guice-3.0.jar
|
385
386
|
- classpath/guice-multibindings-3.0.jar
|