embulk 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/build.gradle +8 -3
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +13 -12
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +1 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.5.3.rst +22 -0
- data/lib/embulk/command/embulk_run.rb +4 -4
- data/lib/embulk/guess/schema_guess.rb +11 -2
- data/lib/embulk/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0cd34264ca6a673f948b77e42cb3049e184a4f40
|
4
|
+
data.tar.gz: 8d56e898df4411110bc5c42c6817803d93a732fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 13ad1c043fff795f33199985f955d67238d4d54297516cd3d3ac8abcd1ea90e9cc43569e2745918c6090ce4373ae01a92f2cd26f35fddd37a93aa5d70c8be273
|
7
|
+
data.tar.gz: f02d113e3eddb344f9080d9dc5163c61e6718455b4d404607eb0a1bd62ac94da547697dd2be5796c6a44404a92ef01c845553f7da7a672b005dc434c5b3c5c13
|
data/README.md
CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
30
30
|
Following 4 commands install embulk to your home directory:
|
31
31
|
|
32
32
|
```
|
33
|
-
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.2.jar
|
33
|
+
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.3.jar
|
34
34
|
chmod +x ~/.embulk/bin/embulk
|
35
35
|
echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
|
36
36
|
source ~/.bashrc
|
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
45
45
|
You can assume the jar file is a .bat file.
|
46
46
|
|
47
47
|
```
|
48
|
-
PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.2.jar -OutFile embulk.bat}"
|
48
|
+
PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.3.jar -OutFile embulk.bat}"
|
49
49
|
```
|
50
50
|
|
51
51
|
Next step: [Trying examples](#trying-examples)
|
data/build.gradle
CHANGED
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
12
12
|
|
13
13
|
allprojects {
|
14
14
|
group = 'org.embulk'
|
15
|
-
version = '0.5.2'
|
15
|
+
version = '0.5.3'
|
16
16
|
|
17
17
|
apply plugin: 'java'
|
18
18
|
apply plugin: 'maven-publish'
|
@@ -105,7 +105,11 @@ subprojects {
|
|
105
105
|
}
|
106
106
|
}
|
107
107
|
|
108
|
-
// add javadoc/source jar tasks as artifacts to be released
|
108
|
+
// add tests/javadoc/source jar tasks as artifacts to be released
|
109
|
+
task testsJar(type: Jar, dependsOn: classes) {
|
110
|
+
classifier = 'tests'
|
111
|
+
from sourceSets.test.output
|
112
|
+
}
|
109
113
|
task sourcesJar(type: Jar, dependsOn: classes) {
|
110
114
|
classifier = 'sources'
|
111
115
|
from sourceSets.main.allSource
|
@@ -115,7 +119,7 @@ subprojects {
|
|
115
119
|
from javadoc.destinationDir
|
116
120
|
}
|
117
121
|
artifacts {
|
118
|
-
archives sourcesJar, javadocJar
|
122
|
+
archives testsJar, sourcesJar, javadocJar
|
119
123
|
}
|
120
124
|
}
|
121
125
|
|
@@ -124,6 +128,7 @@ subprojects {
|
|
124
128
|
if (release_projects.contains(project)) {
|
125
129
|
bintrayMavenRelease(MavenPublication) {
|
126
130
|
from components.java
|
131
|
+
artifact testsJar
|
127
132
|
artifact sourcesJar
|
128
133
|
artifact javadocJar
|
129
134
|
}
|
@@ -51,6 +51,7 @@ public class SamplingParserPlugin
|
|
51
51
|
// override in.parser.type so that FileInputRunner creates GuessParserPlugin
|
52
52
|
ConfigSource samplingInputConfig = inputConfig.deepCopy();
|
53
53
|
samplingInputConfig.getNestedOrSetEmpty("parser").set("type", "system_sampling");
|
54
|
+
samplingInputConfig.set("decoders", null);
|
54
55
|
|
55
56
|
try {
|
56
57
|
runner.transaction(samplingInputConfig, new InputPlugin.Control() {
|
@@ -78,23 +79,23 @@ public class SamplingParserPlugin
|
|
78
79
|
|
79
80
|
private static Buffer getSample(FileInput fileInput, int maxSampleSize)
|
80
81
|
{
|
82
|
+
if (!fileInput.nextFile()) {
|
83
|
+
// no input files
|
84
|
+
return Buffer.EMPTY;
|
85
|
+
}
|
86
|
+
|
81
87
|
Buffer sample = Buffer.allocate(maxSampleSize);
|
82
88
|
int sampleSize = 0;
|
83
89
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
sample.setBytes(sampleSize, buffer, 0, size);
|
92
|
-
sampleSize += size;
|
93
|
-
}
|
94
|
-
buffer.release();
|
90
|
+
for (Buffer buffer : each(fileInput)) {
|
91
|
+
int size = Math.min(buffer.limit(), sample.capacity() - sampleSize);
|
92
|
+
sample.setBytes(sampleSize, buffer, 0, size);
|
93
|
+
sampleSize += size;
|
94
|
+
buffer.release();
|
95
|
+
if (sampleSize >= maxSampleSize) {
|
96
|
+
break;
|
95
97
|
}
|
96
98
|
}
|
97
|
-
|
98
99
|
sample.limit(sampleSize);
|
99
100
|
return sample;
|
100
101
|
}
|
@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
|
|
56
56
|
|
57
57
|
.. code-block:: console
|
58
58
|
|
59
|
-
$ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.2.jar -O /usr/local/bin/embulk
|
59
|
+
$ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.3.jar -O /usr/local/bin/embulk
|
60
60
|
$ sudo chmod +x /usr/local/bin/embulk
|
61
61
|
|
62
62
|
Step 2. Install Elasticsearch plugin
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,22 @@
|
|
1
|
+
Release 0.5.3
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Built-in plugins
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* ``guess`` guesses boolean types (@hata++)
|
8
|
+
|
9
|
+
|
10
|
+
General Changes
|
11
|
+
------------------
|
12
|
+
|
13
|
+
* ``guess`` stops reading the file after first 32KB and ignores remaining data. Execution time of ``guess`` improves significantly especially when the first file is large.
|
14
|
+
|
15
|
+
* ``guess`` ignores decoders when it reads sample data. This change fixes the problem where guess fails if config file includes ``decoder-gzip`` (@hata++).
|
16
|
+
|
17
|
+
* Releases ``embulk-core-VERSION-tests.jar`` in addition to ``embulk-core-VERSION-sources.jar``.
|
18
|
+
|
19
|
+
|
20
|
+
Release Date
|
21
|
+
------------------
|
22
|
+
2015-03-17
|
@@ -58,7 +58,7 @@ module Embulk
|
|
58
58
|
|
59
59
|
when :run
|
60
60
|
op.banner = "Usage: run <config.yml>"
|
61
|
-
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, or trace)') do |level|
|
61
|
+
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, debug or trace)') do |level|
|
62
62
|
options[:logLevel] = level
|
63
63
|
end
|
64
64
|
op.on('-I', '--load-path PATH', 'Add ruby script directory path ($LOAD_PATH)') do |load_path|
|
@@ -77,7 +77,7 @@ module Embulk
|
|
77
77
|
|
78
78
|
when :cleanup
|
79
79
|
op.banner = "Usage: cleanup <config.yml>"
|
80
|
-
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, or trace)') do |level|
|
80
|
+
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, debug or trace)') do |level|
|
81
81
|
options[:logLevel] = level
|
82
82
|
end
|
83
83
|
op.on('-I', '--load-path PATH', 'Add ruby script directory path ($LOAD_PATH)') do |load_path|
|
@@ -93,7 +93,7 @@ module Embulk
|
|
93
93
|
|
94
94
|
when :preview
|
95
95
|
op.banner = "Usage: preview <config.yml>"
|
96
|
-
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, or trace)') do |level|
|
96
|
+
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, debug or trace)') do |level|
|
97
97
|
options[:logLevel] = level
|
98
98
|
end
|
99
99
|
op.on('-I', '--load-path PATH', 'Add ruby script directory path ($LOAD_PATH)') do |load_path|
|
@@ -109,7 +109,7 @@ module Embulk
|
|
109
109
|
|
110
110
|
when :guess
|
111
111
|
op.banner = "Usage: guess <partial-config.yml>"
|
112
|
-
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, or trace)') do |level|
|
112
|
+
op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, debug or trace)') do |level|
|
113
113
|
options[:logLevel] = level
|
114
114
|
end
|
115
115
|
op.on('-o', '--output PATH', 'Path to a file to write the guessed configuration') do |path|
|
@@ -45,7 +45,7 @@ module Embulk::Guess
|
|
45
45
|
private
|
46
46
|
|
47
47
|
def guess_type(str)
|
48
|
-
if TRUE_STRINGS[str]
|
48
|
+
if TRUE_STRINGS[str] || FALSE_STRINGS[str]
|
49
49
|
return "boolean"
|
50
50
|
end
|
51
51
|
|
@@ -78,7 +78,7 @@ module Embulk::Guess
|
|
78
78
|
end
|
79
79
|
|
80
80
|
# taken from CsvParserPlugin.TRUE_STRINGS
|
81
|
-
TRUE_STRINGS = Hash[
|
81
|
+
TRUE_STRINGS = Hash[%w[
|
82
82
|
true True TRUE
|
83
83
|
yes Yes YES
|
84
84
|
y Y
|
@@ -86,6 +86,15 @@ module Embulk::Guess
|
|
86
86
|
1
|
87
87
|
].map {|k| [k, true] }]
|
88
88
|
|
89
|
+
# When matching to false string, then return 'true'
|
90
|
+
FALSE_STRINGS = Hash[%w[
|
91
|
+
false False FALSE
|
92
|
+
no No NO
|
93
|
+
n N
|
94
|
+
off Off OFF
|
95
|
+
0
|
96
|
+
].map {|k| [k, true] }]
|
97
|
+
|
89
98
|
TYPE_COALESCE = Hash[{
|
90
99
|
long: :double,
|
91
100
|
boolean: :long,
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.2
|
4
|
+
version: 0.5.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -274,6 +274,7 @@ files:
|
|
274
274
|
- embulk-docs/src/release/release-0.5.0.rst
|
275
275
|
- embulk-docs/src/release/release-0.5.1.rst
|
276
276
|
- embulk-docs/src/release/release-0.5.2.rst
|
277
|
+
- embulk-docs/src/release/release-0.5.3.rst
|
277
278
|
- embulk-standards/build.gradle
|
278
279
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
279
280
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -378,8 +379,8 @@ files:
|
|
378
379
|
- classpath/bval-jsr303-0.5.jar
|
379
380
|
- classpath/commons-beanutils-core-1.8.3.jar
|
380
381
|
- classpath/commons-lang3-3.1.jar
|
381
|
-
- classpath/embulk-core-0.5.2.jar
|
382
|
-
- classpath/embulk-standards-0.5.2.jar
|
382
|
+
- classpath/embulk-core-0.5.3.jar
|
383
|
+
- classpath/embulk-standards-0.5.3.jar
|
383
384
|
- classpath/guava-18.0.jar
|
384
385
|
- classpath/guice-3.0.jar
|
385
386
|
- classpath/guice-multibindings-3.0.jar
|