embulk 0.8.0 → 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/embulk-core/build.gradle +1 -0
- data/embulk-core/src/main/java/org/embulk/command/PreviewPrinter.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetter.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +2 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +1 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.0.rst +17 -11
- data/embulk-docs/src/release/release-0.8.1.rst +18 -0
- data/embulk.gemspec +2 -1
- data/lib/embulk/command/embulk_migrate_plugin.rb +4 -2
- data/lib/embulk/data/new/ruby/gemspec.erb +2 -2
- data/lib/embulk/java/imports.rb +5 -5
- data/lib/embulk/page_builder.rb +1 -0
- data/lib/embulk/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a189a85d312e0621daf89d790baf4fa45d0e0125
|
4
|
+
data.tar.gz: d7355b10940135fce461332f40d6f8180b3c41c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 09609e12b41ce696b190be750cee3687fcd0f9f04d24597345415080c8ffdd7319ed33129584f8c500a6893c2cdb4cba86d7ddeb34bab0cf4727c2beb3388336
|
7
|
+
data.tar.gz: b46a461b1f845e7020d6ac5c3553ed4ca1a764ae0ae80fccc52d4bd9a35db11fe24ec6ab721fd3ae841f365d6d92fc0f40213d2053d5c9a1c9d74ae6bb8e62f6
|
data/build.gradle
CHANGED
data/embulk-core/build.gradle
CHANGED
@@ -11,6 +11,7 @@ import org.embulk.spi.Schema;
|
|
11
11
|
import org.embulk.spi.time.Timestamp;
|
12
12
|
import org.embulk.spi.Page;
|
13
13
|
import org.embulk.spi.util.Pages;
|
14
|
+
import org.msgpack.value.Value;
|
14
15
|
|
15
16
|
public abstract class PreviewPrinter
|
16
17
|
implements Closeable
|
@@ -68,6 +69,8 @@ public abstract class PreviewPrinter
|
|
68
69
|
return obj.toString();
|
69
70
|
} else if (obj instanceof Timestamp) {
|
70
71
|
return obj.toString();
|
72
|
+
} else if (obj instanceof Value) {
|
73
|
+
return obj.toString();
|
71
74
|
} else {
|
72
75
|
return modelManager.writeObject(obj);
|
73
76
|
}
|
@@ -9,6 +9,7 @@ import org.msgpack.core.buffer.MessageBuffer;
|
|
9
9
|
import org.msgpack.value.Value;
|
10
10
|
import org.jruby.Ruby;
|
11
11
|
import org.jruby.RubyString;
|
12
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
12
13
|
import org.jruby.util.ByteList;
|
13
14
|
import org.jcodings.specific.ASCIIEncoding;
|
14
15
|
|
@@ -52,4 +53,10 @@ public class RubyValueApi
|
|
52
53
|
throw Throwables.propagate(ex);
|
53
54
|
}
|
54
55
|
}
|
56
|
+
|
57
|
+
public static Value toValue(Ruby runtime, IRubyObject object)
|
58
|
+
{
|
59
|
+
RubyString string = (RubyString) object.callMethod(runtime.getCurrentContext(), "to_msgpack");
|
60
|
+
return fromMessagePack(string);
|
61
|
+
}
|
55
62
|
}
|
@@ -1,6 +1,7 @@
|
|
1
1
|
package org.embulk.spi.util;
|
2
2
|
|
3
3
|
import org.embulk.spi.time.Timestamp;
|
4
|
+
import org.msgpack.value.Value;
|
4
5
|
|
5
6
|
public interface DynamicColumnSetter
|
6
7
|
{
|
@@ -15,4 +16,6 @@ public interface DynamicColumnSetter
|
|
15
16
|
void set(String value);
|
16
17
|
|
17
18
|
void set(Timestamp value);
|
19
|
+
|
20
|
+
void set(Value value);
|
18
21
|
}
|
@@ -12,6 +12,7 @@ import org.embulk.spi.PageBuilder;
|
|
12
12
|
import org.embulk.spi.Column;
|
13
13
|
import org.embulk.spi.util.DynamicColumnSetter;
|
14
14
|
import org.embulk.spi.time.Timestamp;
|
15
|
+
import org.embulk.spi.json.RubyValueApi;
|
15
16
|
import org.msgpack.value.Value;
|
16
17
|
|
17
18
|
public abstract class AbstractDynamicColumnSetter
|
@@ -76,7 +77,7 @@ public abstract class AbstractDynamicColumnSetter
|
|
76
77
|
int nano = (int) ((msec % 1000) * 1000000 + nsec % 1000000000);
|
77
78
|
set(Timestamp.ofEpochSecond(sec, nano));
|
78
79
|
} else {
|
79
|
-
|
80
|
+
set(RubyValueApi.toValue(rubyObject.getRuntime(), rubyObject));
|
80
81
|
}
|
81
82
|
}
|
82
83
|
}
|
@@ -53,7 +53,7 @@ public class BooleanColumnSetter
|
|
53
53
|
if (TRUE_STRINGS.contains(v)) {
|
54
54
|
pageBuilder.setBoolean(column, true);
|
55
55
|
} else {
|
56
|
-
defaultValue.
|
56
|
+
defaultValue.setBoolean(pageBuilder, column);
|
57
57
|
}
|
58
58
|
}
|
59
59
|
|
@@ -66,6 +66,6 @@ public class BooleanColumnSetter
|
|
66
66
|
@Override
|
67
67
|
public void set(Value v)
|
68
68
|
{
|
69
|
-
|
69
|
+
defaultValue.setBoolean(pageBuilder, column);
|
70
70
|
}
|
71
71
|
}
|
@@ -67,12 +67,12 @@ public class LongColumnSetter
|
|
67
67
|
@Override
|
68
68
|
public void set(Timestamp v)
|
69
69
|
{
|
70
|
-
pageBuilder.
|
70
|
+
pageBuilder.setLong(column, v.getEpochSecond());
|
71
71
|
}
|
72
72
|
|
73
73
|
@Override
|
74
74
|
public void set(Value v)
|
75
75
|
{
|
76
|
-
|
76
|
+
defaultValue.setLong(pageBuilder, column);
|
77
77
|
}
|
78
78
|
}
|
data/embulk-docs/src/release.rst
CHANGED
@@ -1,28 +1,32 @@
|
|
1
1
|
Release 0.8.0
|
2
2
|
==================================
|
3
3
|
|
4
|
-
|
4
|
+
JSON type
|
5
5
|
------------------
|
6
6
|
|
7
|
-
|
7
|
+
Embulk v0.8.0 added JSON type support.
|
8
|
+
|
9
|
+
* A column with ``json`` type can represent nested values such as maps or arrays. This is useful when both input and output support dynamically-typed values.
|
8
10
|
|
9
|
-
|
11
|
+
* **IMPORTANT**: If input plugin uses JSON type but output plugin is compiled with an old embulk (< 0.8.0), a bulk load transaction fails with a confusing error message. To avoid this issue, please run ``embulk migrate /path/to/embulk-plugin-directory`` to upgrade plugin code, and use the latest plugin. This problem doesn't happen if input plugin doesn't use json type.
|
10
12
|
|
11
|
-
|
13
|
+
* Filter plugins to process JSON types are not ready yet. Expected plugins include, for example, ones that flatten a json column into statically-typed columns with a guess plugin, extract a value from a json column using an expression (such as JSONPath) and set it to another column, or build a json column by copying values from other columns.
|
12
14
|
|
13
|
-
|
15
|
+
Page scattering
|
16
|
+
------------------
|
14
17
|
|
15
|
-
|
18
|
+
Local executor plugin (the default executor) runs multiple tasks even if there is only 1 input task. This improves performance a lot especially if input is a single huge file.
|
16
19
|
|
17
|
-
|
20
|
+
* Its mechanism is that the executor creates 2, 3, 4, or more output tasks for each input task. Page chunks from an input task are scattered to output tasks. All of the tasks run in parallel using threads. This feature is called "page scattering".
|
18
21
|
|
19
|
-
|
22
|
+
* Added ``min_output_tasks`` option at ``exec:`` section. Default is 1x of available CPU cores. Page scattering is enabled if number of input tasks is less than this ``min_output_tasks`` option. Setting larger number here is useful if embulk doesn't use multi-threading with enough concurrency due to too few number of input tasks.
|
20
23
|
|
21
|
-
|
24
|
+
* Added ``max_threads`` option at ``exec:`` section. Default is 2x of available CPU cores. This option controls maximum concurrency. Setting smaller number here is useful if too many threads make the destination or source storage overloaded. Setting larger number here is useful if CPU utilization is too low due to high latency.
|
22
25
|
|
23
|
-
|
26
|
+
* The results of output transaction will be deterministic. There is no randomness that depends on timing. However, task assignment changes if ``min_output_tasks`` changes. If you need deterministic results regardless of machines that may have different number of CPU cores, please add ``min_output_tasks`` option to ``exec:`` section. Setting 1 there will disable page scattering completely.
|
24
27
|
|
25
|
-
|
28
|
+
General Changes
|
29
|
+
------------------
|
26
30
|
|
27
31
|
* YAML configuration parser uses stricter rules when it converts the type of non-quoted strings.
|
28
32
|
|
@@ -32,6 +36,8 @@ General Changes
|
|
32
36
|
|
33
37
|
* On, Off, Yes, and No (case-insensitive) will be a string instead of boolean. Only true, True, false, False are recognized as a boolean.
|
34
38
|
|
39
|
+
* Upgraded JRuby version to 9.0.4.0.
|
40
|
+
|
35
41
|
Java Plugin API
|
36
42
|
------------------
|
37
43
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
Release 0.8.1
|
2
|
+
==================================
|
3
|
+
|
4
|
+
General Changes
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Ruby-based plugins use ``>=`` instead of ``~>`` to depend on bundler and embulk so that users can use both older plugins and newer plugins together. This assumes major versions of those dependencies most likely don't break backward compatibility.
|
8
|
+
|
9
|
+
* Added built-in dependency on the msgpack gem v0.7.4, which is used to support json types in ruby plugins.
|
10
|
+
|
11
|
+
* Fixed json value creation using ruby-based parser and input plugins.
|
12
|
+
|
13
|
+
* Fixed preview of json types.
|
14
|
+
|
15
|
+
|
16
|
+
Release Date
|
17
|
+
------------------
|
18
|
+
2016-01-13
|
data/embulk.gemspec
CHANGED
@@ -20,7 +20,8 @@ Gem::Specification.new do |gem|
|
|
20
20
|
gem.has_rdoc = false
|
21
21
|
|
22
22
|
if RUBY_PLATFORM =~ /java/i
|
23
|
-
gem.add_dependency "bundler", '
|
23
|
+
gem.add_dependency "bundler", '>= 1.10.6'
|
24
|
+
gem.add_dependency "msgpack", '~> 0.7.3'
|
24
25
|
gem.add_dependency "liquid", '~> 3.0.6'
|
25
26
|
|
26
27
|
# For embulk/guess/charset.rb. See also embulk-core/build.gradle
|
@@ -128,10 +128,12 @@ EOF
|
|
128
128
|
if from_ver <= version("0.1.0")
|
129
129
|
# add add_development_dependency
|
130
130
|
migrator.insert_line("**/*.gemspec", /([ \t]*\w+)\.add_development_dependency/) {|m|
|
131
|
-
"#{m[1]}.add_development_dependency 'embulk', ['
|
131
|
+
"#{m[1]}.add_development_dependency 'embulk', ['>= #{Embulk::VERSION}']"
|
132
132
|
}
|
133
133
|
else
|
134
|
-
migrator.replace("**/*.gemspec", /add_(?:development_)?dependency\s+\W+embulk\W+\s
|
134
|
+
unless migrator.replace("**/*.gemspec", /add_(?:development_)?dependency\s+\W+embulk\W+\s*(\~\>\s*[\d\.]+)\W+/, ">= #{Embulk::VERSION}")
|
135
|
+
migrator.replace("**/*.gemspec", /add_(?:development_)?dependency\s+\W+embulk\W+\s*([\d\.]+)\W+/, Embulk::VERSION)
|
136
|
+
end
|
135
137
|
end
|
136
138
|
end
|
137
139
|
|
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.require_paths = ["lib"]
|
15
15
|
|
16
16
|
#spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
|
17
|
-
spec.add_development_dependency 'embulk', ['
|
18
|
-
spec.add_development_dependency 'bundler', ['
|
17
|
+
spec.add_development_dependency 'embulk', ['>= <%= Embulk::VERSION %>']
|
18
|
+
spec.add_development_dependency 'bundler', ['>= 1.10.6']
|
19
19
|
spec.add_development_dependency 'rake', ['>= 10.0']
|
20
20
|
end
|
data/lib/embulk/java/imports.rb
CHANGED
@@ -19,23 +19,23 @@ module Embulk::Java
|
|
19
19
|
|
20
20
|
module SPI
|
21
21
|
module Time
|
22
|
-
include_package 'org.embulk.time'
|
22
|
+
include_package 'org.embulk.spi.time'
|
23
23
|
end
|
24
24
|
|
25
25
|
module Json
|
26
|
-
include_package 'org.embulk.json'
|
26
|
+
include_package 'org.embulk.spi.json'
|
27
27
|
end
|
28
28
|
|
29
29
|
module Type
|
30
|
-
include_package 'org.embulk.type'
|
30
|
+
include_package 'org.embulk.spi.type'
|
31
31
|
end
|
32
32
|
|
33
33
|
module Unit
|
34
|
-
include_package 'org.embulk.unit'
|
34
|
+
include_package 'org.embulk.spi.unit'
|
35
35
|
end
|
36
36
|
|
37
37
|
module Util
|
38
|
-
include_package 'org.embulk.util'
|
38
|
+
include_package 'org.embulk.spi.util'
|
39
39
|
end
|
40
40
|
|
41
41
|
include_package 'org.embulk.spi'
|
data/lib/embulk/page_builder.rb
CHANGED
@@ -6,6 +6,7 @@ module Embulk
|
|
6
6
|
|
7
7
|
class PageBuilder
|
8
8
|
def initialize(schema, java_page_output)
|
9
|
+
require 'msgpack' # used at DynamicPageBuilder.set(Value)
|
9
10
|
# TODO get task as an argument
|
10
11
|
task = Java::SPI::Exec.newConfigSource.load_config(Java::DynamicPageBuilder::BuilderTask.java_class)
|
11
12
|
@page_builder = Java::DynamicPageBuilder.new(task, Java::Injected::BufferAllocator, schema.to_java, java_page_output)
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
@@ -107,8 +107,8 @@ files:
|
|
107
107
|
- classpath/bval-jsr303-0.5.jar
|
108
108
|
- classpath/commons-beanutils-core-1.8.3.jar
|
109
109
|
- classpath/commons-lang3-3.1.jar
|
110
|
-
- classpath/embulk-core-0.8.
|
111
|
-
- classpath/embulk-standards-0.8.
|
110
|
+
- classpath/embulk-core-0.8.1.jar
|
111
|
+
- classpath/embulk-standards-0.8.1.jar
|
112
112
|
- classpath/guava-18.0.jar
|
113
113
|
- classpath/guice-4.0.jar
|
114
114
|
- classpath/guice-bootstrap-0.1.1.jar
|
@@ -418,6 +418,7 @@ files:
|
|
418
418
|
- embulk-docs/src/release/release-0.7.8.rst
|
419
419
|
- embulk-docs/src/release/release-0.7.9.rst
|
420
420
|
- embulk-docs/src/release/release-0.8.0.rst
|
421
|
+
- embulk-docs/src/release/release-0.8.1.rst
|
421
422
|
- embulk-standards/build.gradle
|
422
423
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
423
424
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|