embulk 0.8.0 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0b3174c7826cebe5dd528fb3aa5f577367f30320
4
- data.tar.gz: a9d08926396181e890031f8e57cabf9a2db98e18
3
+ metadata.gz: a189a85d312e0621daf89d790baf4fa45d0e0125
4
+ data.tar.gz: d7355b10940135fce461332f40d6f8180b3c41c2
5
5
  SHA512:
6
- metadata.gz: 3b25dbabad3667aa97ec211c65289f0668d4e9c38ca069012436748be8415368078051b0fffb0bf45b3c40e71473e75136bf1b2fb7b79ca05fb67440aa5ba12a
7
- data.tar.gz: 5842d8686dcbf6e2e52a03a2f56d32a2830657d8b70f4101cb2294281db46b9bfc88bb6ef86fec9094e7edef1c2c3287fd240613d3cf887607049c85533ce4e3
6
+ metadata.gz: 09609e12b41ce696b190be750cee3687fcd0f9f04d24597345415080c8ffdd7319ed33129584f8c500a6893c2cdb4cba86d7ddeb34bab0cf4727c2beb3388336
7
+ data.tar.gz: b46a461b1f845e7020d6ac5c3553ed4ca1a764ae0ae80fccc52d4bd9a35db11fe24ec6ab721fd3ae841f365d6d92fc0f40213d2053d5c9a1c9d74ae6bb8e62f6
@@ -16,7 +16,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
16
16
 
17
17
  allprojects {
18
18
  group = 'org.embulk'
19
- version = '0.8.0'
19
+ version = '0.8.1'
20
20
 
21
21
  ext {
22
22
  jrubyVersion = '9.0.4.0'
@@ -44,6 +44,7 @@ dependencies {
44
44
  compile 'com.ibm.icu:icu4j:54.1.1'
45
45
 
46
46
  gems 'rubygems:bundler:1.10.6'
47
+ gems 'rubygems:msgpack:0.7.4'
47
48
  gems 'rubygems:liquid:3.0.6'
48
49
  }
49
50
 
@@ -11,6 +11,7 @@ import org.embulk.spi.Schema;
11
11
  import org.embulk.spi.time.Timestamp;
12
12
  import org.embulk.spi.Page;
13
13
  import org.embulk.spi.util.Pages;
14
+ import org.msgpack.value.Value;
14
15
 
15
16
  public abstract class PreviewPrinter
16
17
  implements Closeable
@@ -68,6 +69,8 @@ public abstract class PreviewPrinter
68
69
  return obj.toString();
69
70
  } else if (obj instanceof Timestamp) {
70
71
  return obj.toString();
72
+ } else if (obj instanceof Value) {
73
+ return obj.toString();
71
74
  } else {
72
75
  return modelManager.writeObject(obj);
73
76
  }
@@ -9,6 +9,7 @@ import org.msgpack.core.buffer.MessageBuffer;
9
9
  import org.msgpack.value.Value;
10
10
  import org.jruby.Ruby;
11
11
  import org.jruby.RubyString;
12
+ import org.jruby.runtime.builtin.IRubyObject;
12
13
  import org.jruby.util.ByteList;
13
14
  import org.jcodings.specific.ASCIIEncoding;
14
15
 
@@ -52,4 +53,10 @@ public class RubyValueApi
52
53
  throw Throwables.propagate(ex);
53
54
  }
54
55
  }
56
+
57
+ public static Value toValue(Ruby runtime, IRubyObject object)
58
+ {
59
+ RubyString string = (RubyString) object.callMethod(runtime.getCurrentContext(), "to_msgpack");
60
+ return fromMessagePack(string);
61
+ }
55
62
  }
@@ -1,6 +1,7 @@
1
1
  package org.embulk.spi.util;
2
2
 
3
3
  import org.embulk.spi.time.Timestamp;
4
+ import org.msgpack.value.Value;
4
5
 
5
6
  public interface DynamicColumnSetter
6
7
  {
@@ -15,4 +16,6 @@ public interface DynamicColumnSetter
15
16
  void set(String value);
16
17
 
17
18
  void set(Timestamp value);
19
+
20
+ void set(Value value);
18
21
  }
@@ -12,6 +12,7 @@ import org.embulk.spi.PageBuilder;
12
12
  import org.embulk.spi.Column;
13
13
  import org.embulk.spi.util.DynamicColumnSetter;
14
14
  import org.embulk.spi.time.Timestamp;
15
+ import org.embulk.spi.json.RubyValueApi;
15
16
  import org.msgpack.value.Value;
16
17
 
17
18
  public abstract class AbstractDynamicColumnSetter
@@ -76,7 +77,7 @@ public abstract class AbstractDynamicColumnSetter
76
77
  int nano = (int) ((msec % 1000) * 1000000 + nsec % 1000000000);
77
78
  set(Timestamp.ofEpochSecond(sec, nano));
78
79
  } else {
79
- throw rubyObject.getRuntime().newTypeError("cannot convert instance of " + rubyObject.getMetaClass() + " to nil, true, false, Integer, Float, String, or Time");
80
+ set(RubyValueApi.toValue(rubyObject.getRuntime(), rubyObject));
80
81
  }
81
82
  }
82
83
  }
@@ -53,7 +53,7 @@ public class BooleanColumnSetter
53
53
  if (TRUE_STRINGS.contains(v)) {
54
54
  pageBuilder.setBoolean(column, true);
55
55
  } else {
56
- defaultValue.setDouble(pageBuilder, column);
56
+ defaultValue.setBoolean(pageBuilder, column);
57
57
  }
58
58
  }
59
59
 
@@ -66,6 +66,6 @@ public class BooleanColumnSetter
66
66
  @Override
67
67
  public void set(Value v)
68
68
  {
69
- pageBuilder.setJson(column, v);
69
+ defaultValue.setBoolean(pageBuilder, column);
70
70
  }
71
71
  }
@@ -63,6 +63,6 @@ public class DoubleColumnSetter
63
63
  @Override
64
64
  public void set(Value v)
65
65
  {
66
- pageBuilder.setJson(column, v);
66
+ defaultValue.setDouble(pageBuilder, column);
67
67
  }
68
68
  }
@@ -67,12 +67,12 @@ public class LongColumnSetter
67
67
  @Override
68
68
  public void set(Timestamp v)
69
69
  {
70
- pageBuilder.setDouble(column, v.getEpochSecond());
70
+ pageBuilder.setLong(column, v.getEpochSecond());
71
71
  }
72
72
 
73
73
  @Override
74
74
  public void set(Value v)
75
75
  {
76
- pageBuilder.setJson(column, v);
76
+ defaultValue.setLong(pageBuilder, column);
77
77
  }
78
78
  }
@@ -58,6 +58,6 @@ public class StringColumnSetter
58
58
  @Override
59
59
  public void set(Value v)
60
60
  {
61
- pageBuilder.setJson(column, v);
61
+ pageBuilder.setString(column, v.toJson());
62
62
  }
63
63
  }
@@ -67,6 +67,6 @@ public class TimestampColumnSetter
67
67
  @Override
68
68
  public void set(Value v)
69
69
  {
70
- pageBuilder.setJson(column, v);
70
+ defaultValue.setTimestamp(pageBuilder, column);
71
71
  }
72
72
  }
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.8.1
7
8
  release/release-0.8.0
8
9
  release/release-0.7.10
9
10
  release/release-0.7.9
@@ -1,28 +1,32 @@
1
1
  Release 0.8.0
2
2
  ==================================
3
3
 
4
- General Changes
4
+ JSON type
5
5
  ------------------
6
6
 
7
- * Added JSON type support.
7
+ Embulk v0.8.0 added JSON type support.
8
+
9
+ * A column with ``json`` type can represent nested values such as maps or arrays. This is useful when both input and output support dynamically-typed values.
8
10
 
9
- * A column with ``json`` type can represent nested values such as maps or arrays. This is useful when both input and output support dynamically-typed values.
11
+ * **IMPORTANT**: If input plugin uses JSON type but output plugin is compiled with an old embulk (< 0.8.0), a bulk load transaction fails with a confusing error message. To avoid this issue, please run ``embulk migrate /path/to/embulk-plugin-directory`` to upgrade plugin code, and use the latest plugin. This problem doesn't happen if input plugin doesn't use json type.
10
12
 
11
- * **IMPORTANT**: If input plugin uses JSON type but output plugin is compiled with an old embulk (< 0.8.0), a bulk load transaction fails with a confusing error message. To avoid this issue, please run ``embulk migrate /path/to/embulk-plugin-directory`` to upgrade plugin code, and use the latest plugin. This problem doesn't happen if input plugin doesn't use json type.
13
+ * Filter plugins to process JSON types are not ready yet. Expected plugins include, for example, flattening a json column into statically-typed columns with a guess plugin, extracting a value from a json column using an expression (such as JSONPath) and setting it to another column, or building a json column by copying values from other columns.
12
14
 
13
- * Filter plugins to process JSON types are not ready yet. Expected plugins are for example, flatten a json column into statically-typed columns with guess plugin, extracting a value from a json column using an expression (such as JSONPath) and set it to another column, or building a json column by copying values from other columns.
15
+ Page scattering
16
+ ------------------
14
17
 
15
- * Local executor plugin (the default executor) runs multiple tasks even if there is only 1 input task. This improves performance a lot especially if input is a single huge file.
18
+ Local executor plugin (the default executor) runs multiple tasks even if there is only 1 input task. This improves performance a lot especially if input is a single huge file.
16
19
 
17
- * Its mechanism is that the executor creates 2, 3, 4, or more number of output tasks for each input task. Page chunks from a input task is scattered to output tasks. All of the tasks run in parallel using threads. This feature is called "page scattering".
20
+ * Its mechanism is that the executor creates 2, 3, 4, or more output tasks for each input task. Page chunks from an input task are scattered to the output tasks. All of the tasks run in parallel using threads. This feature is called "page scattering".
18
21
 
19
- * Added ``min_output_tasks`` option at ``exec:`` section. Default is 1x of available CPU cores. Page scattering is enabled if number of input tasks is less than this ``min_output_tasks`` option. Setting larger number here is useful if embulk doesn't use multi-threading with enough concurrency due to too few number of input tasks.
22
+ * Added ``min_output_tasks`` option at ``exec:`` section. Default is 1x of available CPU cores. Page scattering is enabled if the number of input tasks is less than this ``min_output_tasks`` option. Setting a larger number here is useful if embulk doesn't use multi-threading with enough concurrency because there are too few input tasks.
20
23
 
21
- * Added ``max_threads`` option at ``exec:`` section. Default is 2x of availalbe CPU cores. This option controls maximum concurrency. Setting smaller number here is useful if too many threads make the destination or source storage overloaded. Setting larger number here is useful if CPU utilization is too low due to high latency.
24
+ * Added ``max_threads`` option at ``exec:`` section. Default is 2x of available CPU cores. This option controls maximum concurrency. Setting a smaller number here is useful if too many threads overload the destination or source storage. Setting a larger number here is useful if CPU utilization is too low due to high latency.
22
25
 
23
- * The results of output transaction will be deterministic. There're no randomness that depends on timing. However, task assignment changes if ``min_output_tasks`` changes. If you need deterministic results regardless of machines that may have different number of CPU cores, please add ``min_output_tasks`` option to ``exec:`` section. Setting 1 there will disable page scattering completely.
26
+ * The results of an output transaction will be deterministic. There is no randomness that depends on timing. However, task assignment changes if ``min_output_tasks`` changes. If you need deterministic results regardless of machines that may have different numbers of CPU cores, please add ``min_output_tasks`` option to ``exec:`` section. Setting 1 there will disable page scattering completely.
24
27
 
25
- * Upraded JRuby version to 9.0.4.0.
28
+ General Changes
29
+ ------------------
26
30
 
27
31
  * YAML configuration parser uses stricter rules when it converts type of a non-quoted strings.
28
32
 
@@ -32,6 +36,8 @@ General Changes
32
36
 
33
37
  * On, Off, Yes, and No (case-insensitive) will be a string instead of boolean. Only true, True, false, False are recognized as a boolean.
34
38
 
39
+ * Upgraded JRuby version to 9.0.4.0.
40
+
35
41
  Java Plugin API
36
42
  ------------------
37
43
 
@@ -0,0 +1,18 @@
1
+ Release 0.8.1
2
+ ==================================
3
+
4
+ General Changes
5
+ ------------------
6
+
7
+ * Ruby-based plugins use ``>=`` instead of ``~>`` to depend on bundler and embulk so that users can use both older plugins and newer plugins together. This assumes major versions of those dependencies most likely don't break backward compatibility.
8
+
9
+ * Added built-in dependency on msgpack.gem v0.7.4, which is used to support json types in Ruby plugins.
10
+
11
+ * Fixed json value creation using ruby-based parser and input plugins.
12
+
13
+ * Fixed preview of json types.
14
+
15
+
16
+ Release Date
17
+ ------------------
18
+ 2016-01-13
@@ -20,7 +20,8 @@ Gem::Specification.new do |gem|
20
20
  gem.has_rdoc = false
21
21
 
22
22
  if RUBY_PLATFORM =~ /java/i
23
- gem.add_dependency "bundler", '~> 1.10.6'
23
+ gem.add_dependency "bundler", '>= 1.10.6'
24
+ gem.add_dependency "msgpack", '~> 0.7.3'
24
25
  gem.add_dependency "liquid", '~> 3.0.6'
25
26
 
26
27
  # For embulk/guess/charset.rb. See also embulk-core/build.gradle
@@ -128,10 +128,12 @@ EOF
128
128
  if from_ver <= version("0.1.0")
129
129
  # add add_development_dependency
130
130
  migrator.insert_line("**/*.gemspec", /([ \t]*\w+)\.add_development_dependency/) {|m|
131
- "#{m[1]}.add_development_dependency 'embulk', ['~> #{Embulk::VERSION}']"
131
+ "#{m[1]}.add_development_dependency 'embulk', ['>= #{Embulk::VERSION}']"
132
132
  }
133
133
  else
134
- migrator.replace("**/*.gemspec", /add_(?:development_)?dependency\s+\W+embulk\W+\s+([\d\.]+)\W+/, Embulk::VERSION)
134
+ unless migrator.replace("**/*.gemspec", /add_(?:development_)?dependency\s+\W+embulk\W+\s*(\~\>\s*[\d\.]+)\W+/, ">= #{Embulk::VERSION}")
135
+ migrator.replace("**/*.gemspec", /add_(?:development_)?dependency\s+\W+embulk\W+\s*([\d\.]+)\W+/, Embulk::VERSION)
136
+ end
135
137
  end
136
138
  end
137
139
 
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
14
14
  spec.require_paths = ["lib"]
15
15
 
16
16
  #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
17
- spec.add_development_dependency 'embulk', ['~> <%= Embulk::VERSION %>']
18
- spec.add_development_dependency 'bundler', ['~> 1.0']
17
+ spec.add_development_dependency 'embulk', ['>= <%= Embulk::VERSION %>']
18
+ spec.add_development_dependency 'bundler', ['>= 1.10.6']
19
19
  spec.add_development_dependency 'rake', ['>= 10.0']
20
20
  end
@@ -19,23 +19,23 @@ module Embulk::Java
19
19
 
20
20
  module SPI
21
21
  module Time
22
- include_package 'org.embulk.time'
22
+ include_package 'org.embulk.spi.time'
23
23
  end
24
24
 
25
25
  module Json
26
- include_package 'org.embulk.json'
26
+ include_package 'org.embulk.spi.json'
27
27
  end
28
28
 
29
29
  module Type
30
- include_package 'org.embulk.type'
30
+ include_package 'org.embulk.spi.type'
31
31
  end
32
32
 
33
33
  module Unit
34
- include_package 'org.embulk.unit'
34
+ include_package 'org.embulk.spi.unit'
35
35
  end
36
36
 
37
37
  module Util
38
- include_package 'org.embulk.util'
38
+ include_package 'org.embulk.spi.util'
39
39
  end
40
40
 
41
41
  include_package 'org.embulk.spi'
@@ -6,6 +6,7 @@ module Embulk
6
6
 
7
7
  class PageBuilder
8
8
  def initialize(schema, java_page_output)
9
+ require 'msgpack' # used at DynamicPageBuilder.set(Value)
9
10
  # TODO get task as an argument
10
11
  task = Java::SPI::Exec.newConfigSource.load_config(Java::DynamicPageBuilder::BuilderTask.java_class)
11
12
  @page_builder = Java::DynamicPageBuilder.new(task, Java::Injected::BufferAllocator, schema.to_java, java_page_output)
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.8.0'
2
+ VERSION = '0.8.1'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -107,8 +107,8 @@ files:
107
107
  - classpath/bval-jsr303-0.5.jar
108
108
  - classpath/commons-beanutils-core-1.8.3.jar
109
109
  - classpath/commons-lang3-3.1.jar
110
- - classpath/embulk-core-0.8.0.jar
111
- - classpath/embulk-standards-0.8.0.jar
110
+ - classpath/embulk-core-0.8.1.jar
111
+ - classpath/embulk-standards-0.8.1.jar
112
112
  - classpath/guava-18.0.jar
113
113
  - classpath/guice-4.0.jar
114
114
  - classpath/guice-bootstrap-0.1.1.jar
@@ -418,6 +418,7 @@ files:
418
418
  - embulk-docs/src/release/release-0.7.8.rst
419
419
  - embulk-docs/src/release/release-0.7.9.rst
420
420
  - embulk-docs/src/release/release-0.8.0.rst
421
+ - embulk-docs/src/release/release-0.8.1.rst
421
422
  - embulk-standards/build.gradle
422
423
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
423
424
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java