embulk 0.8.13-java → 0.8.14-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/build.gradle +2 -2
- data/embulk-core/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +7 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +4 -1
- data/embulk-docs/src/_static/embulk-logo.png +0 -0
- data/embulk-docs/src/built-in.rst +37 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.14.rst +31 -0
- data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +40 -3
- data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +52 -0
- data/embulk.gemspec +1 -1
- data/lib/embulk/command/embulk_migrate_plugin.rb +1 -1
- data/lib/embulk/data/bundle/.ruby-version +1 -1
- data/lib/embulk/data/new/ruby/.ruby-version +1 -1
- data/lib/embulk/file_input_plugin.rb +1 -1
- data/lib/embulk/file_output_plugin.rb +1 -1
- data/lib/embulk/guess/csv.rb +9 -5
- data/lib/embulk/guess/csv_all_strings.rb +13 -0
- data/lib/embulk/guess_plugin.rb +1 -1
- data/lib/embulk/java_plugin.rb +8 -8
- data/lib/embulk/version.rb +1 -1
- data/test/guess/test_csv_all_strings.rb +43 -0
- metadata +35 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 353b653ad68c876426f467ef64c82698598e9a59
|
4
|
+
data.tar.gz: 78a19ab05e47c9334267c02e4b4ea7ce2a0a3467
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 527b918b1809e5e04cbe5eaf51efd589d753589aa1569168dd8801cc89a680388aa2ef13c4b3041a99ced2637a9ff2751db3248eca05ae34a5ec3e6228af46b7
|
7
|
+
data.tar.gz: cf8ffc06d1e43b2b85c68d9f815b99265c2b48e65460ff97b05a8f6e73297d197390f5e7a850a01ff5482415d725a31bcca3ffd769c38ffac84492c7b1afd60a
|
data/Gemfile.lock
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
embulk (0.8.
|
5
|
-
jruby-jars (= 9.1.
|
4
|
+
embulk (0.8.13)
|
5
|
+
jruby-jars (= 9.1.5.0)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
-
jruby-jars (9.1.
|
10
|
+
jruby-jars (9.1.5.0)
|
11
11
|
kramdown (1.5.0)
|
12
12
|
power_assert (0.2.2)
|
13
13
|
rake (10.4.2)
|
@@ -27,4 +27,4 @@ DEPENDENCIES
|
|
27
27
|
yard (~> 0.8.7)
|
28
28
|
|
29
29
|
BUNDLED WITH
|
30
|
-
1.
|
30
|
+
1.13.2
|
data/build.gradle
CHANGED
@@ -16,10 +16,10 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
16
16
|
|
17
17
|
allprojects {
|
18
18
|
group = 'org.embulk'
|
19
|
-
version = '0.8.
|
19
|
+
version = '0.8.14'
|
20
20
|
|
21
21
|
ext {
|
22
|
-
jrubyVersion = '9.1.
|
22
|
+
jrubyVersion = '9.1.5.0'
|
23
23
|
}
|
24
24
|
|
25
25
|
apply plugin: 'java'
|
data/embulk-core/build.gradle
CHANGED
@@ -38,7 +38,7 @@ dependencies {
|
|
38
38
|
compile 'joda-time:joda-time:2.9.2'
|
39
39
|
compile 'io.netty:netty-buffer:5.0.0.Alpha1'
|
40
40
|
compile 'org.fusesource.jansi:jansi:1.11'
|
41
|
-
compile 'org.msgpack:msgpack-core:0.8.
|
41
|
+
compile 'org.msgpack:msgpack-core:0.8.11'
|
42
42
|
|
43
43
|
// For embulk/guess/charset.rb. See also embulk.gemspec
|
44
44
|
compile 'com.ibm.icu:icu4j:54.1.1'
|
@@ -31,14 +31,19 @@ public class Schema
|
|
31
31
|
return new Builder();
|
32
32
|
}
|
33
33
|
|
34
|
-
private final
|
34
|
+
private final ImmutableList<Column> columns;
|
35
35
|
|
36
36
|
@JsonCreator
|
37
37
|
public Schema(List<Column> columns)
|
38
38
|
{
|
39
|
-
this.columns = columns;
|
39
|
+
this.columns = ImmutableList.copyOf(columns);
|
40
40
|
}
|
41
41
|
|
42
|
+
/**
|
43
|
+
* Returns the list of Column objects.
|
44
|
+
*
|
45
|
+
* It always returns an immutable list.
|
46
|
+
*/
|
42
47
|
@JsonValue
|
43
48
|
public List<Column> getColumns()
|
44
49
|
{
|
@@ -32,7 +32,10 @@ public class ResumableInputStream
|
|
32
32
|
private void reopen(Exception closedCause) throws IOException
|
33
33
|
{
|
34
34
|
if (in != null) {
|
35
|
-
|
35
|
+
try {
|
36
|
+
in.close();
|
37
|
+
} catch (IOException ignored) {
|
38
|
+
}
|
36
39
|
in = null;
|
37
40
|
}
|
38
41
|
in = reopener.reopen(offset, closedCause);
|
Binary file
|
@@ -633,3 +633,40 @@ Example
|
|
633
633
|
out:
|
634
634
|
type: ...
|
635
635
|
...
|
636
|
+
|
637
|
+
Guess executor
|
638
|
+
------------------
|
639
|
+
|
640
|
+
The guess executor is called by the ``guess`` command. It executes the default guess plugins in sequential order and suggests an Embulk config using the appropriate guess plugin. The default guess plugins, in order, are ``gzip``, ``bzip2``, ``json`` and ``csv``.
|
641
|
+
|
642
|
+
Options
|
643
|
+
~~~~~~~~~~~~~~~~~~
|
644
|
+
|
645
|
+
+-----------------------+----------+----------------------------------------------------------------------+--------------------------------------+
|
646
|
+
| name | type | description | required? |
|
647
|
+
+=======================+==========+======================================================================+======================================+
|
648
|
+
| guess_plugins | array | ``guess`` command uses specified guess plugins. | ``[]`` by default |
|
649
|
+
+-----------------------+----------+----------------------------------------------------------------------+--------------------------------------+
|
650
|
+
| exclude_guess_plugins | array | ``guess`` command doesn't use specified plugins. | ``[]`` by default |
|
651
|
+
+-----------------------+----------+----------------------------------------------------------------------+--------------------------------------+
|
652
|
+
|
653
|
+
The ``guess_plugins`` option adds the specified guess plugins to the bottom of the list of default guess plugins.
|
654
|
+
|
655
|
+
The ``exclude_guess_plugins`` option excludes the specified guess plugins from the list of default guess plugins that the guess executor uses.
|
656
|
+
|
657
|
+
This example shows how to use the ``csv_all_strings`` guess plugin, which suggests all column types within CSV files as string types. It needs to be explicitly specified by users when it is used instead of the ``csv`` guess plugin, because it is not included in the default guess plugins. We can also exclude the default ``csv`` guess plugin.
|
658
|
+
|
659
|
+
Example
|
660
|
+
~~~~~~~~~~~~~~~~~~
|
661
|
+
|
662
|
+
.. code-block:: yaml
|
663
|
+
|
664
|
+
exec:
|
665
|
+
guess_plugins: ['csv_all_strings']
|
666
|
+
exclude_guess_plugins: ['csv']
|
667
|
+
in:
|
668
|
+
type: ...
|
669
|
+
...
|
670
|
+
out:
|
671
|
+
type: ...
|
672
|
+
...
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,31 @@
|
|
1
|
+
Release 0.8.14
|
2
|
+
==================================
|
3
|
+
|
4
|
+
General Changes
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Improved IOException handling within ResumableInputStream#reopen method. [#470]
|
8
|
+
|
9
|
+
* Added "rules" config option to rename filter plugin. It allows users to specify rule based policy to rename columns. [#483]
|
10
|
+
|
11
|
+
* Added "csv_all_strings" guess plugin, which suggests columns in csv files as string typed columns. [#489]
|
12
|
+
|
13
|
+
* Updated JRuby from 9.1.2.0 to 9.1.5.0. Release notes:
|
14
|
+
|
15
|
+
* http://jruby.org/2016/08/30/jruby-9-1-3-0.html
|
16
|
+
|
17
|
+
* http://jruby.org/2016/09/01/jruby-9-1-4-0.html
|
18
|
+
|
19
|
+
* http://jruby.org/2016/09/06/jruby-9-1-5-0.html
|
20
|
+
|
21
|
+
* Updated msgpack-java from 0.8.8 to 0.8.11. Release notes:
|
22
|
+
|
23
|
+
* https://github.com/msgpack/msgpack-java/blob/0.8.9/RELEASE_NOTES.md
|
24
|
+
|
25
|
+
* https://github.com/msgpack/msgpack-java/blob/0.8.10/RELEASE_NOTES.md
|
26
|
+
|
27
|
+
* https://github.com/msgpack/msgpack-java/blob/0.8.11/RELEASE_NOTES.md
|
28
|
+
|
29
|
+
Release Date
|
30
|
+
------------------
|
31
|
+
2016-10-21
|
@@ -2,6 +2,7 @@ package org.embulk.standards;
|
|
2
2
|
|
3
3
|
import org.embulk.config.Config;
|
4
4
|
import org.embulk.config.ConfigDefault;
|
5
|
+
import org.embulk.config.ConfigException;
|
5
6
|
import org.embulk.config.ConfigSource;
|
6
7
|
import org.embulk.config.Task;
|
7
8
|
import org.embulk.config.TaskSource;
|
@@ -10,8 +11,10 @@ import org.embulk.spi.FilterPlugin;
|
|
10
11
|
import org.embulk.spi.PageOutput;
|
11
12
|
import org.embulk.spi.Schema;
|
12
13
|
|
14
|
+
import java.util.List;
|
13
15
|
import java.util.Map;
|
14
16
|
|
17
|
+
|
15
18
|
public class RenameFilterPlugin
|
16
19
|
implements FilterPlugin
|
17
20
|
{
|
@@ -21,6 +24,10 @@ public class RenameFilterPlugin
|
|
21
24
|
@Config("columns")
|
22
25
|
@ConfigDefault("{}")
|
23
26
|
Map<String, String> getRenameMap();
|
27
|
+
|
28
|
+
@Config("rules")
|
29
|
+
@ConfigDefault("[]")
|
30
|
+
List<ConfigSource> getRulesList();
|
24
31
|
}
|
25
32
|
|
26
33
|
@Override
|
@@ -29,12 +36,14 @@ public class RenameFilterPlugin
|
|
29
36
|
{
|
30
37
|
PluginTask task = config.loadConfig(PluginTask.class);
|
31
38
|
Map<String, String> renameMap = task.getRenameMap();
|
39
|
+
List<ConfigSource> rulesList = task.getRulesList();
|
32
40
|
|
33
|
-
//
|
41
|
+
// Check if the given column in "columns" exists or not.
|
34
42
|
for (String columnName : renameMap.keySet()) {
|
35
43
|
inputSchema.lookupColumn(columnName); // throws SchemaConfigException
|
36
44
|
}
|
37
45
|
|
46
|
+
// Rename by "columns": to be applied before "rules".
|
38
47
|
Schema.Builder builder = Schema.builder();
|
39
48
|
for (Column column : inputSchema.getColumns()) {
|
40
49
|
String name = column.getName();
|
@@ -43,8 +52,16 @@ public class RenameFilterPlugin
|
|
43
52
|
}
|
44
53
|
builder.add(name, column.getType());
|
45
54
|
}
|
55
|
+
Schema intermediateSchema = builder.build();
|
56
|
+
|
57
|
+
// Rename by "rules".
|
58
|
+
Schema outputSchema = intermediateSchema;
|
59
|
+
for (ConfigSource rule : rulesList) {
|
60
|
+
outputSchema = applyRule(rule, intermediateSchema);
|
61
|
+
intermediateSchema = outputSchema;
|
62
|
+
}
|
46
63
|
|
47
|
-
control.run(task.dump(),
|
64
|
+
control.run(task.dump(), outputSchema);
|
48
65
|
}
|
49
66
|
|
50
67
|
@Override
|
@@ -53,4 +70,24 @@ public class RenameFilterPlugin
|
|
53
70
|
{
|
54
71
|
return output;
|
55
72
|
}
|
56
|
-
|
73
|
+
|
74
|
+
|
75
|
+
// Extending Task is required to be deserialized with ConfigSource.loadConfig()
|
76
|
+
// although this Rule is not really a Task.
|
77
|
+
// TODO(dmikurube): Revisit this to consider how not to extend Task for this.
|
78
|
+
private interface Rule
|
79
|
+
extends Task
|
80
|
+
{
|
81
|
+
@Config("rule")
|
82
|
+
String getRule();
|
83
|
+
}
|
84
|
+
|
85
|
+
private Schema applyRule(ConfigSource ruleConfig, Schema inputSchema) throws ConfigException
|
86
|
+
{
|
87
|
+
Rule rule = ruleConfig.loadConfig(Rule.class);
|
88
|
+
switch (rule.getRule()) {
|
89
|
+
default:
|
90
|
+
throw new ConfigException("Renaming rule \"" +rule+ "\" is unknown");
|
91
|
+
}
|
92
|
+
}
|
93
|
+
}
|
@@ -1,7 +1,9 @@
|
|
1
1
|
package org.embulk.standards;
|
2
2
|
|
3
|
+
import com.google.common.collect.ImmutableList;
|
3
4
|
import com.google.common.collect.ImmutableMap;
|
4
5
|
import org.embulk.EmbulkTestRuntime;
|
6
|
+
import org.embulk.config.ConfigException;
|
5
7
|
import org.embulk.config.ConfigSource;
|
6
8
|
import org.embulk.config.TaskSource;
|
7
9
|
import org.embulk.spi.Column;
|
@@ -85,4 +87,54 @@ public class TestRenameFilterPlugin
|
|
85
87
|
}
|
86
88
|
});
|
87
89
|
}
|
90
|
+
|
91
|
+
@Test
|
92
|
+
public void checkConfigExceptionIfUnknownStringTypeOfRenamingOperator()
|
93
|
+
{
|
94
|
+
// A simple string shouldn't come as a renaming rule.
|
95
|
+
ConfigSource pluginConfig = Exec.newConfigSource()
|
96
|
+
.set("rules", ImmutableList.of("string_rule"));
|
97
|
+
|
98
|
+
try {
|
99
|
+
filter.transaction(pluginConfig, SCHEMA, new FilterPlugin.Control() {
|
100
|
+
public void run(TaskSource task, Schema schema) { }
|
101
|
+
});
|
102
|
+
fail();
|
103
|
+
} catch (Throwable t) {
|
104
|
+
assertTrue(t instanceof ConfigException);
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
@Test
|
109
|
+
public void checkConfigExceptionIfUnknownListTypeOfRenamingOperator()
|
110
|
+
{
|
111
|
+
// A list [] shouldn't come as a renaming rule.
|
112
|
+
ConfigSource pluginConfig = Exec.newConfigSource()
|
113
|
+
.set("rules", ImmutableList.of(ImmutableList.of("listed_operator1", "listed_operator2")));
|
114
|
+
|
115
|
+
try {
|
116
|
+
filter.transaction(pluginConfig, SCHEMA, new FilterPlugin.Control() {
|
117
|
+
public void run(TaskSource task, Schema schema) { }
|
118
|
+
});
|
119
|
+
fail();
|
120
|
+
} catch (Throwable t) {
|
121
|
+
assertTrue(t instanceof ConfigException);
|
122
|
+
}
|
123
|
+
}
|
124
|
+
|
125
|
+
@Test
|
126
|
+
public void checkConfigExceptionIfUnknownRenamingOperatorName()
|
127
|
+
{
|
128
|
+
ConfigSource pluginConfig = Exec.newConfigSource()
|
129
|
+
.set("rules", ImmutableList.of(ImmutableMap.of("rule", "some_unknown_renaming_operator")));
|
130
|
+
|
131
|
+
try {
|
132
|
+
filter.transaction(pluginConfig, SCHEMA, new FilterPlugin.Control() {
|
133
|
+
public void run(TaskSource task, Schema schema) { }
|
134
|
+
});
|
135
|
+
fail();
|
136
|
+
} catch (Throwable t) {
|
137
|
+
assertTrue(t instanceof ConfigException);
|
138
|
+
}
|
139
|
+
}
|
88
140
|
}
|
data/embulk.gemspec
CHANGED
@@ -1 +1 @@
|
|
1
|
-
jruby-9.1.
|
1
|
+
jruby-9.1.5.0
|
@@ -1 +1 @@
|
|
1
|
-
jruby-9.1.
|
1
|
+
jruby-9.1.5.0
|
@@ -13,7 +13,7 @@ module Embulk
|
|
13
13
|
module RubyAdapter
|
14
14
|
module ClassMethods
|
15
15
|
def new_java
|
16
|
-
Java::FileInputRunner.new(Java.injector.getInstance(
|
16
|
+
Java::FileInputRunner.new(Java.injector.getInstance(plugin_java_class))
|
17
17
|
end
|
18
18
|
# TODO transaction, resume, cleanup
|
19
19
|
end
|
@@ -13,7 +13,7 @@ module Embulk
|
|
13
13
|
module RubyAdapter
|
14
14
|
module ClassMethods
|
15
15
|
def new_java
|
16
|
-
Java::FileOutputRunner.new(Java.injector.getInstance(
|
16
|
+
Java::FileOutputRunner.new(Java.injector.getInstance(plugin_java_class))
|
17
17
|
end
|
18
18
|
# TODO transaction, resume, cleanup
|
19
19
|
end
|
data/lib/embulk/guess/csv.rb
CHANGED
@@ -142,11 +142,7 @@ module Embulk
|
|
142
142
|
schema = []
|
143
143
|
column_names.zip(other_types).each do |name,type|
|
144
144
|
if name && type
|
145
|
-
|
146
|
-
schema << {"name" => name, "type" => type, "format" => type.format}
|
147
|
-
else
|
148
|
-
schema << {"name" => name, "type" => type}
|
149
|
-
end
|
145
|
+
schema << new_column(name, type)
|
150
146
|
end
|
151
147
|
end
|
152
148
|
parser_guessed["columns"] = schema
|
@@ -154,6 +150,14 @@ module Embulk
|
|
154
150
|
return {"parser" => parser_guessed}
|
155
151
|
end
|
156
152
|
|
153
|
+
def new_column(name, type)
|
154
|
+
if type.is_a?(SchemaGuess::TimestampTypeMatch)
|
155
|
+
{"name" => name, "type" => type, "format" => type.format}
|
156
|
+
else
|
157
|
+
{"name" => name, "type" => type}
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
157
161
|
private
|
158
162
|
|
159
163
|
def split_lines(parser_config, skip_empty_lines, sample_lines, delim, extra_config)
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Guess
|
3
|
+
require 'embulk/guess/csv'
|
4
|
+
|
5
|
+
class CsvAllStringsGuessPlugin < CsvGuessPlugin
|
6
|
+
Plugin.register_guess("csv_all_strings", self)
|
7
|
+
|
8
|
+
def new_column(name, type)
|
9
|
+
{"name" => name, "type" => "string"}
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
data/lib/embulk/guess_plugin.rb
CHANGED
@@ -37,7 +37,7 @@ module Embulk
|
|
37
37
|
def guess(config, sample)
|
38
38
|
java_config = config.to_java
|
39
39
|
java_sample = sample.to_java
|
40
|
-
java_config_diff =
|
40
|
+
java_config_diff = plugin_java_object.guess(java_config, java_sample)
|
41
41
|
return DataSource.from_java(java_config_diff)
|
42
42
|
end
|
43
43
|
end
|
data/lib/embulk/java_plugin.rb
CHANGED
@@ -57,20 +57,20 @@ module Embulk
|
|
57
57
|
|
58
58
|
def self.ruby_adapter_class(java_class, ruby_base_class, ruby_module)
|
59
59
|
Class.new(ruby_base_class) do
|
60
|
-
const_set(:
|
60
|
+
const_set(:PLUGIN_JAVA_CLASS, java_class)
|
61
61
|
|
62
62
|
include ruby_module
|
63
63
|
extend ruby_module::ClassMethods
|
64
64
|
|
65
|
-
unless method_defined?(:
|
66
|
-
def
|
67
|
-
@
|
65
|
+
unless method_defined?(:plugin_java_object)
|
66
|
+
def plugin_java_object
|
67
|
+
@plugin_java_object ||= self.class.new_java
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
-
unless (class<<self;self;end).method_defined?(:
|
72
|
-
def self.
|
73
|
-
self::
|
71
|
+
unless (class<<self;self;end).method_defined?(:plugin_java_class)
|
72
|
+
def self.plugin_java_class
|
73
|
+
self::PLUGIN_JAVA_CLASS
|
74
74
|
end
|
75
75
|
end
|
76
76
|
|
@@ -81,7 +81,7 @@ module Embulk
|
|
81
81
|
# ruby_module::ClassMethods includes other modules.
|
82
82
|
unless ruby_module::ClassMethods.method_defined?(:new_java)
|
83
83
|
def self.new_java
|
84
|
-
Java.injector.getInstance(
|
84
|
+
Java.injector.getInstance(plugin_java_class)
|
85
85
|
end
|
86
86
|
end
|
87
87
|
end
|
data/lib/embulk/version.rb
CHANGED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'time'
|
3
|
+
require 'embulk/guess/csv_all_strings'
|
4
|
+
|
5
|
+
class CsvAllStringsGuessTest < ::Test::Unit::TestCase
|
6
|
+
class TestAllStrings < self
|
7
|
+
def test_columns_without_header
|
8
|
+
actual = guess([
|
9
|
+
"1\tfoo\t2000-01-01T00:00:00+0900",
|
10
|
+
"2\tbar\t2000-01-01T00:00:00+0900",
|
11
|
+
])
|
12
|
+
expected = [
|
13
|
+
{"name" => "c0", "type" => "string"},
|
14
|
+
{"name" => "c1", "type" => "string"},
|
15
|
+
{"name" => "c2", "type" => "string"},
|
16
|
+
]
|
17
|
+
assert_equal expected, actual["parser"]["columns"]
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_columns_with_header
|
21
|
+
actual = guess([
|
22
|
+
"num\tstr\ttime",
|
23
|
+
"1\tfoo\t2000-01-01T00:00:00+0900",
|
24
|
+
"2\tbar\t2000-01-01T00:00:00+0900",
|
25
|
+
])
|
26
|
+
expected = [
|
27
|
+
{"name" => "num", "type" => "string"},
|
28
|
+
{"name" => "str", "type" => "string"},
|
29
|
+
{"name" => "time", "type" => "string"},
|
30
|
+
]
|
31
|
+
assert_equal expected, actual["parser"]["columns"]
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def guess(texts)
|
36
|
+
conf = Embulk::DataSource.new({
|
37
|
+
parser: {
|
38
|
+
type: "csv"
|
39
|
+
}
|
40
|
+
})
|
41
|
+
Embulk::Guess::CsvAllStringsGuessPlugin.new.guess_lines(conf, Array(texts))
|
42
|
+
end
|
43
|
+
end
|
metadata
CHANGED
@@ -1,127 +1,127 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.14
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
16
15
|
requirements:
|
17
16
|
- - ">="
|
18
17
|
- !ruby/object:Gem::Version
|
19
18
|
version: 1.10.6
|
19
|
+
name: bundler
|
20
|
+
prerelease: false
|
21
|
+
type: :runtime
|
20
22
|
version_requirements: !ruby/object:Gem::Requirement
|
21
23
|
requirements:
|
22
24
|
- - ">="
|
23
25
|
- !ruby/object:Gem::Version
|
24
26
|
version: 1.10.6
|
25
|
-
prerelease: false
|
26
|
-
type: :runtime
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name: msgpack
|
29
28
|
requirement: !ruby/object:Gem::Requirement
|
30
29
|
requirements:
|
31
30
|
- - "~>"
|
32
31
|
- !ruby/object:Gem::Version
|
33
32
|
version: 0.7.3
|
33
|
+
name: msgpack
|
34
|
+
prerelease: false
|
35
|
+
type: :runtime
|
34
36
|
version_requirements: !ruby/object:Gem::Requirement
|
35
37
|
requirements:
|
36
38
|
- - "~>"
|
37
39
|
- !ruby/object:Gem::Version
|
38
40
|
version: 0.7.3
|
39
|
-
prerelease: false
|
40
|
-
type: :runtime
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name: liquid
|
43
42
|
requirement: !ruby/object:Gem::Requirement
|
44
43
|
requirements:
|
45
44
|
- - "~>"
|
46
45
|
- !ruby/object:Gem::Version
|
47
46
|
version: 3.0.6
|
47
|
+
name: liquid
|
48
|
+
prerelease: false
|
49
|
+
type: :runtime
|
48
50
|
version_requirements: !ruby/object:Gem::Requirement
|
49
51
|
requirements:
|
50
52
|
- - "~>"
|
51
53
|
- !ruby/object:Gem::Version
|
52
54
|
version: 3.0.6
|
53
|
-
prerelease: false
|
54
|
-
type: :runtime
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name: rjack-icu
|
57
56
|
requirement: !ruby/object:Gem::Requirement
|
58
57
|
requirements:
|
59
58
|
- - "~>"
|
60
59
|
- !ruby/object:Gem::Version
|
61
60
|
version: 4.54.1.1
|
61
|
+
name: rjack-icu
|
62
|
+
prerelease: false
|
63
|
+
type: :runtime
|
62
64
|
version_requirements: !ruby/object:Gem::Requirement
|
63
65
|
requirements:
|
64
66
|
- - "~>"
|
65
67
|
- !ruby/object:Gem::Version
|
66
68
|
version: 4.54.1.1
|
67
|
-
prerelease: false
|
68
|
-
type: :runtime
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name: rake
|
71
70
|
requirement: !ruby/object:Gem::Requirement
|
72
71
|
requirements:
|
73
72
|
- - ">="
|
74
73
|
- !ruby/object:Gem::Version
|
75
74
|
version: 0.10.0
|
75
|
+
name: rake
|
76
|
+
prerelease: false
|
77
|
+
type: :development
|
76
78
|
version_requirements: !ruby/object:Gem::Requirement
|
77
79
|
requirements:
|
78
80
|
- - ">="
|
79
81
|
- !ruby/object:Gem::Version
|
80
82
|
version: 0.10.0
|
81
|
-
prerelease: false
|
82
|
-
type: :development
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name: test-unit
|
85
84
|
requirement: !ruby/object:Gem::Requirement
|
86
85
|
requirements:
|
87
86
|
- - "~>"
|
88
87
|
- !ruby/object:Gem::Version
|
89
88
|
version: 3.0.9
|
89
|
+
name: test-unit
|
90
|
+
prerelease: false
|
91
|
+
type: :development
|
90
92
|
version_requirements: !ruby/object:Gem::Requirement
|
91
93
|
requirements:
|
92
94
|
- - "~>"
|
93
95
|
- !ruby/object:Gem::Version
|
94
96
|
version: 3.0.9
|
95
|
-
prerelease: false
|
96
|
-
type: :development
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name: yard
|
99
98
|
requirement: !ruby/object:Gem::Requirement
|
100
99
|
requirements:
|
101
100
|
- - "~>"
|
102
101
|
- !ruby/object:Gem::Version
|
103
102
|
version: 0.8.7
|
103
|
+
name: yard
|
104
|
+
prerelease: false
|
105
|
+
type: :development
|
104
106
|
version_requirements: !ruby/object:Gem::Requirement
|
105
107
|
requirements:
|
106
108
|
- - "~>"
|
107
109
|
- !ruby/object:Gem::Version
|
108
110
|
version: 0.8.7
|
109
|
-
prerelease: false
|
110
|
-
type: :development
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name: kramdown
|
113
112
|
requirement: !ruby/object:Gem::Requirement
|
114
113
|
requirements:
|
115
114
|
- - "~>"
|
116
115
|
- !ruby/object:Gem::Version
|
117
116
|
version: 1.5.0
|
117
|
+
name: kramdown
|
118
|
+
prerelease: false
|
119
|
+
type: :development
|
118
120
|
version_requirements: !ruby/object:Gem::Requirement
|
119
121
|
requirements:
|
120
122
|
- - "~>"
|
121
123
|
- !ruby/object:Gem::Version
|
122
124
|
version: 1.5.0
|
123
|
-
prerelease: false
|
124
|
-
type: :development
|
125
125
|
description: Embulk is an open-source, plugin-based bulk data loader to scale and simplify data management across heterogeneous data stores. It can collect and ship any kinds of data in high throughput with transaction control.
|
126
126
|
email:
|
127
127
|
- frsyuki@gmail.com
|
@@ -148,9 +148,9 @@ files:
|
|
148
148
|
- classpath/commons-beanutils-core-1.8.3.jar
|
149
149
|
- classpath/commons-compress-1.10.jar
|
150
150
|
- classpath/commons-lang3-3.1.jar
|
151
|
-
- classpath/embulk-cli-0.8.
|
152
|
-
- classpath/embulk-core-0.8.
|
153
|
-
- classpath/embulk-standards-0.8.
|
151
|
+
- classpath/embulk-cli-0.8.14.jar
|
152
|
+
- classpath/embulk-core-0.8.14.jar
|
153
|
+
- classpath/embulk-standards-0.8.14.jar
|
154
154
|
- classpath/guava-18.0.jar
|
155
155
|
- classpath/guice-4.0.jar
|
156
156
|
- classpath/guice-bootstrap-0.1.1.jar
|
@@ -166,7 +166,7 @@ files:
|
|
166
166
|
- classpath/joda-time-2.9.2.jar
|
167
167
|
- classpath/logback-classic-1.1.3.jar
|
168
168
|
- classpath/logback-core-1.1.3.jar
|
169
|
-
- classpath/msgpack-core-0.8.
|
169
|
+
- classpath/msgpack-core-0.8.11.jar
|
170
170
|
- classpath/netty-buffer-5.0.0.Alpha1.jar
|
171
171
|
- classpath/netty-common-5.0.0.Alpha1.jar
|
172
172
|
- classpath/slf4j-api-1.7.12.jar
|
@@ -467,6 +467,7 @@ files:
|
|
467
467
|
- embulk-docs/src/release/release-0.8.11.rst
|
468
468
|
- embulk-docs/src/release/release-0.8.12.rst
|
469
469
|
- embulk-docs/src/release/release-0.8.13.rst
|
470
|
+
- embulk-docs/src/release/release-0.8.14.rst
|
470
471
|
- embulk-docs/src/release/release-0.8.2.rst
|
471
472
|
- embulk-docs/src/release/release-0.8.3.rst
|
472
473
|
- embulk-docs/src/release/release-0.8.4.rst
|
@@ -567,6 +568,7 @@ files:
|
|
567
568
|
- lib/embulk/guess/bzip2.rb
|
568
569
|
- lib/embulk/guess/charset.rb
|
569
570
|
- lib/embulk/guess/csv.rb
|
571
|
+
- lib/embulk/guess/csv_all_strings.rb
|
570
572
|
- lib/embulk/guess/gzip.rb
|
571
573
|
- lib/embulk/guess/json.rb
|
572
574
|
- lib/embulk/guess/newline.rb
|
@@ -589,6 +591,7 @@ files:
|
|
589
591
|
- lib/embulk/schema.rb
|
590
592
|
- lib/embulk/version.rb
|
591
593
|
- settings.gradle
|
594
|
+
- test/guess/test_csv_all_strings.rb
|
592
595
|
- test/guess/test_csv_guess.rb
|
593
596
|
- test/guess/test_schema_guess.rb
|
594
597
|
- test/guess/test_time_format_guess.rb
|
@@ -614,11 +617,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
614
617
|
version: '0'
|
615
618
|
requirements: []
|
616
619
|
rubyforge_project:
|
617
|
-
rubygems_version: 2.6.
|
620
|
+
rubygems_version: 2.6.6
|
618
621
|
signing_key:
|
619
622
|
specification_version: 4
|
620
623
|
summary: Embulk, a plugin-based parallel bulk data loader
|
621
624
|
test_files:
|
625
|
+
- test/guess/test_csv_all_strings.rb
|
622
626
|
- test/guess/test_csv_guess.rb
|
623
627
|
- test/guess/test_schema_guess.rb
|
624
628
|
- test/guess/test_time_format_guess.rb
|