embulk 0.8.13 → 0.8.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/build.gradle +2 -2
- data/embulk-core/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +7 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +4 -1
- data/embulk-docs/src/_static/embulk-logo.png +0 -0
- data/embulk-docs/src/built-in.rst +37 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.14.rst +31 -0
- data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +40 -3
- data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +52 -0
- data/embulk.gemspec +1 -1
- data/lib/embulk/command/embulk_migrate_plugin.rb +1 -1
- data/lib/embulk/data/bundle/.ruby-version +1 -1
- data/lib/embulk/data/new/ruby/.ruby-version +1 -1
- data/lib/embulk/file_input_plugin.rb +1 -1
- data/lib/embulk/file_output_plugin.rb +1 -1
- data/lib/embulk/guess/csv.rb +9 -5
- data/lib/embulk/guess/csv_all_strings.rb +13 -0
- data/lib/embulk/guess_plugin.rb +1 -1
- data/lib/embulk/java_plugin.rb +8 -8
- data/lib/embulk/version.rb +1 -1
- data/test/guess/test_csv_all_strings.rb +43 -0
- metadata +12 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20a4f8e68f013bad287fa60a52982eacdad43667
|
4
|
+
data.tar.gz: 656680f1b59861633ae2f5575b46359de58c44b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1f70f2fdcc4856b718fc4825bede8aa8b9e28022aed398df5e5a82b86754dfd9ef29e1b9ea3db92d2c9f0f1094cf0b9110550e9b2c4edfeaee67d0e18a2fe6a4
|
7
|
+
data.tar.gz: 678b00d7dc6cfa0703659c7fdf028aed9f7100246dbfbc186d5c4309f72f85a87e4ae4e0a45074996d2179e360a7169f57c3997bdc3348922518a14c64996f39
|
data/Gemfile.lock
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
embulk (0.8.
|
5
|
-
jruby-jars (= 9.1.
|
4
|
+
embulk (0.8.13)
|
5
|
+
jruby-jars (= 9.1.5.0)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
-
jruby-jars (9.1.
|
10
|
+
jruby-jars (9.1.5.0)
|
11
11
|
kramdown (1.5.0)
|
12
12
|
power_assert (0.2.2)
|
13
13
|
rake (10.4.2)
|
@@ -27,4 +27,4 @@ DEPENDENCIES
|
|
27
27
|
yard (~> 0.8.7)
|
28
28
|
|
29
29
|
BUNDLED WITH
|
30
|
-
1.
|
30
|
+
1.13.2
|
data/build.gradle
CHANGED
@@ -16,10 +16,10 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
16
16
|
|
17
17
|
allprojects {
|
18
18
|
group = 'org.embulk'
|
19
|
-
version = '0.8.
|
19
|
+
version = '0.8.14'
|
20
20
|
|
21
21
|
ext {
|
22
|
-
jrubyVersion = '9.1.
|
22
|
+
jrubyVersion = '9.1.5.0'
|
23
23
|
}
|
24
24
|
|
25
25
|
apply plugin: 'java'
|
data/embulk-core/build.gradle
CHANGED
@@ -38,7 +38,7 @@ dependencies {
|
|
38
38
|
compile 'joda-time:joda-time:2.9.2'
|
39
39
|
compile 'io.netty:netty-buffer:5.0.0.Alpha1'
|
40
40
|
compile 'org.fusesource.jansi:jansi:1.11'
|
41
|
-
compile 'org.msgpack:msgpack-core:0.8.
|
41
|
+
compile 'org.msgpack:msgpack-core:0.8.11'
|
42
42
|
|
43
43
|
// For embulk/guess/charset.rb. See also embulk.gemspec
|
44
44
|
compile 'com.ibm.icu:icu4j:54.1.1'
|
@@ -31,14 +31,19 @@ public class Schema
|
|
31
31
|
return new Builder();
|
32
32
|
}
|
33
33
|
|
34
|
-
private final
|
34
|
+
private final ImmutableList<Column> columns;
|
35
35
|
|
36
36
|
@JsonCreator
|
37
37
|
public Schema(List<Column> columns)
|
38
38
|
{
|
39
|
-
this.columns = columns;
|
39
|
+
this.columns = ImmutableList.copyOf(columns);
|
40
40
|
}
|
41
41
|
|
42
|
+
/**
|
43
|
+
* Returns the list of Column objects.
|
44
|
+
*
|
45
|
+
* It always returns an immutable list.
|
46
|
+
*/
|
42
47
|
@JsonValue
|
43
48
|
public List<Column> getColumns()
|
44
49
|
{
|
@@ -32,7 +32,10 @@ public class ResumableInputStream
|
|
32
32
|
private void reopen(Exception closedCause) throws IOException
|
33
33
|
{
|
34
34
|
if (in != null) {
|
35
|
-
|
35
|
+
try {
|
36
|
+
in.close();
|
37
|
+
} catch (IOException ignored) {
|
38
|
+
}
|
36
39
|
in = null;
|
37
40
|
}
|
38
41
|
in = reopener.reopen(offset, closedCause);
|
Binary file
|
@@ -633,3 +633,40 @@ Example
|
|
633
633
|
out:
|
634
634
|
type: ...
|
635
635
|
...
|
636
|
+
|
637
|
+
Guess executor
|
638
|
+
------------------
|
639
|
+
|
640
|
+
The guess executor is called by the ``guess`` command. It executes the default guess plugins in sequential order and suggests an Embulk config using the appropriate guess plugin. The default guess plugins, in order, are ``gzip``, ``bzip2``, ``json`` and ``csv``.
|
641
|
+
|
642
|
+
Options
|
643
|
+
~~~~~~~~~~~~~~~~~~
|
644
|
+
|
645
|
+
+-----------------------+----------+----------------------------------------------------------------------+--------------------------------------+
|
646
|
+
| name | type | description | required? |
|
647
|
+
+=======================+==========+======================================================================+======================================+
|
648
|
+
| guess_plugins | array | ``guess`` command uses specified guess plugins. | ``[]`` by default |
|
649
|
+
+-----------------------+----------+----------------------------------------------------------------------+--------------------------------------+
|
650
|
+
| exclude_guess_plugins | array | ``guess`` command doesn't use specified plugins. | ``[]`` by default |
|
651
|
+
+-----------------------+----------+----------------------------------------------------------------------+--------------------------------------+
|
652
|
+
|
653
|
+
The ``guess_plugins`` option appends the specified guess plugins to the bottom of the list of default guess plugins.
|
654
|
+
|
655
|
+
The ``exclude_guess_plugins`` option excludes the specified guess plugins from the list of default guess plugins that the guess executor uses.
|
656
|
+
|
657
|
+
This example shows how to use the ``csv_all_strings`` guess plugin, which suggests column types within CSV files as string types. It must be explicitly specified by users when it is used instead of the ``csv`` guess plugin, because it is not included in the default guess plugins. The default ``csv`` guess plugin can also be excluded.
|
658
|
+
|
659
|
+
Example
|
660
|
+
~~~~~~~~~~~~~~~~~~
|
661
|
+
|
662
|
+
.. code-block:: yaml
|
663
|
+
|
664
|
+
exec:
|
665
|
+
guess_plugins: ['csv_all_strings']
|
666
|
+
exclude_guess_plugins: ['csv']
|
667
|
+
in:
|
668
|
+
type: ...
|
669
|
+
...
|
670
|
+
out:
|
671
|
+
type: ...
|
672
|
+
...
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,31 @@
|
|
1
|
+
Release 0.8.14
|
2
|
+
==================================
|
3
|
+
|
4
|
+
General Changes
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Improved IOException handling within ResumableInputStream#reopen method. [#470]
|
8
|
+
|
9
|
+
* Added "rules" config option to rename filter plugin. It allows users to specify rule based policy to rename columns. [#483]
|
10
|
+
|
11
|
+
* Added "csv_all_strings" guess plugin, which suggests columns in csv files as string typed columns. [#489]
|
12
|
+
|
13
|
+
* Updated JRuby from 9.1.2.0 to 9.1.5.0. Release notes:
|
14
|
+
|
15
|
+
* http://jruby.org/2016/08/30/jruby-9-1-3-0.html
|
16
|
+
|
17
|
+
* http://jruby.org/2016/09/01/jruby-9-1-4-0.html
|
18
|
+
|
19
|
+
* http://jruby.org/2016/09/06/jruby-9-1-5-0.html
|
20
|
+
|
21
|
+
* Updated msgpack-java from 0.8.8 to 0.8.11. Release notes:
|
22
|
+
|
23
|
+
* https://github.com/msgpack/msgpack-java/blob/0.8.9/RELEASE_NOTES.md
|
24
|
+
|
25
|
+
* https://github.com/msgpack/msgpack-java/blob/0.8.10/RELEASE_NOTES.md
|
26
|
+
|
27
|
+
* https://github.com/msgpack/msgpack-java/blob/0.8.11/RELEASE_NOTES.md
|
28
|
+
|
29
|
+
Release Date
|
30
|
+
------------------
|
31
|
+
2016-10-21
|
@@ -2,6 +2,7 @@ package org.embulk.standards;
|
|
2
2
|
|
3
3
|
import org.embulk.config.Config;
|
4
4
|
import org.embulk.config.ConfigDefault;
|
5
|
+
import org.embulk.config.ConfigException;
|
5
6
|
import org.embulk.config.ConfigSource;
|
6
7
|
import org.embulk.config.Task;
|
7
8
|
import org.embulk.config.TaskSource;
|
@@ -10,8 +11,10 @@ import org.embulk.spi.FilterPlugin;
|
|
10
11
|
import org.embulk.spi.PageOutput;
|
11
12
|
import org.embulk.spi.Schema;
|
12
13
|
|
14
|
+
import java.util.List;
|
13
15
|
import java.util.Map;
|
14
16
|
|
17
|
+
|
15
18
|
public class RenameFilterPlugin
|
16
19
|
implements FilterPlugin
|
17
20
|
{
|
@@ -21,6 +24,10 @@ public class RenameFilterPlugin
|
|
21
24
|
@Config("columns")
|
22
25
|
@ConfigDefault("{}")
|
23
26
|
Map<String, String> getRenameMap();
|
27
|
+
|
28
|
+
@Config("rules")
|
29
|
+
@ConfigDefault("[]")
|
30
|
+
List<ConfigSource> getRulesList();
|
24
31
|
}
|
25
32
|
|
26
33
|
@Override
|
@@ -29,12 +36,14 @@ public class RenameFilterPlugin
|
|
29
36
|
{
|
30
37
|
PluginTask task = config.loadConfig(PluginTask.class);
|
31
38
|
Map<String, String> renameMap = task.getRenameMap();
|
39
|
+
List<ConfigSource> rulesList = task.getRulesList();
|
32
40
|
|
33
|
-
//
|
41
|
+
// Check if the given column in "columns" exists or not.
|
34
42
|
for (String columnName : renameMap.keySet()) {
|
35
43
|
inputSchema.lookupColumn(columnName); // throws SchemaConfigException
|
36
44
|
}
|
37
45
|
|
46
|
+
// Rename by "columns": to be applied before "rules".
|
38
47
|
Schema.Builder builder = Schema.builder();
|
39
48
|
for (Column column : inputSchema.getColumns()) {
|
40
49
|
String name = column.getName();
|
@@ -43,8 +52,16 @@ public class RenameFilterPlugin
|
|
43
52
|
}
|
44
53
|
builder.add(name, column.getType());
|
45
54
|
}
|
55
|
+
Schema intermediateSchema = builder.build();
|
56
|
+
|
57
|
+
// Rename by "rules".
|
58
|
+
Schema outputSchema = intermediateSchema;
|
59
|
+
for (ConfigSource rule : rulesList) {
|
60
|
+
outputSchema = applyRule(rule, intermediateSchema);
|
61
|
+
intermediateSchema = outputSchema;
|
62
|
+
}
|
46
63
|
|
47
|
-
control.run(task.dump(),
|
64
|
+
control.run(task.dump(), outputSchema);
|
48
65
|
}
|
49
66
|
|
50
67
|
@Override
|
@@ -53,4 +70,24 @@ public class RenameFilterPlugin
|
|
53
70
|
{
|
54
71
|
return output;
|
55
72
|
}
|
56
|
-
|
73
|
+
|
74
|
+
|
75
|
+
// Extending Task is required to be deserialized with ConfigSource.loadConfig()
|
76
|
+
// although this Rule is not really a Task.
|
77
|
+
// TODO(dmikurube): Revisit this to consider how not to extend Task for this.
|
78
|
+
private interface Rule
|
79
|
+
extends Task
|
80
|
+
{
|
81
|
+
@Config("rule")
|
82
|
+
String getRule();
|
83
|
+
}
|
84
|
+
|
85
|
+
private Schema applyRule(ConfigSource ruleConfig, Schema inputSchema) throws ConfigException
|
86
|
+
{
|
87
|
+
Rule rule = ruleConfig.loadConfig(Rule.class);
|
88
|
+
switch (rule.getRule()) {
|
89
|
+
default:
|
90
|
+
throw new ConfigException("Renaming rule \"" +rule+ "\" is unknown");
|
91
|
+
}
|
92
|
+
}
|
93
|
+
}
|
@@ -1,7 +1,9 @@
|
|
1
1
|
package org.embulk.standards;
|
2
2
|
|
3
|
+
import com.google.common.collect.ImmutableList;
|
3
4
|
import com.google.common.collect.ImmutableMap;
|
4
5
|
import org.embulk.EmbulkTestRuntime;
|
6
|
+
import org.embulk.config.ConfigException;
|
5
7
|
import org.embulk.config.ConfigSource;
|
6
8
|
import org.embulk.config.TaskSource;
|
7
9
|
import org.embulk.spi.Column;
|
@@ -85,4 +87,54 @@ public class TestRenameFilterPlugin
|
|
85
87
|
}
|
86
88
|
});
|
87
89
|
}
|
90
|
+
|
91
|
+
@Test
|
92
|
+
public void checkConfigExceptionIfUnknownStringTypeOfRenamingOperator()
|
93
|
+
{
|
94
|
+
// A simple string shouldn't come as a renaming rule.
|
95
|
+
ConfigSource pluginConfig = Exec.newConfigSource()
|
96
|
+
.set("rules", ImmutableList.of("string_rule"));
|
97
|
+
|
98
|
+
try {
|
99
|
+
filter.transaction(pluginConfig, SCHEMA, new FilterPlugin.Control() {
|
100
|
+
public void run(TaskSource task, Schema schema) { }
|
101
|
+
});
|
102
|
+
fail();
|
103
|
+
} catch (Throwable t) {
|
104
|
+
assertTrue(t instanceof ConfigException);
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
@Test
|
109
|
+
public void checkConfigExceptionIfUnknownListTypeOfRenamingOperator()
|
110
|
+
{
|
111
|
+
// A list [] shouldn't come as a renaming rule.
|
112
|
+
ConfigSource pluginConfig = Exec.newConfigSource()
|
113
|
+
.set("rules", ImmutableList.of(ImmutableList.of("listed_operator1", "listed_operator2")));
|
114
|
+
|
115
|
+
try {
|
116
|
+
filter.transaction(pluginConfig, SCHEMA, new FilterPlugin.Control() {
|
117
|
+
public void run(TaskSource task, Schema schema) { }
|
118
|
+
});
|
119
|
+
fail();
|
120
|
+
} catch (Throwable t) {
|
121
|
+
assertTrue(t instanceof ConfigException);
|
122
|
+
}
|
123
|
+
}
|
124
|
+
|
125
|
+
@Test
|
126
|
+
public void checkConfigExceptionIfUnknownRenamingOperatorName()
|
127
|
+
{
|
128
|
+
ConfigSource pluginConfig = Exec.newConfigSource()
|
129
|
+
.set("rules", ImmutableList.of(ImmutableMap.of("rule", "some_unknown_renaming_operator")));
|
130
|
+
|
131
|
+
try {
|
132
|
+
filter.transaction(pluginConfig, SCHEMA, new FilterPlugin.Control() {
|
133
|
+
public void run(TaskSource task, Schema schema) { }
|
134
|
+
});
|
135
|
+
fail();
|
136
|
+
} catch (Throwable t) {
|
137
|
+
assertTrue(t instanceof ConfigException);
|
138
|
+
}
|
139
|
+
}
|
88
140
|
}
|
data/embulk.gemspec
CHANGED
@@ -1 +1 @@
|
|
1
|
-
jruby-9.1.
|
1
|
+
jruby-9.1.5.0
|
@@ -1 +1 @@
|
|
1
|
-
jruby-9.1.
|
1
|
+
jruby-9.1.5.0
|
@@ -13,7 +13,7 @@ module Embulk
|
|
13
13
|
module RubyAdapter
|
14
14
|
module ClassMethods
|
15
15
|
def new_java
|
16
|
-
Java::FileInputRunner.new(Java.injector.getInstance(
|
16
|
+
Java::FileInputRunner.new(Java.injector.getInstance(plugin_java_class))
|
17
17
|
end
|
18
18
|
# TODO transaction, resume, cleanup
|
19
19
|
end
|
@@ -13,7 +13,7 @@ module Embulk
|
|
13
13
|
module RubyAdapter
|
14
14
|
module ClassMethods
|
15
15
|
def new_java
|
16
|
-
Java::FileOutputRunner.new(Java.injector.getInstance(
|
16
|
+
Java::FileOutputRunner.new(Java.injector.getInstance(plugin_java_class))
|
17
17
|
end
|
18
18
|
# TODO transaction, resume, cleanup
|
19
19
|
end
|
data/lib/embulk/guess/csv.rb
CHANGED
@@ -142,11 +142,7 @@ module Embulk
|
|
142
142
|
schema = []
|
143
143
|
column_names.zip(other_types).each do |name,type|
|
144
144
|
if name && type
|
145
|
-
|
146
|
-
schema << {"name" => name, "type" => type, "format" => type.format}
|
147
|
-
else
|
148
|
-
schema << {"name" => name, "type" => type}
|
149
|
-
end
|
145
|
+
schema << new_column(name, type)
|
150
146
|
end
|
151
147
|
end
|
152
148
|
parser_guessed["columns"] = schema
|
@@ -154,6 +150,14 @@ module Embulk
|
|
154
150
|
return {"parser" => parser_guessed}
|
155
151
|
end
|
156
152
|
|
153
|
+
def new_column(name, type)
|
154
|
+
if type.is_a?(SchemaGuess::TimestampTypeMatch)
|
155
|
+
{"name" => name, "type" => type, "format" => type.format}
|
156
|
+
else
|
157
|
+
{"name" => name, "type" => type}
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
157
161
|
private
|
158
162
|
|
159
163
|
def split_lines(parser_config, skip_empty_lines, sample_lines, delim, extra_config)
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Guess
|
3
|
+
require 'embulk/guess/csv'
|
4
|
+
|
5
|
+
class CsvAllStringsGuessPlugin < CsvGuessPlugin
|
6
|
+
Plugin.register_guess("csv_all_strings", self)
|
7
|
+
|
8
|
+
def new_column(name, type)
|
9
|
+
{"name" => name, "type" => "string"}
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
data/lib/embulk/guess_plugin.rb
CHANGED
@@ -37,7 +37,7 @@ module Embulk
|
|
37
37
|
def guess(config, sample)
|
38
38
|
java_config = config.to_java
|
39
39
|
java_sample = sample.to_java
|
40
|
-
java_config_diff =
|
40
|
+
java_config_diff = plugin_java_object.guess(java_config, java_sample)
|
41
41
|
return DataSource.from_java(java_config_diff)
|
42
42
|
end
|
43
43
|
end
|
data/lib/embulk/java_plugin.rb
CHANGED
@@ -57,20 +57,20 @@ module Embulk
|
|
57
57
|
|
58
58
|
def self.ruby_adapter_class(java_class, ruby_base_class, ruby_module)
|
59
59
|
Class.new(ruby_base_class) do
|
60
|
-
const_set(:
|
60
|
+
const_set(:PLUGIN_JAVA_CLASS, java_class)
|
61
61
|
|
62
62
|
include ruby_module
|
63
63
|
extend ruby_module::ClassMethods
|
64
64
|
|
65
|
-
unless method_defined?(:
|
66
|
-
def
|
67
|
-
@
|
65
|
+
unless method_defined?(:plugin_java_object)
|
66
|
+
def plugin_java_object
|
67
|
+
@plugin_java_object ||= self.class.new_java
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
-
unless (class<<self;self;end).method_defined?(:
|
72
|
-
def self.
|
73
|
-
self::
|
71
|
+
unless (class<<self;self;end).method_defined?(:plugin_java_class)
|
72
|
+
def self.plugin_java_class
|
73
|
+
self::PLUGIN_JAVA_CLASS
|
74
74
|
end
|
75
75
|
end
|
76
76
|
|
@@ -81,7 +81,7 @@ module Embulk
|
|
81
81
|
# ruby_module::ClassMethods includes other modules.
|
82
82
|
unless ruby_module::ClassMethods.method_defined?(:new_java)
|
83
83
|
def self.new_java
|
84
|
-
Java.injector.getInstance(
|
84
|
+
Java.injector.getInstance(plugin_java_class)
|
85
85
|
end
|
86
86
|
end
|
87
87
|
end
|
data/lib/embulk/version.rb
CHANGED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'time'
|
3
|
+
require 'embulk/guess/csv_all_strings'
|
4
|
+
|
5
|
+
class CsvAllStringsGuessTest < ::Test::Unit::TestCase
|
6
|
+
class TestAllStrings < self
|
7
|
+
def test_columns_without_header
|
8
|
+
actual = guess([
|
9
|
+
"1\tfoo\t2000-01-01T00:00:00+0900",
|
10
|
+
"2\tbar\t2000-01-01T00:00:00+0900",
|
11
|
+
])
|
12
|
+
expected = [
|
13
|
+
{"name" => "c0", "type" => "string"},
|
14
|
+
{"name" => "c1", "type" => "string"},
|
15
|
+
{"name" => "c2", "type" => "string"},
|
16
|
+
]
|
17
|
+
assert_equal expected, actual["parser"]["columns"]
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_columns_with_header
|
21
|
+
actual = guess([
|
22
|
+
"num\tstr\ttime",
|
23
|
+
"1\tfoo\t2000-01-01T00:00:00+0900",
|
24
|
+
"2\tbar\t2000-01-01T00:00:00+0900",
|
25
|
+
])
|
26
|
+
expected = [
|
27
|
+
{"name" => "num", "type" => "string"},
|
28
|
+
{"name" => "str", "type" => "string"},
|
29
|
+
{"name" => "time", "type" => "string"},
|
30
|
+
]
|
31
|
+
assert_equal expected, actual["parser"]["columns"]
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def guess(texts)
|
36
|
+
conf = Embulk::DataSource.new({
|
37
|
+
parser: {
|
38
|
+
type: "csv"
|
39
|
+
}
|
40
|
+
})
|
41
|
+
Embulk::Guess::CsvAllStringsGuessPlugin.new.guess_lines(conf, Array(texts))
|
42
|
+
end
|
43
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jruby-jars
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - '='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 9.1.
|
19
|
+
version: 9.1.5.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - '='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 9.1.
|
26
|
+
version: 9.1.5.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,9 +108,9 @@ files:
|
|
108
108
|
- classpath/commons-beanutils-core-1.8.3.jar
|
109
109
|
- classpath/commons-compress-1.10.jar
|
110
110
|
- classpath/commons-lang3-3.1.jar
|
111
|
-
- classpath/embulk-cli-0.8.
|
112
|
-
- classpath/embulk-core-0.8.
|
113
|
-
- classpath/embulk-standards-0.8.
|
111
|
+
- classpath/embulk-cli-0.8.14.jar
|
112
|
+
- classpath/embulk-core-0.8.14.jar
|
113
|
+
- classpath/embulk-standards-0.8.14.jar
|
114
114
|
- classpath/guava-18.0.jar
|
115
115
|
- classpath/guice-4.0.jar
|
116
116
|
- classpath/guice-bootstrap-0.1.1.jar
|
@@ -126,7 +126,7 @@ files:
|
|
126
126
|
- classpath/joda-time-2.9.2.jar
|
127
127
|
- classpath/logback-classic-1.1.3.jar
|
128
128
|
- classpath/logback-core-1.1.3.jar
|
129
|
-
- classpath/msgpack-core-0.8.
|
129
|
+
- classpath/msgpack-core-0.8.11.jar
|
130
130
|
- classpath/netty-buffer-5.0.0.Alpha1.jar
|
131
131
|
- classpath/netty-common-5.0.0.Alpha1.jar
|
132
132
|
- classpath/slf4j-api-1.7.12.jar
|
@@ -427,6 +427,7 @@ files:
|
|
427
427
|
- embulk-docs/src/release/release-0.8.11.rst
|
428
428
|
- embulk-docs/src/release/release-0.8.12.rst
|
429
429
|
- embulk-docs/src/release/release-0.8.13.rst
|
430
|
+
- embulk-docs/src/release/release-0.8.14.rst
|
430
431
|
- embulk-docs/src/release/release-0.8.2.rst
|
431
432
|
- embulk-docs/src/release/release-0.8.3.rst
|
432
433
|
- embulk-docs/src/release/release-0.8.4.rst
|
@@ -527,6 +528,7 @@ files:
|
|
527
528
|
- lib/embulk/guess/bzip2.rb
|
528
529
|
- lib/embulk/guess/charset.rb
|
529
530
|
- lib/embulk/guess/csv.rb
|
531
|
+
- lib/embulk/guess/csv_all_strings.rb
|
530
532
|
- lib/embulk/guess/gzip.rb
|
531
533
|
- lib/embulk/guess/json.rb
|
532
534
|
- lib/embulk/guess/newline.rb
|
@@ -549,6 +551,7 @@ files:
|
|
549
551
|
- lib/embulk/schema.rb
|
550
552
|
- lib/embulk/version.rb
|
551
553
|
- settings.gradle
|
554
|
+
- test/guess/test_csv_all_strings.rb
|
552
555
|
- test/guess/test_csv_guess.rb
|
553
556
|
- test/guess/test_schema_guess.rb
|
554
557
|
- test/guess/test_time_format_guess.rb
|
@@ -579,9 +582,9 @@ signing_key:
|
|
579
582
|
specification_version: 4
|
580
583
|
summary: Embulk, a plugin-based parallel bulk data loader
|
581
584
|
test_files:
|
585
|
+
- test/guess/test_csv_all_strings.rb
|
582
586
|
- test/guess/test_csv_guess.rb
|
583
587
|
- test/guess/test_schema_guess.rb
|
584
588
|
- test/guess/test_time_format_guess.rb
|
585
589
|
- test/helper.rb
|
586
590
|
- test/run-test.rb
|
587
|
-
has_rdoc: false
|