embulk 0.6.27 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/Gemfile.lock +6 -1
- data/README.md +4 -4
- data/bin/embulk +39 -17
- data/build.gradle +23 -8
- data/embulk-cli/src/main/java/org/embulk/cli/Main.java +11 -2
- data/embulk-core/build.gradle +9 -4
- data/embulk-core/src/main/java/org/embulk/EmbulkEmbed.java +160 -61
- data/embulk-core/src/main/java/org/embulk/EmbulkService.java +1 -0
- data/embulk-core/src/main/java/org/embulk/command/TablePreviewPrinter.java +0 -1
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +5 -1
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +28 -7
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +1 -4
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +6 -3
- data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +0 -2
- data/embulk-core/src/main/java/org/embulk/config/TaskReport.java +29 -0
- data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +42 -45
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +0 -1
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +4 -7
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +5 -6
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +0 -1
- data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +0 -3
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +2 -2
- data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +26 -9
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +2 -4
- data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +12 -5
- data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +1 -1
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +26 -0
- data/embulk-core/src/main/java/org/embulk/guice/{CloseableInjectorProxy.java → LifeCycleInjectorProxy.java} +20 -6
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +10 -12
- data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderModule.java +0 -1
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +14 -1
- data/embulk-core/src/main/java/org/embulk/plugin/compat/InputPluginWrapper.java +102 -0
- data/embulk-core/src/main/java/org/embulk/plugin/compat/PluginWrappers.java +30 -0
- data/embulk-core/src/main/java/org/embulk/plugin/compat/TransactionalFileInputWrapper.java +96 -0
- data/embulk-core/src/main/java/org/embulk/plugin/compat/TransactionalFileOutputWrapper.java +102 -0
- data/embulk-core/src/main/java/org/embulk/plugin/compat/TransactionalPageOutputWrapper.java +95 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +0 -2
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +8 -2
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +7 -1
- data/embulk-core/src/main/java/org/embulk/spi/Extension.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +12 -10
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +11 -9
- data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +0 -3
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +4 -4
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +0 -1
- data/embulk-core/src/main/java/org/embulk/spi/ProcessTask.java +0 -5
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +0 -1
- data/embulk-core/src/main/java/org/embulk/spi/TaskState.java +19 -4
- data/embulk-core/src/main/java/org/embulk/spi/TempFileSpace.java +0 -3
- data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +0 -2
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +0 -3
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +0 -1
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +0 -1
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +0 -1
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +0 -3
- data/embulk-core/src/main/java/org/embulk/spi/unit/LocalFileSerDe.java +0 -3
- data/embulk-core/src/main/java/org/embulk/spi/unit/ToStringMap.java +0 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/Executors.java +15 -12
- data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +0 -1
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +7 -7
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +8 -8
- data/embulk-docs/src/built-in.rst +27 -30
- data/embulk-docs/src/conf.py +2 -2
- data/embulk-docs/src/release.rst +1 -2
- data/embulk-docs/src/release/release-0.7.0.rst +96 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +0 -4
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +0 -10
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +1 -5
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +4 -5
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +4 -5
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +4 -4
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +0 -3
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +4 -6
- data/embulk.gemspec +14 -1
- data/lib/embulk.rb +59 -4
- data/lib/embulk/command/embulk_bundle.rb +56 -0
- data/lib/embulk/command/embulk_main.rb +2 -0
- data/lib/embulk/command/embulk_migrate_plugin.rb +170 -0
- data/lib/embulk/command/embulk_new_plugin.rb +4 -1
- data/lib/embulk/command/embulk_run.rb +188 -227
- data/lib/embulk/data/bundle/.ruby-version +1 -0
- data/lib/embulk/data/bundle/Gemfile +4 -1
- data/lib/embulk/data/bundle/embulk/input/example.rb +4 -4
- data/lib/embulk/data/bundle/embulk/output/example.rb +4 -4
- data/lib/embulk/data/new/README.md.erb +1 -1
- data/lib/embulk/data/new/java/file_input.java.erb +4 -4
- data/lib/embulk/data/new/java/file_output.java.erb +2 -2
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/data/new/java/input.java.erb +3 -3
- data/lib/embulk/data/new/java/output.java.erb +2 -2
- data/lib/embulk/data/new/ruby/.ruby-version +1 -0
- data/lib/embulk/data/new/ruby/gemspec.erb +1 -0
- data/lib/embulk/data/new/ruby/input.rb.erb +3 -3
- data/lib/embulk/data/new/ruby/output.rb.erb +4 -4
- data/lib/embulk/data/package_data.rb +1 -15
- data/lib/embulk/guess/charset.rb +10 -1
- data/lib/embulk/input_plugin.rb +12 -12
- data/lib/embulk/java/bootstrap.rb +7 -4
- data/lib/embulk/java/imports.rb +34 -2
- data/lib/embulk/logger.rb +2 -0
- data/lib/embulk/output_plugin.rb +12 -12
- data/lib/embulk/page_builder.rb +1 -1
- data/lib/embulk/plugin.rb +0 -1
- data/lib/embulk/runner.rb +184 -0
- data/lib/embulk/version.rb +1 -1
- metadata +76 -55
- data/embulk-core/src/main/java/org/embulk/command/LiquidTemplate.java +0 -8
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +0 -369
- data/embulk-docs/src/release/release-0.6.26.rst +0 -17
- data/embulk-docs/src/release/release-0.6.27.rst +0 -11
- data/lib/embulk/command/embulk.rb +0 -47
- data/lib/embulk/data/bundle/Gemfile.lock +0 -8
- data/lib/embulk/exec.rb +0 -8
- data/lib/embulk/gems.rb +0 -29
- data/lib/embulk/java/liquid_helper.rb +0 -16
|
@@ -4,7 +4,6 @@ import java.util.List;
|
|
|
4
4
|
import com.google.common.collect.ImmutableList;
|
|
5
5
|
import org.embulk.config.TaskSource;
|
|
6
6
|
import org.embulk.config.ConfigSource;
|
|
7
|
-
import org.embulk.config.CommitReport;
|
|
8
7
|
import org.embulk.config.ConfigDiff;
|
|
9
8
|
import org.embulk.plugin.PluginType;
|
|
10
9
|
import org.embulk.spi.ExecSession;
|
|
@@ -13,7 +13,7 @@ import org.junit.Test;
|
|
|
13
13
|
import com.google.common.collect.ImmutableList;
|
|
14
14
|
import com.google.common.collect.ImmutableMap;
|
|
15
15
|
import org.embulk.EmbulkTestRuntime;
|
|
16
|
-
import org.embulk.config.
|
|
16
|
+
import org.embulk.config.TaskReport;
|
|
17
17
|
import org.embulk.config.ConfigSource;
|
|
18
18
|
import org.embulk.config.ConfigDiff;
|
|
19
19
|
import org.embulk.config.TaskSource;
|
|
@@ -62,7 +62,7 @@ public class TestFileInputRunner
|
|
|
62
62
|
@Override
|
|
63
63
|
public void cleanup(TaskSource taskSource,
|
|
64
64
|
int taskCount,
|
|
65
|
-
List<
|
|
65
|
+
List<TaskReport> successTaskReports)
|
|
66
66
|
{
|
|
67
67
|
}
|
|
68
68
|
|
|
@@ -95,7 +95,7 @@ public class TestFileInputRunner
|
|
|
95
95
|
}
|
|
96
96
|
|
|
97
97
|
@Override
|
|
98
|
-
public
|
|
98
|
+
public TaskReport commit()
|
|
99
99
|
{
|
|
100
100
|
transactionCompleted = true;
|
|
101
101
|
return null;
|
|
@@ -126,10 +126,10 @@ public class TestFileInputRunner
|
|
|
126
126
|
final MockPageOutput output = new MockPageOutput();
|
|
127
127
|
runner.transaction(config, new InputPlugin.Control()
|
|
128
128
|
{
|
|
129
|
-
public List<
|
|
129
|
+
public List<TaskReport> run(TaskSource inputTaskSource,
|
|
130
130
|
Schema schema, int taskCount)
|
|
131
131
|
{
|
|
132
|
-
List<
|
|
132
|
+
List<TaskReport> reports = new ArrayList<>();
|
|
133
133
|
reports.add(runner.run(inputTaskSource, schema, 0, output));
|
|
134
134
|
return reports;
|
|
135
135
|
}
|
|
@@ -180,10 +180,10 @@ public class TestFileInputRunner
|
|
|
180
180
|
try {
|
|
181
181
|
runner.transaction(config, new InputPlugin.Control()
|
|
182
182
|
{
|
|
183
|
-
public List<
|
|
183
|
+
public List<TaskReport> run(TaskSource inputTaskSource,
|
|
184
184
|
Schema schema, int taskCount)
|
|
185
185
|
{
|
|
186
|
-
List<
|
|
186
|
+
List<TaskReport> reports = new ArrayList<>();
|
|
187
187
|
reports.add(runner.run(inputTaskSource, schema, 0, output));
|
|
188
188
|
return reports;
|
|
189
189
|
}
|
|
@@ -6,7 +6,7 @@ import java.util.ArrayList;
|
|
|
6
6
|
import java.util.List;
|
|
7
7
|
|
|
8
8
|
import org.embulk.EmbulkTestRuntime;
|
|
9
|
-
import org.embulk.config.
|
|
9
|
+
import org.embulk.config.TaskReport;
|
|
10
10
|
import org.embulk.config.ConfigSource;
|
|
11
11
|
import org.embulk.config.ConfigDiff;
|
|
12
12
|
import org.embulk.config.Task;
|
|
@@ -52,7 +52,7 @@ public class TestFileOutputRunner
|
|
|
52
52
|
@Override
|
|
53
53
|
public void cleanup(TaskSource taskSource,
|
|
54
54
|
int taskCount,
|
|
55
|
-
List<
|
|
55
|
+
List<TaskReport> successTaskReports)
|
|
56
56
|
{
|
|
57
57
|
}
|
|
58
58
|
|
|
@@ -90,10 +90,10 @@ public class TestFileOutputRunner
|
|
|
90
90
|
}
|
|
91
91
|
|
|
92
92
|
@Override
|
|
93
|
-
public
|
|
93
|
+
public TaskReport commit()
|
|
94
94
|
{
|
|
95
95
|
transactionCompleted = true;
|
|
96
|
-
return Exec.
|
|
96
|
+
return Exec.newTaskReport();
|
|
97
97
|
}
|
|
98
98
|
};
|
|
99
99
|
}
|
|
@@ -122,7 +122,7 @@ public class TestFileOutputRunner
|
|
|
122
122
|
|
|
123
123
|
runner.transaction(config, schema, 1, new OutputPlugin.Control()
|
|
124
124
|
{
|
|
125
|
-
public List<
|
|
125
|
+
public List<TaskReport> run(final TaskSource outputTask)
|
|
126
126
|
{
|
|
127
127
|
TransactionalPageOutput tran = runner.open(outputTask, schema,
|
|
128
128
|
1);
|
|
@@ -142,7 +142,7 @@ public class TestFileOutputRunner
|
|
|
142
142
|
}
|
|
143
143
|
tran.close();
|
|
144
144
|
}
|
|
145
|
-
return new ArrayList<
|
|
145
|
+
return new ArrayList<TaskReport>();
|
|
146
146
|
}
|
|
147
147
|
});
|
|
148
148
|
|
|
@@ -181,7 +181,7 @@ public class TestFileOutputRunner
|
|
|
181
181
|
try {
|
|
182
182
|
runner.transaction(config, schema, 1, new OutputPlugin.Control()
|
|
183
183
|
{
|
|
184
|
-
public List<
|
|
184
|
+
public List<TaskReport> run(final TaskSource outputTask)
|
|
185
185
|
{
|
|
186
186
|
TransactionalPageOutput tran = runner.open(outputTask,
|
|
187
187
|
schema, 1);
|
|
@@ -196,7 +196,7 @@ public class TestFileOutputRunner
|
|
|
196
196
|
}
|
|
197
197
|
tran.close();
|
|
198
198
|
}
|
|
199
|
-
return new ArrayList<
|
|
199
|
+
return new ArrayList<TaskReport>();
|
|
200
200
|
}
|
|
201
201
|
});
|
|
202
202
|
} catch (NullPointerException npe) {
|
|
@@ -59,34 +59,33 @@ A configuration file consists of following sections:
|
|
|
59
59
|
In many cases, what you need to write is the **in:**, **out:** and **formatter** sections only, because the ``guess`` command guesses **parser** and **decoder** options for you. See also the `Quick Start <https://github.com/embulk/embulk#quick-start>`_.
|
|
60
60
|
|
|
61
61
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
table: {{ env.pg_table }}
|
|
62
|
+
Using variables
|
|
63
|
+
~~~~~~~~~~~~~~~~~~
|
|
64
|
+
|
|
65
|
+
You can embed environment variables in a configuration file using the `Liquid template engine <http://liquidmarkup.org/>`_ (this is an experimental feature; its behavior might change or it might be removed in future releases).
|
|
66
|
+
|
|
67
|
+
To use the template engine, the configuration file name must end with ``.yml.liquid``.
|
|
68
|
+
|
|
69
|
+
Environment variables are available through the ``env`` variable.
|
|
70
|
+
|
|
71
|
+
.. code-block:: yaml
|
|
72
|
+
|
|
73
|
+
in:
|
|
74
|
+
type: file
|
|
75
|
+
path_prefix: {{ env.path_prefix }}
|
|
76
|
+
decoders:
|
|
77
|
+
- {type: gzip}
|
|
78
|
+
parser:
|
|
79
|
+
...
|
|
80
|
+
out:
|
|
81
|
+
type: postgresql
|
|
82
|
+
host: {{ env.pg_host }}
|
|
83
|
+
port: {{ env.pg_port }}
|
|
84
|
+
user: {{ env.pg_user }}
|
|
85
|
+
password: "{{ env.pg_password }}"
|
|
86
|
+
database: embulk_load
|
|
87
|
+
mode: insert
|
|
88
|
+
table: {{ env.pg_table }}
|
|
90
89
|
|
|
91
90
|
|
|
92
91
|
Local file input plugin
|
|
@@ -181,8 +180,6 @@ Options
|
|
|
181
180
|
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
|
182
181
|
| max\_quoted\_size\_limit | integer | Maximum number of bytes of a quoted value. If a value exceeds the limit, the row will be skipped | ``131072`` by default |
|
|
183
182
|
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
|
184
|
-
| stop\_on\_invalid\_record | boolean | Stop bulk load transaction if a file includes invalid record (such as invalid timestamp) | ``false`` by default |
|
|
185
|
-
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
|
186
183
|
| default\_timezone | string | Time zone of timestamp columns if the value itself doesn't include time zone description (eg. Asia/Tokyo) | ``UTC`` by default |
|
|
187
184
|
+----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
|
|
188
185
|
| newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
|
data/embulk-docs/src/conf.py
CHANGED
|
@@ -53,9 +53,9 @@ copyright = u'2015, Embulk Project'
|
|
|
53
53
|
# built documents.
|
|
54
54
|
#
|
|
55
55
|
# The short X.Y version.
|
|
56
|
-
version = '0.
|
|
56
|
+
version = '0.7'
|
|
57
57
|
# The full version, including alpha/beta/rc tags.
|
|
58
|
-
release = '0.
|
|
58
|
+
release = '0.7'
|
|
59
59
|
|
|
60
60
|
# The language for content autogenerated by Sphinx. Refer to documentation
|
|
61
61
|
# for a list of supported languages.
|
data/embulk-docs/src/release.rst
CHANGED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
Release 0.7.0
|
|
2
|
+
==================================
|
|
3
|
+
|
|
4
|
+
General Changes
|
|
5
|
+
------------------
|
|
6
|
+
|
|
7
|
+
* Upgraded JRuby version to 9.0.0.0. Ruby scripting is compatible with Ruby 2.2 syntax.
|
|
8
|
+
|
|
9
|
+
* Added ``embulk migrate <plugin directory>`` subcommand. This command modifies plugin code to use the latest Embulk API.
|
|
10
|
+
|
|
11
|
+
* Enabled Liquid template engine. If the configuration file name ends with ``.yml.liquid``, Embulk embeds environment variables using the Liquid template engine.
|
|
12
|
+
|
|
13
|
+
* Embulk gem package for JRuby doesn't include JRuby itself any more. Size of a gem package is reduced from 38MB to 7.6MB.
|
|
14
|
+
|
|
15
|
+
* Embulk gem is also released for CRuby. This enables us to install ``embulk`` command using ``gem install embulk``.
|
|
16
|
+
|
|
17
|
+
* **IMPORTANT**: ``embulk bundle`` command runs bundler. To create a new plugin bundle, use ``embulk bundle new <directory>`` command. To update gems, use ``embulk bundle`` command at the directory. Instructions are written in the generated Gemfile.
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
Ruby Plugin API
|
|
21
|
+
------------------
|
|
22
|
+
|
|
23
|
+
* Added experimental ``Embulk.setup`` and ``Embulk::Runner``.
|
|
24
|
+
|
|
25
|
+
This enables ruby scripts to run embulk easily. This is also good for test code. For example, you can use this code:
|
|
26
|
+
|
|
27
|
+
.. code-block:: ruby
|
|
28
|
+
|
|
29
|
+
require 'embulk'
|
|
30
|
+
Embulk.setup
|
|
31
|
+
Embulk::Runner.run(YAML.load_file("config.yml"))
|
|
32
|
+
|
|
33
|
+
* ``embulk new`` generates .ruby-version file with jruby-9.0.0.0 for ruby-based plugins.
|
|
34
|
+
|
|
35
|
+
This makes plugin development easy as following:
|
|
36
|
+
|
|
37
|
+
.. code-block:: console
|
|
38
|
+
|
|
39
|
+
# 1. Create plugin template
|
|
40
|
+
$ embulk new ruby-parser awesome
|
|
41
|
+
$ cd embulk-parser-awesome
|
|
42
|
+
# or upgrade an existing plugin: embulk migrate embulk-parser-awesome
|
|
43
|
+
|
|
44
|
+
# 2. Install dependency gems including embulk itself at vendor/bundle directory
|
|
45
|
+
$ bundle install --path vendor/bundle
|
|
46
|
+
|
|
47
|
+
# 3. Create an example configuration file
|
|
48
|
+
$ vi config.yml
|
|
49
|
+
|
|
50
|
+
# 4. You can run embulk without building & installing gem
|
|
51
|
+
$ bundle exec embulk run config.yml
|
|
52
|
+
|
|
53
|
+
* Constants defined at ``Embulk::Java`` are deprecated. They're still kept for backward compatibility but will be removed at a future release.
|
|
54
|
+
|
|
55
|
+
* Added ``Embulk::Java::Config`` and ``Embulk::SPI`` namespaces to access Java classes.
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
Java Plugin API
|
|
59
|
+
------------------
|
|
60
|
+
|
|
61
|
+
* **IMPORTANT**: Renamed CommitReport class to TaskReport. Binary backward compatibility is kept so that old plugins built with embulk 0.6.x can run with embulk 0.7.0. But this backward compatibility code will be removed in a future release.
|
|
62
|
+
|
|
63
|
+
To upgrade your plugin code, you can use ``embulk migrate <plugin directory>`` command.
|
|
64
|
+
|
|
65
|
+
* Upgraded gradle version to 2.6. This version supports the ``./gradlew -t <task>`` command, which watches for file changes and rebuilds continuously.
|
|
66
|
+
|
|
67
|
+
This makes plugin development easy as following:
|
|
68
|
+
|
|
69
|
+
.. code-block:: console
|
|
70
|
+
|
|
71
|
+
# 1. Create plugin template
|
|
72
|
+
$ embulk new java-input awesome
|
|
73
|
+
$ cd embulk-input-awesome
|
|
74
|
+
# or upgrade an existing plugin: embulk migrate embulk-input-awesome
|
|
75
|
+
|
|
76
|
+
# 2. Build code continuously
|
|
77
|
+
$ ./gradlew -t package
|
|
78
|
+
|
|
79
|
+
# 3. Create an example configuration file
|
|
80
|
+
$ vi config.yml
|
|
81
|
+
|
|
82
|
+
# 4. Run embulk with -L option
|
|
83
|
+
$ embulk -L . run config.yml
|
|
84
|
+
|
|
85
|
+
* Added ``EmbulkEmbed.Bootstrap`` class to build ``EmbulkEmbed`` instance.
|
|
86
|
+
|
|
87
|
+
* Added ``ConfigLoader.fromJsonString(String)`` and ``ConfigLoader.fromYamlString(String)`` methods.
|
|
88
|
+
|
|
89
|
+
* The guess, preview, and run methods of ``EmbulkEmbed`` don't need an ExecSession instance any more.
|
|
90
|
+
|
|
91
|
+
* EmbulkService is now deprecated. Replacement is EmbulkEmbed.
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
Release Date
|
|
95
|
+
------------------
|
|
96
|
+
2015-08-18
|
|
@@ -1,11 +1,8 @@
|
|
|
1
1
|
package org.embulk.standards;
|
|
2
2
|
|
|
3
3
|
import com.google.common.base.Optional;
|
|
4
|
-
import com.google.common.collect.ImmutableBiMap;
|
|
5
|
-
import com.google.common.collect.ImmutableMap;
|
|
6
4
|
import org.embulk.config.Config;
|
|
7
5
|
import org.embulk.config.ConfigDefault;
|
|
8
|
-
import org.embulk.spi.type.TimestampType;
|
|
9
6
|
import org.embulk.spi.time.Timestamp;
|
|
10
7
|
import org.embulk.spi.time.TimestampFormatter;
|
|
11
8
|
import org.embulk.config.Task;
|
|
@@ -18,7 +15,6 @@ import org.embulk.spi.FormatterPlugin;
|
|
|
18
15
|
import org.embulk.spi.Page;
|
|
19
16
|
import org.embulk.spi.PageOutput;
|
|
20
17
|
import org.embulk.spi.PageReader;
|
|
21
|
-
import org.embulk.spi.Exec;
|
|
22
18
|
import org.embulk.spi.FileOutput;
|
|
23
19
|
import org.embulk.spi.util.LineEncoder;
|
|
24
20
|
import org.embulk.spi.util.Timestamps;
|
|
@@ -11,11 +11,9 @@ import org.embulk.config.ConfigDefault;
|
|
|
11
11
|
import org.embulk.config.ConfigSource;
|
|
12
12
|
import org.embulk.config.ConfigException;
|
|
13
13
|
import org.embulk.config.TaskSource;
|
|
14
|
-
import org.embulk.spi.type.TimestampType;
|
|
15
14
|
import org.embulk.spi.time.TimestampParser;
|
|
16
15
|
import org.embulk.spi.time.TimestampParseException;
|
|
17
16
|
import org.embulk.spi.Column;
|
|
18
|
-
import org.embulk.spi.ColumnConfig;
|
|
19
17
|
import org.embulk.spi.Schema;
|
|
20
18
|
import org.embulk.spi.SchemaConfig;
|
|
21
19
|
import org.embulk.spi.ColumnVisitor;
|
|
@@ -91,10 +89,6 @@ public class CsvParserPlugin
|
|
|
91
89
|
@Config("allow_extra_columns")
|
|
92
90
|
@ConfigDefault("false")
|
|
93
91
|
boolean getAllowExtraColumns();
|
|
94
|
-
|
|
95
|
-
@Config("stop_on_invalid_record")
|
|
96
|
-
@ConfigDefault("false")
|
|
97
|
-
boolean getStopOnInvalidRecord();
|
|
98
92
|
}
|
|
99
93
|
|
|
100
94
|
public static class QuoteCharacter
|
|
@@ -235,7 +229,6 @@ public class CsvParserPlugin
|
|
|
235
229
|
final String nullStringOrNull = task.getNullString().orNull();
|
|
236
230
|
final boolean allowOptionalColumns = task.getAllowOptionalColumns();
|
|
237
231
|
final boolean allowExtraColumns = task.getAllowExtraColumns();
|
|
238
|
-
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
|
239
232
|
int skipHeaderLines = task.getSkipHeaderLines();
|
|
240
233
|
|
|
241
234
|
try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
|
@@ -359,9 +352,6 @@ public class CsvParserPlugin
|
|
|
359
352
|
} catch (CsvTokenizer.InvalidFormatException | CsvTokenizer.InvalidValueException | CsvRecordValidateException e) {
|
|
360
353
|
long lineNumber = tokenizer.getCurrentLineNumber();
|
|
361
354
|
String skippedLine = tokenizer.skipCurrentLine();
|
|
362
|
-
if (stopOnInvalidRecord) {
|
|
363
|
-
throw new ConfigException(String.format("Invalid record at line %d: %s", lineNumber, skippedLine), e);
|
|
364
|
-
}
|
|
365
355
|
log.warn(String.format("Skipped line %d (%s): %s", lineNumber, e.getMessage(), skippedLine));
|
|
366
356
|
//exec.notice().skippedLine(skippedLine);
|
|
367
357
|
|
|
@@ -5,9 +5,7 @@ import java.util.List;
|
|
|
5
5
|
import java.util.ArrayList;
|
|
6
6
|
import java.util.Deque;
|
|
7
7
|
import java.util.ArrayDeque;
|
|
8
|
-
import org.embulk.config.ConfigException;
|
|
9
8
|
import org.embulk.spi.util.LineDecoder;
|
|
10
|
-
import org.embulk.spi.Exec;
|
|
11
9
|
|
|
12
10
|
public class CsvTokenizer
|
|
13
11
|
{
|
|
@@ -71,9 +69,7 @@ public class CsvTokenizer
|
|
|
71
69
|
// recover lines of quoted value
|
|
72
70
|
skippedLine = quotedValueLines.remove(0); // TODO optimize performance
|
|
73
71
|
unreadLines.addAll(quotedValueLines);
|
|
74
|
-
|
|
75
|
-
unreadLines.add(line);
|
|
76
|
-
}
|
|
72
|
+
unreadLines.add(line);
|
|
77
73
|
lineNumber -= quotedValueLines.size();
|
|
78
74
|
quotedValueLines.clear();
|
|
79
75
|
}
|
|
@@ -13,7 +13,6 @@ import java.nio.file.Files;
|
|
|
13
13
|
import java.nio.file.SimpleFileVisitor;
|
|
14
14
|
import java.nio.file.FileVisitResult;
|
|
15
15
|
import java.nio.file.attribute.BasicFileAttributes;
|
|
16
|
-
import javax.validation.constraints.NotNull;
|
|
17
16
|
import com.google.common.collect.ImmutableList;
|
|
18
17
|
import com.google.common.base.Optional;
|
|
19
18
|
import org.embulk.config.Config;
|
|
@@ -23,7 +22,7 @@ import org.embulk.config.Task;
|
|
|
23
22
|
import org.embulk.config.TaskSource;
|
|
24
23
|
import org.embulk.config.ConfigSource;
|
|
25
24
|
import org.embulk.config.ConfigDiff;
|
|
26
|
-
import org.embulk.config.
|
|
25
|
+
import org.embulk.config.TaskReport;
|
|
27
26
|
import org.embulk.spi.BufferAllocator;
|
|
28
27
|
import org.embulk.spi.Exec;
|
|
29
28
|
import org.embulk.spi.FileInputPlugin;
|
|
@@ -100,7 +99,7 @@ public class LocalFileInputPlugin
|
|
|
100
99
|
@Override
|
|
101
100
|
public void cleanup(TaskSource taskSource,
|
|
102
101
|
int taskCount,
|
|
103
|
-
List<
|
|
102
|
+
List<TaskReport> successTaskReports)
|
|
104
103
|
{ }
|
|
105
104
|
|
|
106
105
|
public List<String> listFiles(PluginTask task)
|
|
@@ -195,9 +194,9 @@ public class LocalFileInputPlugin
|
|
|
195
194
|
{ }
|
|
196
195
|
|
|
197
196
|
@Override
|
|
198
|
-
public
|
|
197
|
+
public TaskReport commit()
|
|
199
198
|
{
|
|
200
|
-
return Exec.
|
|
199
|
+
return Exec.newTaskReport();
|
|
201
200
|
}
|
|
202
201
|
};
|
|
203
202
|
}
|
|
@@ -4,7 +4,6 @@ import java.io.File;
|
|
|
4
4
|
import java.io.FileNotFoundException;
|
|
5
5
|
import java.io.FileOutputStream;
|
|
6
6
|
import java.io.IOException;
|
|
7
|
-
import java.io.OutputStream;
|
|
8
7
|
import java.util.ArrayList;
|
|
9
8
|
import java.util.List;
|
|
10
9
|
import java.util.Locale;
|
|
@@ -14,7 +13,7 @@ import org.embulk.config.ConfigException;
|
|
|
14
13
|
import org.embulk.config.ConfigDefault;
|
|
15
14
|
import org.embulk.config.ConfigSource;
|
|
16
15
|
import org.embulk.config.ConfigDiff;
|
|
17
|
-
import org.embulk.config.
|
|
16
|
+
import org.embulk.config.TaskReport;
|
|
18
17
|
import org.embulk.config.Task;
|
|
19
18
|
import org.embulk.config.TaskSource;
|
|
20
19
|
import org.embulk.spi.Buffer;
|
|
@@ -70,7 +69,7 @@ public class LocalFileOutputPlugin
|
|
|
70
69
|
@Override
|
|
71
70
|
public void cleanup(TaskSource taskSource,
|
|
72
71
|
int taskCount,
|
|
73
|
-
List<
|
|
72
|
+
List<TaskReport> successTaskReports)
|
|
74
73
|
{ }
|
|
75
74
|
|
|
76
75
|
@Override
|
|
@@ -136,9 +135,9 @@ public class LocalFileOutputPlugin
|
|
|
136
135
|
public void abort()
|
|
137
136
|
{ }
|
|
138
137
|
|
|
139
|
-
public
|
|
138
|
+
public TaskReport commit()
|
|
140
139
|
{
|
|
141
|
-
|
|
140
|
+
TaskReport report = Exec.newTaskReport();
|
|
142
141
|
// TODO better setting for Report
|
|
143
142
|
// report.set("file_names", fileNames);
|
|
144
143
|
// report.set("file_sizes", fileSizes);
|