embulk 0.8.18-java → 0.8.19-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -0
- data/build.gradle +10 -3
- data/embulk-cli/build.gradle +2 -0
- data/embulk-cli/src/main/bat/selfrun.bat +98 -0
- data/embulk-cli/src/main/java/org/embulk/cli/EmbulkExample.java +82 -0
- data/embulk-cli/src/main/java/org/embulk/cli/EmbulkMigrate.java +458 -0
- data/embulk-cli/src/main/java/org/embulk/cli/EmbulkNew.java +419 -0
- data/embulk-cli/src/main/java/org/embulk/cli/EmbulkSelfUpdate.java +248 -0
- data/embulk-cli/src/main/sh/selfrun.sh +0 -103
- data/embulk-cli/src/test/java/org/embulk/cli/SelfrunTest.java +158 -143
- data/embulk-core/build.gradle +2 -2
- data/embulk-core/src/main/java/org/embulk/EmbulkVersion.java +109 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +11 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +29 -3
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +47 -13
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +6 -3
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +385 -64
- data/embulk-core/src/main/java/org/embulk/spi/TempFileSpace.java +2 -1
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +62 -0
- data/embulk-docs/src/built-in.rst +59 -21
- data/embulk-docs/src/customization.rst +8 -8
- data/embulk-docs/src/developers/index.rst +45 -0
- data/embulk-docs/src/index.rst +11 -7
- data/embulk-docs/src/recipe.rst +1 -1
- data/embulk-docs/src/recipe/{scheduled-csv-load-to-elasticsearch-kibana4.rst → scheduled-csv-load-to-elasticsearch-kibana5.rst} +26 -24
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.4.0.rst +1 -1
- data/embulk-docs/src/release/release-0.5.0.rst +1 -1
- data/embulk-docs/src/release/release-0.6.0.rst +1 -1
- data/embulk-docs/src/release/release-0.6.20.rst +1 -1
- data/embulk-docs/src/release/release-0.8.19.rst +43 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +2 -2
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +30 -1
- data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +10 -0
- data/embulk-standards/src/test/java/org/embulk/standards/preview/TestFilePreview.java +73 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_skip_suggest_if_empty_sample_records.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_skip_suggest_if_empty_sample_records_guessed.yml +2 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_skip_suggest_if_empty_sample_records_seed.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes_exec.yml +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes_load.yml +19 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes_previewed.csv +1 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple.csv +5 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple_load.yml +19 -0
- data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple_previewed.csv +4 -0
- data/embulk-test/src/main/java/org/embulk/test/PreviewResultInputPlugin.java +65 -0
- data/embulk-test/src/main/java/org/embulk/test/TestingBulkLoader.java +5 -0
- data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +59 -2
- data/embulk.gemspec +2 -1
- data/lib/embulk/command/embulk_run.rb +11 -49
- data/lib/embulk/data/new/README.md.vm +106 -0
- data/lib/embulk/data/new/{gitignore.erb → gitignore.vm} +3 -3
- data/lib/embulk/data/new/java/{build.gradle.erb → build.gradle.vm} +8 -8
- data/lib/embulk/data/new/java/{decoder.java.erb → decoder.java.vm} +6 -4
- data/lib/embulk/data/new/java/{encoder.java.erb → encoder.java.vm} +7 -5
- data/lib/embulk/data/new/java/{file_input.java.erb → file_input.java.vm} +9 -7
- data/lib/embulk/data/new/java/{file_output.java.erb → file_output.java.vm} +7 -5
- data/lib/embulk/data/new/java/{filter.java.erb → filter.java.vm} +4 -3
- data/lib/embulk/data/new/java/{formatter.java.erb → formatter.java.vm} +5 -4
- data/lib/embulk/data/new/java/{input.java.erb → input.java.vm} +6 -4
- data/lib/embulk/data/new/java/{output.java.erb → output.java.vm} +7 -5
- data/lib/embulk/data/new/java/{parser.java.erb → parser.java.vm} +5 -4
- data/lib/embulk/data/new/java/plugin_loader.rb.vm +3 -0
- data/lib/embulk/data/new/java/test.java.vm +5 -0
- data/lib/embulk/data/new/ruby/decoder_guess.rb.vm +25 -0
- data/lib/embulk/data/new/ruby/{filter.rb.erb → filter.rb.vm} +2 -2
- data/lib/embulk/data/new/ruby/{formatter.rb.erb → formatter.rb.vm} +2 -2
- data/lib/embulk/data/new/ruby/gemspec.vm +20 -0
- data/lib/embulk/data/new/ruby/{input.rb.erb → input.rb.vm} +10 -10
- data/lib/embulk/data/new/ruby/{output.rb.erb → output.rb.vm} +7 -7
- data/lib/embulk/data/new/ruby/{parser.rb.erb → parser.rb.vm} +2 -2
- data/lib/embulk/data/new/ruby/parser_guess.rb.vm +65 -0
- data/lib/embulk/guess/csv.rb +5 -0
- data/lib/embulk/version.rb +22 -1
- metadata +55 -35
- data/lib/embulk/command/embulk_example.rb +0 -33
- data/lib/embulk/command/embulk_generate_bin.rb +0 -62
- data/lib/embulk/command/embulk_migrate_plugin.rb +0 -244
- data/lib/embulk/command/embulk_new_plugin.rb +0 -126
- data/lib/embulk/command/embulk_selfupdate.rb +0 -121
- data/lib/embulk/data/new/README.md.erb +0 -111
- data/lib/embulk/data/new/java/plugin_loader.rb.erb +0 -3
- data/lib/embulk/data/new/java/test.java.erb +0 -5
- data/lib/embulk/data/new/ruby/decoder_guess.rb.erb +0 -25
- data/lib/embulk/data/new/ruby/gemspec.erb +0 -20
- data/lib/embulk/data/new/ruby/parser_guess.rb.erb +0 -65
data/embulk-docs/src/release.rst
CHANGED
@@ -2,7 +2,7 @@ Release 0.6.0
|
|
2
2
|
==================================
|
3
3
|
|
4
4
|
Executor Plugin Mechanism
|
5
|
-
|
5
|
+
-------------------------
|
6
6
|
|
7
7
|
The executor of Embulk is now fully extensible using plugins. Executor plugins get input, filter and output plugins from the Embulk framework and run them using multiple threads, processes, or servers. While input, filter and output plugins are responsible for data processing, executor plugins are responsible for scheduling the processing tasks and managing parallelism for performance.
|
8
8
|
|
@@ -2,7 +2,7 @@ Release 0.6.20
|
|
2
2
|
==================================
|
3
3
|
|
4
4
|
Command line interface
|
5
|
-
|
5
|
+
----------------------
|
6
6
|
|
7
7
|
* Added ``-X key=value`` argument to set system config. This argument is intended to overwrite performance parameters such as number of threads (``max_threads``) or page buffer size (``page_size``).
|
8
8
|
|
@@ -0,0 +1,43 @@
|
|
1
|
+
Release 0.8.19
|
2
|
+
==================================
|
3
|
+
|
4
|
+
General Changes
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Fixed LocalFileInputPlugin to follow the link if the path prefix is a symlink. (@hiroyuki-sato) [#585]
|
8
|
+
|
9
|
+
* Fixed CsvParserPlugin to refresh the number of skipped header lines for each file. [#567]
|
10
|
+
|
11
|
+
* Fixed embulk/guess/csv.rb to finish immediately if the CSV parser could not parse sample_lines. [#556]
|
12
|
+
|
13
|
+
* Fixed PageBuilder to avoid saving unexpected string values to stringReferences. [#598]
|
14
|
+
|
15
|
+
* Fixed PreviewExecutor to avoid calling SamplingPageOutput#finish twice. [#571]
|
16
|
+
|
17
|
+
* Fixed TempFileSpace to use a Windows-safe temporary filename format. (@hiroyuki-sato) [#589]
|
18
|
+
|
19
|
+
* Added 'preview_sample_buffer_bytes' option for 'preview' command to make sampling buffer bytes configurable. [#572]
|
20
|
+
|
21
|
+
* Added 'guess_sample_buffer_bytes' option for 'guess' command to make sampling buffer bytes configurable. [#594]
|
22
|
+
|
23
|
+
* Updated snakeyaml from 1.14 to 1.18. (@hiroyuki-sato) [#575]
|
24
|
+
|
25
|
+
* Updated liquid from 3.0.6 to 4.0.0. (@hiroyuki-sato) [#587]
|
26
|
+
|
27
|
+
* Minor fix:
|
28
|
+
|
29
|
+
* Removed lib/embulk/command/embulk_generate_bin.rb that is not called anywhere. [#578]
|
30
|
+
|
31
|
+
* Deprecated lib/embulk/version.rb 'Embulk::VERSION', and used the jar manifest (META-INF/MANIFEST.MF) to provide the Embulk version at runtime. [#596, #597]
|
32
|
+
|
33
|
+
* Rewrote the "example" subcommand in Java. [#558]
|
34
|
+
|
35
|
+
* Rewrote the "selfupdate" subcommand in Java. [#563]
|
36
|
+
|
37
|
+
* Rewrote the "migrate" subcommand in Java. [#568]
|
38
|
+
|
39
|
+
* Rewrote the "new" subcommand in Java. [#569]
|
40
|
+
|
41
|
+
Release Date
|
42
|
+
------------------
|
43
|
+
2017-05-12
|
@@ -236,12 +236,12 @@ public class CsvParserPlugin
|
|
236
236
|
final boolean allowOptionalColumns = task.getAllowOptionalColumns();
|
237
237
|
final boolean allowExtraColumns = task.getAllowExtraColumns();
|
238
238
|
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
239
|
-
int skipHeaderLines = task.getSkipHeaderLines();
|
239
|
+
final int skipHeaderLines = task.getSkipHeaderLines();
|
240
240
|
|
241
241
|
try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
242
242
|
while (tokenizer.nextFile()) {
|
243
243
|
// skip the header lines for each file
|
244
|
-
for (
|
244
|
+
for (int skipHeaderLineNumber = skipHeaderLines; skipHeaderLineNumber > 0; skipHeaderLineNumber--) {
|
245
245
|
if (!tokenizer.skipHeaderLine()) {
|
246
246
|
break;
|
247
247
|
}
|
@@ -30,6 +30,10 @@ import org.embulk.spi.TransactionalFileInput;
|
|
30
30
|
import org.embulk.spi.util.InputStreamTransactionalFileInput;
|
31
31
|
import org.slf4j.Logger;
|
32
32
|
|
33
|
+
import java.nio.file.FileVisitOption;
|
34
|
+
import java.util.EnumSet;
|
35
|
+
import java.util.Set;
|
36
|
+
|
33
37
|
public class LocalFileInputPlugin
|
34
38
|
implements FileInputPlugin
|
35
39
|
{
|
@@ -43,6 +47,10 @@ public class LocalFileInputPlugin
|
|
43
47
|
@ConfigDefault("null")
|
44
48
|
Optional<String> getLastPath();
|
45
49
|
|
50
|
+
@Config("follow_symlinks")
|
51
|
+
@ConfigDefault("false")
|
52
|
+
boolean getFollowSymlinks();
|
53
|
+
|
46
54
|
List<String> getFiles();
|
47
55
|
void setFiles(List<String> files);
|
48
56
|
|
@@ -120,7 +128,17 @@ public class LocalFileInputPlugin
|
|
120
128
|
final String lastPath = task.getLastPath().orNull();
|
121
129
|
try {
|
122
130
|
log.info("Listing local files at directory '{}' filtering filename by prefix '{}'", directory.equals(CURRENT_DIR) ? "." : directory.toString(), fileNamePrefix);
|
123
|
-
|
131
|
+
|
132
|
+
int maxDepth = Integer.MAX_VALUE;
|
133
|
+
Set<FileVisitOption> opts;
|
134
|
+
if (task.getFollowSymlinks()) {
|
135
|
+
opts = EnumSet.of(FileVisitOption.FOLLOW_LINKS);
|
136
|
+
} else {
|
137
|
+
opts = EnumSet.noneOf(FileVisitOption.class);
|
138
|
+
log.info("\"follow_symlinks\" is set false. Note that symbolic links to directories are skipped.");
|
139
|
+
}
|
140
|
+
|
141
|
+
Files.walkFileTree(directory, opts, maxDepth, new SimpleFileVisitor<Path>() {
|
124
142
|
@Override
|
125
143
|
public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes attrs)
|
126
144
|
{
|
@@ -148,6 +166,17 @@ public class LocalFileInputPlugin
|
|
148
166
|
@Override
|
149
167
|
public FileVisitResult visitFile(Path path, BasicFileAttributes attrs)
|
150
168
|
{
|
169
|
+
try {
|
170
|
+
// Avoid directories from listing.
|
171
|
+
// Directories are normally unvisited with |FileVisitor#visitFile|, but symbolic links to
|
172
|
+
// directories are visited like files unless |FOLLOW_LINKS| is set in |Files#walkFileTree|.
|
173
|
+
// Symbolic links to directories are explicitly skipped here by checking with |Path#toRealPath|.
|
174
|
+
if (Files.isDirectory(path.toRealPath())) {
|
175
|
+
return FileVisitResult.CONTINUE;
|
176
|
+
}
|
177
|
+
} catch (IOException ex){
|
178
|
+
throw new RuntimeException("Can't resolve symbolic link", ex);
|
179
|
+
}
|
151
180
|
if (lastPath != null && path.toString().compareTo(lastPath) <= 0) {
|
152
181
|
return FileVisitResult.CONTINUE;
|
153
182
|
} else {
|
@@ -219,6 +219,16 @@ public class TestCsvGuessPlugin
|
|
219
219
|
"test_backslash_escape_guessed.yml");
|
220
220
|
}
|
221
221
|
|
222
|
+
@Test
|
223
|
+
public void skipSuggestIfEmptySampleRecords()
|
224
|
+
throws Exception
|
225
|
+
{
|
226
|
+
// This test checks that the CSV guess doesn't suggest anything for an invalidly formatted CSV file.
|
227
|
+
assertGuessByResource(embulk,
|
228
|
+
"test_skip_suggest_if_empty_sample_records_seed.yml", "test_skip_suggest_if_empty_sample_records.csv",
|
229
|
+
"test_skip_suggest_if_empty_sample_records_guessed.yml");
|
230
|
+
}
|
231
|
+
|
222
232
|
static void assertGuessByResource(TestingEmbulk embulk, String seedYamlResourceName, String sourceCsvResourceName,
|
223
233
|
String resultResourceName)
|
224
234
|
throws IOException
|
@@ -0,0 +1,73 @@
|
|
1
|
+
package org.embulk.standards.preview;
|
2
|
+
|
3
|
+
import org.embulk.config.ConfigSource;
|
4
|
+
import org.embulk.exec.PreviewResult;
|
5
|
+
import org.embulk.test.TestingEmbulk;
|
6
|
+
import org.junit.Rule;
|
7
|
+
import org.junit.Test;
|
8
|
+
|
9
|
+
import java.io.IOException;
|
10
|
+
import java.nio.file.Path;
|
11
|
+
|
12
|
+
import static org.embulk.test.EmbulkTests.copyResource;
|
13
|
+
import static org.embulk.test.EmbulkTests.readFile;
|
14
|
+
import static org.embulk.test.EmbulkTests.readResource;
|
15
|
+
import static org.hamcrest.Matchers.is;
|
16
|
+
import static org.junit.Assert.assertThat;
|
17
|
+
|
18
|
+
public class TestFilePreview
|
19
|
+
{
|
20
|
+
private static final String RESOURCE_NAME_PREFIX = "org/embulk/standards/preview/file/test/";
|
21
|
+
|
22
|
+
@Rule
|
23
|
+
public TestingEmbulk embulk = TestingEmbulk.builder()
|
24
|
+
.build();
|
25
|
+
|
26
|
+
@Test
|
27
|
+
public void testSimple()
|
28
|
+
throws Exception
|
29
|
+
{
|
30
|
+
assertPreviewedRecords(embulk, "test_simple_load.yml", "test_simple.csv", "test_simple_previewed.csv");
|
31
|
+
}
|
32
|
+
|
33
|
+
@Test
|
34
|
+
public void changePreviewSampleBufferBytes()
|
35
|
+
throws Exception
|
36
|
+
{
|
37
|
+
assertPreviewedRecords(embulk, "test_sample_buffer_bytes_load.yml", "test_sample_buffer_bytes_exec.yml",
|
38
|
+
"test_sample_buffer_bytes.csv", "test_sample_buffer_bytes_previewed.csv");
|
39
|
+
}
|
40
|
+
|
41
|
+
private static void assertPreviewedRecords(TestingEmbulk embulk,
|
42
|
+
String loadYamlResourceName, String sourceCsvResourceName, String resultCsvResourceName)
|
43
|
+
throws IOException
|
44
|
+
{
|
45
|
+
assertPreviewedRecords(embulk, loadYamlResourceName, null, sourceCsvResourceName, resultCsvResourceName);
|
46
|
+
}
|
47
|
+
|
48
|
+
private static void assertPreviewedRecords(TestingEmbulk embulk,
|
49
|
+
String loadYamlResourceName, String execYamlResourceName, String sourceCsvResourceName, String resultCsvResourceName)
|
50
|
+
throws IOException
|
51
|
+
{
|
52
|
+
Path inputPath = embulk.createTempFile("csv");
|
53
|
+
Path outputPath = embulk.createTempFile("csv");
|
54
|
+
|
55
|
+
// in: config
|
56
|
+
copyResource(RESOURCE_NAME_PREFIX + sourceCsvResourceName, inputPath);
|
57
|
+
ConfigSource load = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + loadYamlResourceName)
|
58
|
+
.set("path_prefix", inputPath.toAbsolutePath().toString());
|
59
|
+
|
60
|
+
// exec: config
|
61
|
+
final TestingEmbulk.InputBuilder builder = embulk.inputBuilder();
|
62
|
+
if (execYamlResourceName != null) {
|
63
|
+
final ConfigSource execConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + execYamlResourceName);
|
64
|
+
builder.exec(execConfig);
|
65
|
+
}
|
66
|
+
|
67
|
+
// execute preview
|
68
|
+
final PreviewResult result = builder.in(load).outputPath(outputPath).preview();
|
69
|
+
|
70
|
+
assertThat(readFile(outputPath), is(readResource(RESOURCE_NAME_PREFIX + resultCsvResourceName)));
|
71
|
+
}
|
72
|
+
}
|
73
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
{}
|
@@ -0,0 +1 @@
|
|
1
|
+
preview_sample_buffer_bytes: 96
|
@@ -0,0 +1,19 @@
|
|
1
|
+
type: file
|
2
|
+
parser:
|
3
|
+
charset: UTF-8
|
4
|
+
newline: LF
|
5
|
+
type: csv
|
6
|
+
delimiter: ','
|
7
|
+
quote: '"'
|
8
|
+
escape: '"'
|
9
|
+
null_string: "NULL"
|
10
|
+
trim_if_not_quoted: false
|
11
|
+
skip_header_lines: 1
|
12
|
+
allow_extra_columns: false
|
13
|
+
allow_optional_columns: false
|
14
|
+
columns:
|
15
|
+
- {name: id, type: long}
|
16
|
+
- {name: account, type: long}
|
17
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
18
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
19
|
+
- {name: comment, type: string}
|
@@ -0,0 +1 @@
|
|
1
|
+
1,32864,2015-01-27 19:23:49.000000 +0000,2015-01-27 00:00:00.000000 +0000,embulk
|
data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple_load.yml
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
type: file
|
2
|
+
parser:
|
3
|
+
charset: UTF-8
|
4
|
+
newline: LF
|
5
|
+
type: csv
|
6
|
+
delimiter: ','
|
7
|
+
quote: '"'
|
8
|
+
escape: '"'
|
9
|
+
null_string: "NULL"
|
10
|
+
trim_if_not_quoted: false
|
11
|
+
skip_header_lines: 1
|
12
|
+
allow_extra_columns: false
|
13
|
+
allow_optional_columns: false
|
14
|
+
columns:
|
15
|
+
- {name: id, type: long}
|
16
|
+
- {name: account, type: long}
|
17
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
18
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
19
|
+
- {name: comment, type: string}
|
@@ -0,0 +1,4 @@
|
|
1
|
+
1,32864,2015-01-27 19:23:49.000000 +0000,2015-01-27 00:00:00.000000 +0000,embulk
|
2
|
+
2,14824,2015-01-27 19:01:23.000000 +0000,2015-01-27 00:00:00.000000 +0000,embulk jruby
|
3
|
+
3,27559,2015-01-28 02:20:02.000000 +0000,2015-01-28 00:00:00.000000 +0000,"Embulk ""csv"" parser plugin"
|
4
|
+
4,11270,2015-01-29 11:54:36.000000 +0000,2015-01-29 00:00:00.000000 +0000,
|
@@ -0,0 +1,65 @@
|
|
1
|
+
package org.embulk.test;
|
2
|
+
|
3
|
+
import org.embulk.config.ConfigDiff;
|
4
|
+
import org.embulk.config.ConfigSource;
|
5
|
+
import org.embulk.config.Task;
|
6
|
+
import org.embulk.config.TaskReport;
|
7
|
+
import org.embulk.config.TaskSource;
|
8
|
+
import org.embulk.exec.PreviewResult;
|
9
|
+
import org.embulk.spi.Exec;
|
10
|
+
import org.embulk.spi.InputPlugin;
|
11
|
+
import org.embulk.spi.Page;
|
12
|
+
import org.embulk.spi.PageOutput;
|
13
|
+
import org.embulk.spi.Schema;
|
14
|
+
|
15
|
+
import java.util.List;
|
16
|
+
|
17
|
+
import static com.google.common.base.Preconditions.checkState;
|
18
|
+
|
19
|
+
/**
|
20
|
+
* This plugin is used for TestingEmbulk.InputBuilder.preview().
|
21
|
+
*/
|
22
|
+
public final class PreviewResultInputPlugin
|
23
|
+
implements InputPlugin
|
24
|
+
{
|
25
|
+
private static PreviewResult previewResult;
|
26
|
+
|
27
|
+
public static void setPreviewResult(PreviewResult result)
|
28
|
+
{
|
29
|
+
previewResult = result;
|
30
|
+
}
|
31
|
+
|
32
|
+
@Override
|
33
|
+
public ConfigDiff transaction(ConfigSource config, Control control)
|
34
|
+
{
|
35
|
+
checkState(previewResult != null, "PreviewResult object must be set");
|
36
|
+
return resume(config.loadConfig(Task.class).dump(), previewResult.getSchema(), 1, control);
|
37
|
+
}
|
38
|
+
|
39
|
+
@Override
|
40
|
+
public ConfigDiff resume(TaskSource taskSource, Schema schema, int taskCount, Control control)
|
41
|
+
{
|
42
|
+
control.run(taskSource, schema, taskCount);
|
43
|
+
return Exec.newConfigDiff();
|
44
|
+
}
|
45
|
+
|
46
|
+
@Override
|
47
|
+
public void cleanup(TaskSource taskSource, Schema schema, int taskCount, List<TaskReport> successTaskReports)
|
48
|
+
{
|
49
|
+
}
|
50
|
+
|
51
|
+
@Override
|
52
|
+
public TaskReport run(TaskSource taskSource, Schema schema, int taskIndex, PageOutput output)
|
53
|
+
{
|
54
|
+
for (Page page : previewResult.getPages()) {
|
55
|
+
output.add(page);
|
56
|
+
}
|
57
|
+
return Exec.newTaskReport();
|
58
|
+
}
|
59
|
+
|
60
|
+
@Override
|
61
|
+
public ConfigDiff guess(ConfigSource config)
|
62
|
+
{
|
63
|
+
return Exec.newConfigDiff();
|
64
|
+
}
|
65
|
+
}
|
@@ -9,6 +9,7 @@ import com.google.inject.Injector;
|
|
9
9
|
import com.google.inject.Module;
|
10
10
|
import com.google.inject.util.Modules;
|
11
11
|
import java.util.List;
|
12
|
+
|
12
13
|
import org.embulk.config.ConfigSource;
|
13
14
|
import org.embulk.config.TaskReport;
|
14
15
|
import org.embulk.exec.BulkLoader;
|
@@ -17,9 +18,12 @@ import org.embulk.exec.ForSystemConfig;
|
|
17
18
|
import org.embulk.exec.ResumeState;
|
18
19
|
import org.embulk.spi.Exec;
|
19
20
|
import org.embulk.spi.ExecSession;
|
21
|
+
import org.embulk.spi.InputPlugin;
|
20
22
|
import org.embulk.spi.Schema;
|
21
23
|
import org.slf4j.Logger;
|
22
24
|
|
25
|
+
import static org.embulk.plugin.InjectedPluginSource.registerPluginTo;
|
26
|
+
|
23
27
|
class TestingBulkLoader
|
24
28
|
extends BulkLoader
|
25
29
|
{
|
@@ -33,6 +37,7 @@ class TestingBulkLoader
|
|
33
37
|
public void configure(Binder binder)
|
34
38
|
{
|
35
39
|
binder.bind(BulkLoader.class).to(TestingBulkLoader.class);
|
40
|
+
registerPluginTo(binder, InputPlugin.class, "preview_result", PreviewResultInputPlugin.class);
|
36
41
|
}
|
37
42
|
};
|
38
43
|
return ImmutableList.of(Modules.override(modules).with(ImmutableList.of(override)));
|
@@ -25,7 +25,11 @@ import org.embulk.config.ConfigLoader;
|
|
25
25
|
import org.embulk.config.ConfigSource;
|
26
26
|
import org.embulk.config.ModelManager;
|
27
27
|
import org.embulk.config.TaskReport;
|
28
|
+
import org.embulk.exec.PreviewResult;
|
29
|
+
import org.embulk.plugin.PluginClassLoader;
|
28
30
|
import org.embulk.spi.ColumnConfig;
|
31
|
+
import org.embulk.spi.FileOutputRunner;
|
32
|
+
import org.embulk.spi.Page;
|
29
33
|
import org.embulk.spi.Schema;
|
30
34
|
import org.embulk.spi.SchemaConfig;
|
31
35
|
import org.embulk.spi.TempFileException;
|
@@ -241,6 +245,47 @@ public class TestingEmbulk
|
|
241
245
|
return embed.guess(config).getNested("in");
|
242
246
|
}
|
243
247
|
|
248
|
+
/**
|
249
|
+
* This method returns PreviewResult.
|
250
|
+
*
|
251
|
+
* @return PreviewResult returns the result by PreviewExecutor
|
252
|
+
* @throws IOException
|
253
|
+
*/
|
254
|
+
public PreviewResult preview()
|
255
|
+
throws IOException
|
256
|
+
{
|
257
|
+
checkState(inConfig != null, "inputPath must be set");
|
258
|
+
checkState(outputPath != null, "outputPath must be set");
|
259
|
+
|
260
|
+
// Execute preview to get PreviewResult
|
261
|
+
ConfigSource previewConfig = newConfig()
|
262
|
+
.set("exec", execConfig.set("min_output_tasks", 1)) // exec: config
|
263
|
+
.set("in", inConfig)
|
264
|
+
.set("filters", filtersConfig);
|
265
|
+
PreviewResult result = embed.preview(previewConfig);
|
266
|
+
PreviewResultInputPlugin.setPreviewResult(result);
|
267
|
+
|
268
|
+
String fileName = outputPath.getFileName().toString();
|
269
|
+
checkArgument(fileName.endsWith(".csv"), "outputPath must end with .csv");
|
270
|
+
Path dir = outputPath.getParent().resolve(fileName.substring(0, fileName.length() - 4));
|
271
|
+
Files.createDirectories(dir);
|
272
|
+
|
273
|
+
// Execute run to write PreviewResult's Page objects to output files
|
274
|
+
ConfigSource runConfig = newConfig()
|
275
|
+
.set("in", newConfig().set("type", "preview_result")) // in: config
|
276
|
+
.set("out", newConfig() // out: config
|
277
|
+
.set("type", "file")
|
278
|
+
.set("path_prefix", dir.resolve("fragments_").toString())
|
279
|
+
.set("file_ext", "csv")
|
280
|
+
.set("formatter", newConfig()
|
281
|
+
.set("type", "csv")
|
282
|
+
.set("header_line", false)
|
283
|
+
.set("newline", "LF")));
|
284
|
+
embed.run(runConfig);
|
285
|
+
|
286
|
+
return buildPreviewResultWithOutput(result, dir, outputPath);
|
287
|
+
}
|
288
|
+
|
244
289
|
public RunResult run()
|
245
290
|
throws IOException
|
246
291
|
{
|
@@ -514,8 +559,22 @@ public class TestingEmbulk
|
|
514
559
|
}
|
515
560
|
}
|
516
561
|
|
562
|
+
private PreviewResult buildPreviewResultWithOutput(PreviewResult result, Path outputDir, Path outputPath)
|
563
|
+
throws IOException
|
564
|
+
{
|
565
|
+
copyToPath(outputDir, outputPath);
|
566
|
+
return result;
|
567
|
+
}
|
568
|
+
|
517
569
|
private RunResult buildRunResultWithOutput(RunResult result, Path outputDir, Path outputPath)
|
518
570
|
throws IOException
|
571
|
+
{
|
572
|
+
copyToPath(outputDir, outputPath);
|
573
|
+
return result;
|
574
|
+
}
|
575
|
+
|
576
|
+
private void copyToPath(Path outputDir, Path outputPath)
|
577
|
+
throws IOException
|
519
578
|
{
|
520
579
|
try (OutputStream out = Files.newOutputStream(outputPath)) {
|
521
580
|
List<Path> fragments = new ArrayList<Path>();
|
@@ -531,8 +590,6 @@ public class TestingEmbulk
|
|
531
590
|
}
|
532
591
|
}
|
533
592
|
}
|
534
|
-
|
535
|
-
return result;
|
536
593
|
}
|
537
594
|
|
538
595
|
public InputBuilder inputBuilder()
|