embulk 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +13 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +33 -0
- data/README.md +117 -0
- data/Rakefile +58 -0
- data/bin/embulk +63 -0
- data/build.gradle +149 -0
- data/embulk-cli/build.gradle +6 -0
- data/embulk-cli/pom.xml +94 -0
- data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
- data/embulk-core/build.gradle +6 -0
- data/embulk-core/pom.xml +143 -0
- data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
- data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
- data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
- data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
- data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
- data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
- data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
- data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
- data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
- data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
- data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
- data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
- data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
- data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
- data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
- data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
- data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
- data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
- data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
- data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
- data/embulk-standards/build.gradle +6 -0
- data/embulk-standards/pom.xml +68 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
- data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
- data/embulk.gemspec +27 -0
- data/examples/config.yml +34 -0
- data/examples/csv/sample.csv.gz +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk.rb +16 -0
- data/lib/embulk/buffer.rb +17 -0
- data/lib/embulk/column.rb +47 -0
- data/lib/embulk/command/embulk.rb +39 -0
- data/lib/embulk/command/embulk_example.rb +32 -0
- data/lib/embulk/command/embulk_generate_bin.rb +62 -0
- data/lib/embulk/command/embulk_run.rb +243 -0
- data/lib/embulk/data/bundle/.bundle/config +3 -0
- data/lib/embulk/data/bundle/Gemfile +31 -0
- data/lib/embulk/data/bundle/Gemfile.lock +8 -0
- data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
- data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
- data/lib/embulk/data_source.rb +66 -0
- data/lib/embulk/error.rb +5 -0
- data/lib/embulk/guess_charset.rb +26 -0
- data/lib/embulk/guess_csv.rb +195 -0
- data/lib/embulk/guess_gzip.rb +18 -0
- data/lib/embulk/guess_newline.rb +20 -0
- data/lib/embulk/guess_plugin.rb +113 -0
- data/lib/embulk/input_plugin.rb +53 -0
- data/lib/embulk/java/bootstrap.rb +12 -0
- data/lib/embulk/java/imports.rb +26 -0
- data/lib/embulk/java/time_helper.rb +77 -0
- data/lib/embulk/output_plugin.rb +104 -0
- data/lib/embulk/page.rb +28 -0
- data/lib/embulk/page_builder.rb +22 -0
- data/lib/embulk/plugin.rb +152 -0
- data/lib/embulk/plugin_registry.rb +70 -0
- data/lib/embulk/schema.rb +85 -0
- data/lib/embulk/time_format_guess.rb +331 -0
- data/lib/embulk/version.rb +3 -0
- data/pom.xml +533 -0
- data/settings.gradle +5 -0
- metadata +370 -0
@@ -0,0 +1,138 @@
|
|
1
|
+
package org.embulk.standards;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.io.File;
|
5
|
+
import java.io.FileInputStream;
|
6
|
+
import java.io.InputStream;
|
7
|
+
import java.io.IOException;
|
8
|
+
import java.nio.file.Path;
|
9
|
+
import java.nio.file.Paths;
|
10
|
+
import java.nio.file.Files;
|
11
|
+
import java.nio.file.SimpleFileVisitor;
|
12
|
+
import java.nio.file.FileVisitResult;
|
13
|
+
import java.nio.file.attribute.BasicFileAttributes;
|
14
|
+
import javax.validation.constraints.NotNull;
|
15
|
+
import com.google.common.collect.ImmutableList;
|
16
|
+
import com.fasterxml.jackson.annotation.JacksonInject;
|
17
|
+
import org.embulk.config.Config;
|
18
|
+
import org.embulk.config.Task;
|
19
|
+
import org.embulk.config.TaskSource;
|
20
|
+
import org.embulk.config.ConfigSource;
|
21
|
+
import org.embulk.config.NextConfig;
|
22
|
+
import org.embulk.config.CommitReport;
|
23
|
+
import org.embulk.spi.BufferAllocator;
|
24
|
+
import org.embulk.spi.Exec;
|
25
|
+
import org.embulk.spi.FileInputPlugin;
|
26
|
+
import org.embulk.spi.TransactionalFileInput;
|
27
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
28
|
+
import org.slf4j.Logger;
|
29
|
+
|
30
|
+
import static org.embulk.spi.util.Inputs.formatPath;
|
31
|
+
|
32
|
+
public class LocalFileInputPlugin
|
33
|
+
implements FileInputPlugin
|
34
|
+
{
|
35
|
+
public interface PluginTask
|
36
|
+
extends Task
|
37
|
+
{
|
38
|
+
@Config("paths")
|
39
|
+
@NotNull
|
40
|
+
public List<String> getPathPrefixes();
|
41
|
+
|
42
|
+
public List<String> getFiles();
|
43
|
+
public void setFiles(List<String> files);
|
44
|
+
|
45
|
+
@JacksonInject
|
46
|
+
public BufferAllocator getBufferAllocator();
|
47
|
+
}
|
48
|
+
|
49
|
+
private final Logger log = Exec.getLogger(getClass());
|
50
|
+
|
51
|
+
@Override
|
52
|
+
public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
|
53
|
+
{
|
54
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
55
|
+
|
56
|
+
// list files recursively
|
57
|
+
task.setFiles(listFiles(task));
|
58
|
+
|
59
|
+
// run with threads. number of processors is same with number of files
|
60
|
+
control.run(task.dump(), task.getFiles().size());
|
61
|
+
|
62
|
+
return Exec.newNextConfig();
|
63
|
+
}
|
64
|
+
|
65
|
+
public List<String> listFiles(PluginTask task)
|
66
|
+
{
|
67
|
+
final ImmutableList.Builder<String> builder = ImmutableList.builder();
|
68
|
+
for (String prefix : task.getPathPrefixes()) {
|
69
|
+
String formatted = formatPath(prefix);
|
70
|
+
try {
|
71
|
+
log.info("Listing local files with prefix '{}'", formatted);
|
72
|
+
Files.walkFileTree(Paths.get(formatted), new SimpleFileVisitor<Path>() {
|
73
|
+
@Override
|
74
|
+
public FileVisitResult visitFile(Path file, BasicFileAttributes aAttrs)
|
75
|
+
{
|
76
|
+
builder.add(file.toString());
|
77
|
+
return FileVisitResult.CONTINUE;
|
78
|
+
}
|
79
|
+
});
|
80
|
+
} catch (IOException ex) {
|
81
|
+
throw new RuntimeException(String.format("Failed get a list of local files at '%s'", formatted), ex);
|
82
|
+
}
|
83
|
+
}
|
84
|
+
return builder.build();
|
85
|
+
}
|
86
|
+
|
87
|
+
@Override
|
88
|
+
public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
|
89
|
+
{
|
90
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
91
|
+
return new LocalFileInput(task, processorIndex);
|
92
|
+
}
|
93
|
+
|
94
|
+
public static class LocalFileInput
|
95
|
+
extends InputStreamFileInput
|
96
|
+
implements TransactionalFileInput
|
97
|
+
{
|
98
|
+
// TODO create single-file InputStreamFileInput utility
|
99
|
+
private static class SingleFileProvider
|
100
|
+
implements InputStreamFileInput.Provider
|
101
|
+
{
|
102
|
+
private final File file;
|
103
|
+
private boolean opened = false;
|
104
|
+
|
105
|
+
public SingleFileProvider(File file)
|
106
|
+
{
|
107
|
+
this.file = file;
|
108
|
+
}
|
109
|
+
|
110
|
+
@Override
|
111
|
+
public InputStream openNext() throws IOException
|
112
|
+
{
|
113
|
+
if (opened) {
|
114
|
+
return null;
|
115
|
+
}
|
116
|
+
opened = true;
|
117
|
+
return new FileInputStream(file);
|
118
|
+
}
|
119
|
+
|
120
|
+
@Override
|
121
|
+
public void close() { }
|
122
|
+
}
|
123
|
+
|
124
|
+
public LocalFileInput(PluginTask task, int processorIndex)
|
125
|
+
{
|
126
|
+
super(task.getBufferAllocator(), new SingleFileProvider(new File(task.getFiles().get(processorIndex))));
|
127
|
+
}
|
128
|
+
|
129
|
+
@Override
|
130
|
+
public void abort() { }
|
131
|
+
|
132
|
+
@Override
|
133
|
+
public CommitReport commit()
|
134
|
+
{
|
135
|
+
return Exec.newCommitReport();
|
136
|
+
}
|
137
|
+
}
|
138
|
+
}
|
@@ -0,0 +1,128 @@
|
|
1
|
+
package org.embulk.standards;
|
2
|
+
|
3
|
+
import java.io.File;
|
4
|
+
import java.io.FileNotFoundException;
|
5
|
+
import java.io.FileOutputStream;
|
6
|
+
import java.io.IOException;
|
7
|
+
import java.io.OutputStream;
|
8
|
+
import java.util.ArrayList;
|
9
|
+
import java.util.List;
|
10
|
+
import org.embulk.config.Config;
|
11
|
+
import org.embulk.config.ConfigSource;
|
12
|
+
import org.embulk.config.NextConfig;
|
13
|
+
import org.embulk.config.CommitReport;
|
14
|
+
import org.embulk.config.Task;
|
15
|
+
import org.embulk.config.TaskSource;
|
16
|
+
import org.embulk.spi.Buffer;
|
17
|
+
import org.embulk.spi.FileOutputPlugin;
|
18
|
+
import org.embulk.spi.TransactionalFileOutput;
|
19
|
+
import org.embulk.spi.Exec;
|
20
|
+
import org.slf4j.Logger;
|
21
|
+
|
22
|
+
public class LocalFileOutputPlugin
|
23
|
+
implements FileOutputPlugin
|
24
|
+
{
|
25
|
+
public interface PluginTask
|
26
|
+
extends Task
|
27
|
+
{
|
28
|
+
@Config("directory")
|
29
|
+
public String getDirectory();
|
30
|
+
|
31
|
+
@Config("file_name")
|
32
|
+
public String getFileNameFormat();
|
33
|
+
|
34
|
+
@Config("file_ext")
|
35
|
+
public String getFileNameExtension();
|
36
|
+
|
37
|
+
// TODO support in FileInputPlugin and FileOutputPlugin
|
38
|
+
//@Config("compress_type")
|
39
|
+
//public String getCompressType();
|
40
|
+
}
|
41
|
+
|
42
|
+
private final Logger log = Exec.getLogger(getClass());
|
43
|
+
|
44
|
+
@Override
|
45
|
+
public NextConfig transaction(ConfigSource config, int processorCount,
|
46
|
+
FileOutputPlugin.Control control)
|
47
|
+
{
|
48
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
49
|
+
|
50
|
+
control.run(task.dump());
|
51
|
+
|
52
|
+
return Exec.newNextConfig();
|
53
|
+
}
|
54
|
+
|
55
|
+
@Override
|
56
|
+
public TransactionalFileOutput open(TaskSource taskSource, final int processorIndex)
|
57
|
+
{
|
58
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
59
|
+
|
60
|
+
// TODO format path using timestamp
|
61
|
+
final String fileName = task.getFileNameFormat();
|
62
|
+
|
63
|
+
final String pathPrefix = task.getDirectory() + File.separator + fileName;
|
64
|
+
final String pathSuffix = task.getFileNameExtension();
|
65
|
+
|
66
|
+
final List<String> fileNames = new ArrayList<>();
|
67
|
+
|
68
|
+
return new TransactionalFileOutput() {
|
69
|
+
private int fileIndex = 0;
|
70
|
+
private FileOutputStream output = null;
|
71
|
+
|
72
|
+
public void nextFile()
|
73
|
+
{
|
74
|
+
closeFile();
|
75
|
+
String path = pathPrefix + String.format(".%03d.%02d.", processorIndex, fileIndex) + pathSuffix;
|
76
|
+
log.info("Writing local file '{}'", path);
|
77
|
+
fileNames.add(path);
|
78
|
+
try {
|
79
|
+
output = new FileOutputStream(new File(path));
|
80
|
+
} catch (FileNotFoundException ex) {
|
81
|
+
throw new RuntimeException(ex); // TODO exception class
|
82
|
+
}
|
83
|
+
fileIndex++;
|
84
|
+
}
|
85
|
+
|
86
|
+
private void closeFile()
|
87
|
+
{
|
88
|
+
if (output != null) {
|
89
|
+
try {
|
90
|
+
output.close();
|
91
|
+
} catch (IOException ex) {
|
92
|
+
throw new RuntimeException(ex);
|
93
|
+
}
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
97
|
+
public void add(Buffer buffer)
|
98
|
+
{
|
99
|
+
try {
|
100
|
+
output.write(buffer.array(), buffer.offset(), buffer.limit());
|
101
|
+
} catch (IOException ex) {
|
102
|
+
throw new RuntimeException(ex);
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
public void finish()
|
107
|
+
{
|
108
|
+
closeFile();
|
109
|
+
}
|
110
|
+
|
111
|
+
public void close()
|
112
|
+
{
|
113
|
+
closeFile();
|
114
|
+
}
|
115
|
+
|
116
|
+
public void abort() { }
|
117
|
+
|
118
|
+
public CommitReport commit()
|
119
|
+
{
|
120
|
+
CommitReport report = Exec.newCommitReport();
|
121
|
+
// TODO better setting for Report
|
122
|
+
// report.set("file_names", fileNames);
|
123
|
+
// report.set("file_sizes", fileSizes);
|
124
|
+
return report;
|
125
|
+
}
|
126
|
+
};
|
127
|
+
}
|
128
|
+
}
|
@@ -0,0 +1,46 @@
|
|
1
|
+
package org.embulk.standards;
|
2
|
+
|
3
|
+
import org.embulk.config.ConfigSource;
|
4
|
+
import org.embulk.config.TaskSource;
|
5
|
+
import org.embulk.config.NextConfig;
|
6
|
+
import org.embulk.config.CommitReport;
|
7
|
+
import org.embulk.spi.Schema;
|
8
|
+
import org.embulk.spi.Page;
|
9
|
+
import org.embulk.spi.Exec;
|
10
|
+
import org.embulk.spi.OutputPlugin;
|
11
|
+
import org.embulk.spi.TransactionalPageOutput;
|
12
|
+
|
13
|
+
public class NullOutputPlugin
|
14
|
+
implements OutputPlugin
|
15
|
+
{
|
16
|
+
@Override
|
17
|
+
public NextConfig transaction(ConfigSource config,
|
18
|
+
Schema schema, int processorCount,
|
19
|
+
OutputPlugin.Control control)
|
20
|
+
{
|
21
|
+
control.run(Exec.newTaskSource());
|
22
|
+
return Exec.newNextConfig();
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int processorIndex)
|
27
|
+
{
|
28
|
+
return new TransactionalPageOutput() {
|
29
|
+
public void add(Page page)
|
30
|
+
{
|
31
|
+
page.release();
|
32
|
+
}
|
33
|
+
|
34
|
+
public void finish() { }
|
35
|
+
|
36
|
+
public void close() { }
|
37
|
+
|
38
|
+
public void abort() { }
|
39
|
+
|
40
|
+
public CommitReport commit()
|
41
|
+
{
|
42
|
+
return Exec.newCommitReport();
|
43
|
+
}
|
44
|
+
};
|
45
|
+
}
|
46
|
+
}
|
@@ -0,0 +1,238 @@
|
|
1
|
+
package org.embulk.standards;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.io.IOException;
|
5
|
+
import java.io.InputStream;
|
6
|
+
|
7
|
+
import com.amazonaws.AmazonClientException;
|
8
|
+
import com.amazonaws.AmazonServiceException;
|
9
|
+
import com.google.common.collect.ImmutableList;
|
10
|
+
import com.google.common.base.Optional;
|
11
|
+
import com.fasterxml.jackson.annotation.JacksonInject;
|
12
|
+
import com.amazonaws.auth.AWSCredentials;
|
13
|
+
import com.amazonaws.auth.AWSCredentialsProvider;
|
14
|
+
import com.amazonaws.auth.BasicAWSCredentials;
|
15
|
+
import com.amazonaws.services.s3.AmazonS3Client;
|
16
|
+
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
17
|
+
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
18
|
+
import com.amazonaws.services.s3.model.ObjectListing;
|
19
|
+
import com.amazonaws.services.s3.model.GetObjectRequest;
|
20
|
+
import com.amazonaws.services.s3.model.S3Object;
|
21
|
+
import com.amazonaws.ClientConfiguration;
|
22
|
+
import org.embulk.config.Config;
|
23
|
+
import org.embulk.config.Task;
|
24
|
+
import org.embulk.config.TaskSource;
|
25
|
+
import org.embulk.config.ConfigSource;
|
26
|
+
import org.embulk.config.NextConfig;
|
27
|
+
import org.embulk.config.CommitReport;
|
28
|
+
import org.embulk.spi.BufferAllocator;
|
29
|
+
import org.embulk.spi.Exec;
|
30
|
+
import org.embulk.spi.FileInputPlugin;
|
31
|
+
import org.embulk.spi.TransactionalFileInput;
|
32
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
33
|
+
import org.slf4j.Logger;
|
34
|
+
|
35
|
+
import static org.embulk.spi.util.Inputs.formatPath;
|
36
|
+
|
37
|
+
public class S3FileInputPlugin
|
38
|
+
implements FileInputPlugin
|
39
|
+
{
|
40
|
+
public interface PluginTask
|
41
|
+
extends Task
|
42
|
+
{
|
43
|
+
@Config("bucket")
|
44
|
+
public String getBucket();
|
45
|
+
|
46
|
+
@Config("paths")
|
47
|
+
public List<String> getPathPrefixes();
|
48
|
+
|
49
|
+
@Config("endpoint")
|
50
|
+
public Optional<String> getEndpoint();
|
51
|
+
|
52
|
+
// TODO timeout, ssl, etc
|
53
|
+
|
54
|
+
@Config("access_key_id")
|
55
|
+
public String getAccessKeyId();
|
56
|
+
|
57
|
+
@Config("secret_access_key")
|
58
|
+
public String getSecretAccessKey();
|
59
|
+
|
60
|
+
// TODO support more options such as STS
|
61
|
+
|
62
|
+
public List<String> getFiles();
|
63
|
+
public void setFiles(List<String> files);
|
64
|
+
|
65
|
+
@JacksonInject
|
66
|
+
public BufferAllocator getBufferAllocator();
|
67
|
+
}
|
68
|
+
|
69
|
+
private final Logger log = Exec.getLogger(getClass());
|
70
|
+
|
71
|
+
@Override
|
72
|
+
public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
|
73
|
+
{
|
74
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
75
|
+
|
76
|
+
// list files recursively
|
77
|
+
task.setFiles(listFiles(task));
|
78
|
+
|
79
|
+
// number of processors is same with number of files
|
80
|
+
|
81
|
+
// run
|
82
|
+
control.run(task.dump(), task.getFiles().size());
|
83
|
+
|
84
|
+
return Exec.newNextConfig();
|
85
|
+
}
|
86
|
+
|
87
|
+
public static AWSCredentialsProvider getCredentialsProvider(PluginTask task)
|
88
|
+
{
|
89
|
+
final AWSCredentials cred = new BasicAWSCredentials(
|
90
|
+
task.getAccessKeyId(), task.getSecretAccessKey());
|
91
|
+
return new AWSCredentialsProvider() {
|
92
|
+
public AWSCredentials getCredentials()
|
93
|
+
{
|
94
|
+
return cred;
|
95
|
+
}
|
96
|
+
|
97
|
+
public void refresh()
|
98
|
+
{
|
99
|
+
}
|
100
|
+
};
|
101
|
+
}
|
102
|
+
|
103
|
+
private static AmazonS3Client newS3Client(PluginTask task)
|
104
|
+
{
|
105
|
+
AWSCredentialsProvider credentials = getCredentialsProvider(task);
|
106
|
+
AmazonS3Client client = newS3Client(credentials, task.getEndpoint());
|
107
|
+
return client;
|
108
|
+
}
|
109
|
+
|
110
|
+
private static AmazonS3Client newS3Client(AWSCredentialsProvider credentials,
|
111
|
+
Optional<String> endpoint)
|
112
|
+
{
|
113
|
+
// TODO get config from AmazonS3Task
|
114
|
+
ClientConfiguration clientConfig = new ClientConfiguration();
|
115
|
+
//clientConfig.setProtocol(Protocol.HTTP);
|
116
|
+
clientConfig.setMaxConnections(50); // SDK default: 50
|
117
|
+
clientConfig.setMaxErrorRetry(3); // SDK default: 3
|
118
|
+
clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
|
119
|
+
|
120
|
+
AmazonS3Client client = new AmazonS3Client(credentials, clientConfig);
|
121
|
+
|
122
|
+
if (endpoint.isPresent()) {
|
123
|
+
client.setEndpoint(endpoint.get());
|
124
|
+
}
|
125
|
+
|
126
|
+
return client;
|
127
|
+
}
|
128
|
+
|
129
|
+
public List<String> listFiles(PluginTask task)
|
130
|
+
{
|
131
|
+
AmazonS3Client client = newS3Client(task);
|
132
|
+
String bucketName = task.getBucket();
|
133
|
+
|
134
|
+
ImmutableList.Builder<String> builder = ImmutableList.builder();
|
135
|
+
for (String prefix : task.getPathPrefixes()) {
|
136
|
+
String formatted = formatPath(prefix);
|
137
|
+
try {
|
138
|
+
log.info("Listing S3 files with prefix '{}'", formatted);
|
139
|
+
builder.addAll(listS3FilesByPrefix(client, bucketName, formatted));
|
140
|
+
} catch (RuntimeException e) {
|
141
|
+
throw new RuntimeException(String.format("Failed get a list of S3 files at '%s'", formatted), e);
|
142
|
+
}
|
143
|
+
}
|
144
|
+
|
145
|
+
return builder.build();
|
146
|
+
}
|
147
|
+
|
148
|
+
/**
|
149
|
+
* Lists S3 filenames filtered by prefix.
|
150
|
+
*
|
151
|
+
* The resulting list does not include the file that's size == 0.
|
152
|
+
*/
|
153
|
+
public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName, String prefix)
|
154
|
+
{
|
155
|
+
// TODO implement retrying
|
156
|
+
ImmutableList.Builder<String> builder = ImmutableList.builder();
|
157
|
+
|
158
|
+
String lastKey = null;
|
159
|
+
do {
|
160
|
+
ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
|
161
|
+
ObjectListing ol = client.listObjects(req);
|
162
|
+
for(S3ObjectSummary s : ol.getObjectSummaries()) {
|
163
|
+
if (s.getSize() > 0) {
|
164
|
+
builder.add(s.getKey());
|
165
|
+
}
|
166
|
+
}
|
167
|
+
lastKey = ol.getNextMarker();
|
168
|
+
} while(lastKey != null);
|
169
|
+
|
170
|
+
return builder.build();
|
171
|
+
}
|
172
|
+
|
173
|
+
@Override
|
174
|
+
public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
|
175
|
+
{
|
176
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
177
|
+
return new S3FileInput(task, processorIndex);
|
178
|
+
}
|
179
|
+
|
180
|
+
public static class S3FileInput
|
181
|
+
extends InputStreamFileInput
|
182
|
+
implements TransactionalFileInput
|
183
|
+
{
|
184
|
+
// TODO create single-file InputStreamFileInput utility
|
185
|
+
private static class SingleFileProvider
|
186
|
+
implements InputStreamFileInput.Provider
|
187
|
+
{
|
188
|
+
private AmazonS3Client client;
|
189
|
+
private final String bucket;
|
190
|
+
private final String key;
|
191
|
+
private boolean opened = false;
|
192
|
+
|
193
|
+
public SingleFileProvider(PluginTask task, int processorIndex)
|
194
|
+
{
|
195
|
+
this.client = newS3Client(task);
|
196
|
+
this.bucket = task.getBucket();
|
197
|
+
this.key = task.getFiles().get(processorIndex);
|
198
|
+
}
|
199
|
+
|
200
|
+
@Override
|
201
|
+
public InputStream openNext() throws IOException
|
202
|
+
{
|
203
|
+
if (opened) {
|
204
|
+
return null;
|
205
|
+
}
|
206
|
+
opened = true;
|
207
|
+
GetObjectRequest request = new GetObjectRequest(bucket, key);
|
208
|
+
//if (pos > 0) {
|
209
|
+
// request.setRange(pos, contentLength);
|
210
|
+
//}
|
211
|
+
S3Object obj = client.getObject(request);
|
212
|
+
//if (pos <= 0) {
|
213
|
+
// // first call
|
214
|
+
// contentLength = obj.getObjectMetadata().getContentLength();
|
215
|
+
//}
|
216
|
+
return obj.getObjectContent();
|
217
|
+
}
|
218
|
+
|
219
|
+
@Override
|
220
|
+
public void close() { }
|
221
|
+
}
|
222
|
+
|
223
|
+
public S3FileInput(PluginTask task, int processorIndex)
|
224
|
+
{
|
225
|
+
super(task.getBufferAllocator(), new SingleFileProvider(task, processorIndex));
|
226
|
+
}
|
227
|
+
|
228
|
+
public void abort() { }
|
229
|
+
|
230
|
+
public CommitReport commit()
|
231
|
+
{
|
232
|
+
return Exec.newCommitReport();
|
233
|
+
}
|
234
|
+
|
235
|
+
@Override
|
236
|
+
public void close() { }
|
237
|
+
}
|
238
|
+
}
|