embulk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +13 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +33 -0
- data/README.md +117 -0
- data/Rakefile +58 -0
- data/bin/embulk +63 -0
- data/build.gradle +149 -0
- data/embulk-cli/build.gradle +6 -0
- data/embulk-cli/pom.xml +94 -0
- data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
- data/embulk-core/build.gradle +6 -0
- data/embulk-core/pom.xml +143 -0
- data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
- data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
- data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
- data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
- data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
- data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
- data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
- data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
- data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
- data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
- data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
- data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
- data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
- data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
- data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
- data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
- data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
- data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
- data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
- data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
- data/embulk-standards/build.gradle +6 -0
- data/embulk-standards/pom.xml +68 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
- data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
- data/embulk.gemspec +27 -0
- data/examples/config.yml +34 -0
- data/examples/csv/sample.csv.gz +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk.rb +16 -0
- data/lib/embulk/buffer.rb +17 -0
- data/lib/embulk/column.rb +47 -0
- data/lib/embulk/command/embulk.rb +39 -0
- data/lib/embulk/command/embulk_example.rb +32 -0
- data/lib/embulk/command/embulk_generate_bin.rb +62 -0
- data/lib/embulk/command/embulk_run.rb +243 -0
- data/lib/embulk/data/bundle/.bundle/config +3 -0
- data/lib/embulk/data/bundle/Gemfile +31 -0
- data/lib/embulk/data/bundle/Gemfile.lock +8 -0
- data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
- data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
- data/lib/embulk/data_source.rb +66 -0
- data/lib/embulk/error.rb +5 -0
- data/lib/embulk/guess_charset.rb +26 -0
- data/lib/embulk/guess_csv.rb +195 -0
- data/lib/embulk/guess_gzip.rb +18 -0
- data/lib/embulk/guess_newline.rb +20 -0
- data/lib/embulk/guess_plugin.rb +113 -0
- data/lib/embulk/input_plugin.rb +53 -0
- data/lib/embulk/java/bootstrap.rb +12 -0
- data/lib/embulk/java/imports.rb +26 -0
- data/lib/embulk/java/time_helper.rb +77 -0
- data/lib/embulk/output_plugin.rb +104 -0
- data/lib/embulk/page.rb +28 -0
- data/lib/embulk/page_builder.rb +22 -0
- data/lib/embulk/plugin.rb +152 -0
- data/lib/embulk/plugin_registry.rb +70 -0
- data/lib/embulk/schema.rb +85 -0
- data/lib/embulk/time_format_guess.rb +331 -0
- data/lib/embulk/version.rb +3 -0
- data/pom.xml +533 -0
- data/settings.gradle +5 -0
- metadata +370 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
package org.embulk.standards;
|
|
2
|
+
|
|
3
|
+
import java.util.List;
|
|
4
|
+
import java.io.File;
|
|
5
|
+
import java.io.FileInputStream;
|
|
6
|
+
import java.io.InputStream;
|
|
7
|
+
import java.io.IOException;
|
|
8
|
+
import java.nio.file.Path;
|
|
9
|
+
import java.nio.file.Paths;
|
|
10
|
+
import java.nio.file.Files;
|
|
11
|
+
import java.nio.file.SimpleFileVisitor;
|
|
12
|
+
import java.nio.file.FileVisitResult;
|
|
13
|
+
import java.nio.file.attribute.BasicFileAttributes;
|
|
14
|
+
import javax.validation.constraints.NotNull;
|
|
15
|
+
import com.google.common.collect.ImmutableList;
|
|
16
|
+
import com.fasterxml.jackson.annotation.JacksonInject;
|
|
17
|
+
import org.embulk.config.Config;
|
|
18
|
+
import org.embulk.config.Task;
|
|
19
|
+
import org.embulk.config.TaskSource;
|
|
20
|
+
import org.embulk.config.ConfigSource;
|
|
21
|
+
import org.embulk.config.NextConfig;
|
|
22
|
+
import org.embulk.config.CommitReport;
|
|
23
|
+
import org.embulk.spi.BufferAllocator;
|
|
24
|
+
import org.embulk.spi.Exec;
|
|
25
|
+
import org.embulk.spi.FileInputPlugin;
|
|
26
|
+
import org.embulk.spi.TransactionalFileInput;
|
|
27
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
|
28
|
+
import org.slf4j.Logger;
|
|
29
|
+
|
|
30
|
+
import static org.embulk.spi.util.Inputs.formatPath;
|
|
31
|
+
|
|
32
|
+
public class LocalFileInputPlugin
|
|
33
|
+
implements FileInputPlugin
|
|
34
|
+
{
|
|
35
|
+
public interface PluginTask
|
|
36
|
+
extends Task
|
|
37
|
+
{
|
|
38
|
+
@Config("paths")
|
|
39
|
+
@NotNull
|
|
40
|
+
public List<String> getPathPrefixes();
|
|
41
|
+
|
|
42
|
+
public List<String> getFiles();
|
|
43
|
+
public void setFiles(List<String> files);
|
|
44
|
+
|
|
45
|
+
@JacksonInject
|
|
46
|
+
public BufferAllocator getBufferAllocator();
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
private final Logger log = Exec.getLogger(getClass());
|
|
50
|
+
|
|
51
|
+
@Override
|
|
52
|
+
public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
|
|
53
|
+
{
|
|
54
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
|
55
|
+
|
|
56
|
+
// list files recursively
|
|
57
|
+
task.setFiles(listFiles(task));
|
|
58
|
+
|
|
59
|
+
// run with threads. number of processors is same with number of files
|
|
60
|
+
control.run(task.dump(), task.getFiles().size());
|
|
61
|
+
|
|
62
|
+
return Exec.newNextConfig();
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
public List<String> listFiles(PluginTask task)
|
|
66
|
+
{
|
|
67
|
+
final ImmutableList.Builder<String> builder = ImmutableList.builder();
|
|
68
|
+
for (String prefix : task.getPathPrefixes()) {
|
|
69
|
+
String formatted = formatPath(prefix);
|
|
70
|
+
try {
|
|
71
|
+
log.info("Listing local files with prefix '{}'", formatted);
|
|
72
|
+
Files.walkFileTree(Paths.get(formatted), new SimpleFileVisitor<Path>() {
|
|
73
|
+
@Override
|
|
74
|
+
public FileVisitResult visitFile(Path file, BasicFileAttributes aAttrs)
|
|
75
|
+
{
|
|
76
|
+
builder.add(file.toString());
|
|
77
|
+
return FileVisitResult.CONTINUE;
|
|
78
|
+
}
|
|
79
|
+
});
|
|
80
|
+
} catch (IOException ex) {
|
|
81
|
+
throw new RuntimeException(String.format("Failed get a list of local files at '%s'", formatted), ex);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
return builder.build();
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
@Override
|
|
88
|
+
public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
|
|
89
|
+
{
|
|
90
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
|
91
|
+
return new LocalFileInput(task, processorIndex);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
public static class LocalFileInput
|
|
95
|
+
extends InputStreamFileInput
|
|
96
|
+
implements TransactionalFileInput
|
|
97
|
+
{
|
|
98
|
+
// TODO create single-file InputStreamFileInput utility
|
|
99
|
+
private static class SingleFileProvider
|
|
100
|
+
implements InputStreamFileInput.Provider
|
|
101
|
+
{
|
|
102
|
+
private final File file;
|
|
103
|
+
private boolean opened = false;
|
|
104
|
+
|
|
105
|
+
public SingleFileProvider(File file)
|
|
106
|
+
{
|
|
107
|
+
this.file = file;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
@Override
|
|
111
|
+
public InputStream openNext() throws IOException
|
|
112
|
+
{
|
|
113
|
+
if (opened) {
|
|
114
|
+
return null;
|
|
115
|
+
}
|
|
116
|
+
opened = true;
|
|
117
|
+
return new FileInputStream(file);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
@Override
|
|
121
|
+
public void close() { }
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
public LocalFileInput(PluginTask task, int processorIndex)
|
|
125
|
+
{
|
|
126
|
+
super(task.getBufferAllocator(), new SingleFileProvider(new File(task.getFiles().get(processorIndex))));
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
@Override
|
|
130
|
+
public void abort() { }
|
|
131
|
+
|
|
132
|
+
@Override
|
|
133
|
+
public CommitReport commit()
|
|
134
|
+
{
|
|
135
|
+
return Exec.newCommitReport();
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
package org.embulk.standards;
|
|
2
|
+
|
|
3
|
+
import java.io.File;
|
|
4
|
+
import java.io.FileNotFoundException;
|
|
5
|
+
import java.io.FileOutputStream;
|
|
6
|
+
import java.io.IOException;
|
|
7
|
+
import java.io.OutputStream;
|
|
8
|
+
import java.util.ArrayList;
|
|
9
|
+
import java.util.List;
|
|
10
|
+
import org.embulk.config.Config;
|
|
11
|
+
import org.embulk.config.ConfigSource;
|
|
12
|
+
import org.embulk.config.NextConfig;
|
|
13
|
+
import org.embulk.config.CommitReport;
|
|
14
|
+
import org.embulk.config.Task;
|
|
15
|
+
import org.embulk.config.TaskSource;
|
|
16
|
+
import org.embulk.spi.Buffer;
|
|
17
|
+
import org.embulk.spi.FileOutputPlugin;
|
|
18
|
+
import org.embulk.spi.TransactionalFileOutput;
|
|
19
|
+
import org.embulk.spi.Exec;
|
|
20
|
+
import org.slf4j.Logger;
|
|
21
|
+
|
|
22
|
+
public class LocalFileOutputPlugin
|
|
23
|
+
implements FileOutputPlugin
|
|
24
|
+
{
|
|
25
|
+
public interface PluginTask
|
|
26
|
+
extends Task
|
|
27
|
+
{
|
|
28
|
+
@Config("directory")
|
|
29
|
+
public String getDirectory();
|
|
30
|
+
|
|
31
|
+
@Config("file_name")
|
|
32
|
+
public String getFileNameFormat();
|
|
33
|
+
|
|
34
|
+
@Config("file_ext")
|
|
35
|
+
public String getFileNameExtension();
|
|
36
|
+
|
|
37
|
+
// TODO support in FileInputPlugin and FileOutputPlugin
|
|
38
|
+
//@Config("compress_type")
|
|
39
|
+
//public String getCompressType();
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
private final Logger log = Exec.getLogger(getClass());
|
|
43
|
+
|
|
44
|
+
@Override
|
|
45
|
+
public NextConfig transaction(ConfigSource config, int processorCount,
|
|
46
|
+
FileOutputPlugin.Control control)
|
|
47
|
+
{
|
|
48
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
|
49
|
+
|
|
50
|
+
control.run(task.dump());
|
|
51
|
+
|
|
52
|
+
return Exec.newNextConfig();
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
@Override
|
|
56
|
+
public TransactionalFileOutput open(TaskSource taskSource, final int processorIndex)
|
|
57
|
+
{
|
|
58
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
|
59
|
+
|
|
60
|
+
// TODO format path using timestamp
|
|
61
|
+
final String fileName = task.getFileNameFormat();
|
|
62
|
+
|
|
63
|
+
final String pathPrefix = task.getDirectory() + File.separator + fileName;
|
|
64
|
+
final String pathSuffix = task.getFileNameExtension();
|
|
65
|
+
|
|
66
|
+
final List<String> fileNames = new ArrayList<>();
|
|
67
|
+
|
|
68
|
+
return new TransactionalFileOutput() {
|
|
69
|
+
private int fileIndex = 0;
|
|
70
|
+
private FileOutputStream output = null;
|
|
71
|
+
|
|
72
|
+
public void nextFile()
|
|
73
|
+
{
|
|
74
|
+
closeFile();
|
|
75
|
+
String path = pathPrefix + String.format(".%03d.%02d.", processorIndex, fileIndex) + pathSuffix;
|
|
76
|
+
log.info("Writing local file '{}'", path);
|
|
77
|
+
fileNames.add(path);
|
|
78
|
+
try {
|
|
79
|
+
output = new FileOutputStream(new File(path));
|
|
80
|
+
} catch (FileNotFoundException ex) {
|
|
81
|
+
throw new RuntimeException(ex); // TODO exception class
|
|
82
|
+
}
|
|
83
|
+
fileIndex++;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
private void closeFile()
|
|
87
|
+
{
|
|
88
|
+
if (output != null) {
|
|
89
|
+
try {
|
|
90
|
+
output.close();
|
|
91
|
+
} catch (IOException ex) {
|
|
92
|
+
throw new RuntimeException(ex);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
public void add(Buffer buffer)
|
|
98
|
+
{
|
|
99
|
+
try {
|
|
100
|
+
output.write(buffer.array(), buffer.offset(), buffer.limit());
|
|
101
|
+
} catch (IOException ex) {
|
|
102
|
+
throw new RuntimeException(ex);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
public void finish()
|
|
107
|
+
{
|
|
108
|
+
closeFile();
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
public void close()
|
|
112
|
+
{
|
|
113
|
+
closeFile();
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
public void abort() { }
|
|
117
|
+
|
|
118
|
+
public CommitReport commit()
|
|
119
|
+
{
|
|
120
|
+
CommitReport report = Exec.newCommitReport();
|
|
121
|
+
// TODO better setting for Report
|
|
122
|
+
// report.set("file_names", fileNames);
|
|
123
|
+
// report.set("file_sizes", fileSizes);
|
|
124
|
+
return report;
|
|
125
|
+
}
|
|
126
|
+
};
|
|
127
|
+
}
|
|
128
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
package org.embulk.standards;
|
|
2
|
+
|
|
3
|
+
import org.embulk.config.ConfigSource;
|
|
4
|
+
import org.embulk.config.TaskSource;
|
|
5
|
+
import org.embulk.config.NextConfig;
|
|
6
|
+
import org.embulk.config.CommitReport;
|
|
7
|
+
import org.embulk.spi.Schema;
|
|
8
|
+
import org.embulk.spi.Page;
|
|
9
|
+
import org.embulk.spi.Exec;
|
|
10
|
+
import org.embulk.spi.OutputPlugin;
|
|
11
|
+
import org.embulk.spi.TransactionalPageOutput;
|
|
12
|
+
|
|
13
|
+
public class NullOutputPlugin
|
|
14
|
+
implements OutputPlugin
|
|
15
|
+
{
|
|
16
|
+
@Override
|
|
17
|
+
public NextConfig transaction(ConfigSource config,
|
|
18
|
+
Schema schema, int processorCount,
|
|
19
|
+
OutputPlugin.Control control)
|
|
20
|
+
{
|
|
21
|
+
control.run(Exec.newTaskSource());
|
|
22
|
+
return Exec.newNextConfig();
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
@Override
|
|
26
|
+
public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int processorIndex)
|
|
27
|
+
{
|
|
28
|
+
return new TransactionalPageOutput() {
|
|
29
|
+
public void add(Page page)
|
|
30
|
+
{
|
|
31
|
+
page.release();
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
public void finish() { }
|
|
35
|
+
|
|
36
|
+
public void close() { }
|
|
37
|
+
|
|
38
|
+
public void abort() { }
|
|
39
|
+
|
|
40
|
+
public CommitReport commit()
|
|
41
|
+
{
|
|
42
|
+
return Exec.newCommitReport();
|
|
43
|
+
}
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
}
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
package org.embulk.standards;
|
|
2
|
+
|
|
3
|
+
import java.util.List;
|
|
4
|
+
import java.io.IOException;
|
|
5
|
+
import java.io.InputStream;
|
|
6
|
+
|
|
7
|
+
import com.amazonaws.AmazonClientException;
|
|
8
|
+
import com.amazonaws.AmazonServiceException;
|
|
9
|
+
import com.google.common.collect.ImmutableList;
|
|
10
|
+
import com.google.common.base.Optional;
|
|
11
|
+
import com.fasterxml.jackson.annotation.JacksonInject;
|
|
12
|
+
import com.amazonaws.auth.AWSCredentials;
|
|
13
|
+
import com.amazonaws.auth.AWSCredentialsProvider;
|
|
14
|
+
import com.amazonaws.auth.BasicAWSCredentials;
|
|
15
|
+
import com.amazonaws.services.s3.AmazonS3Client;
|
|
16
|
+
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
|
17
|
+
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
|
18
|
+
import com.amazonaws.services.s3.model.ObjectListing;
|
|
19
|
+
import com.amazonaws.services.s3.model.GetObjectRequest;
|
|
20
|
+
import com.amazonaws.services.s3.model.S3Object;
|
|
21
|
+
import com.amazonaws.ClientConfiguration;
|
|
22
|
+
import org.embulk.config.Config;
|
|
23
|
+
import org.embulk.config.Task;
|
|
24
|
+
import org.embulk.config.TaskSource;
|
|
25
|
+
import org.embulk.config.ConfigSource;
|
|
26
|
+
import org.embulk.config.NextConfig;
|
|
27
|
+
import org.embulk.config.CommitReport;
|
|
28
|
+
import org.embulk.spi.BufferAllocator;
|
|
29
|
+
import org.embulk.spi.Exec;
|
|
30
|
+
import org.embulk.spi.FileInputPlugin;
|
|
31
|
+
import org.embulk.spi.TransactionalFileInput;
|
|
32
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
|
33
|
+
import org.slf4j.Logger;
|
|
34
|
+
|
|
35
|
+
import static org.embulk.spi.util.Inputs.formatPath;
|
|
36
|
+
|
|
37
|
+
public class S3FileInputPlugin
|
|
38
|
+
implements FileInputPlugin
|
|
39
|
+
{
|
|
40
|
+
public interface PluginTask
|
|
41
|
+
extends Task
|
|
42
|
+
{
|
|
43
|
+
@Config("bucket")
|
|
44
|
+
public String getBucket();
|
|
45
|
+
|
|
46
|
+
@Config("paths")
|
|
47
|
+
public List<String> getPathPrefixes();
|
|
48
|
+
|
|
49
|
+
@Config("endpoint")
|
|
50
|
+
public Optional<String> getEndpoint();
|
|
51
|
+
|
|
52
|
+
// TODO timeout, ssl, etc
|
|
53
|
+
|
|
54
|
+
@Config("access_key_id")
|
|
55
|
+
public String getAccessKeyId();
|
|
56
|
+
|
|
57
|
+
@Config("secret_access_key")
|
|
58
|
+
public String getSecretAccessKey();
|
|
59
|
+
|
|
60
|
+
// TODO support more options such as STS
|
|
61
|
+
|
|
62
|
+
public List<String> getFiles();
|
|
63
|
+
public void setFiles(List<String> files);
|
|
64
|
+
|
|
65
|
+
@JacksonInject
|
|
66
|
+
public BufferAllocator getBufferAllocator();
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
private final Logger log = Exec.getLogger(getClass());
|
|
70
|
+
|
|
71
|
+
@Override
|
|
72
|
+
public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
|
|
73
|
+
{
|
|
74
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
|
75
|
+
|
|
76
|
+
// list files recursively
|
|
77
|
+
task.setFiles(listFiles(task));
|
|
78
|
+
|
|
79
|
+
// number of processors is same with number of files
|
|
80
|
+
|
|
81
|
+
// run
|
|
82
|
+
control.run(task.dump(), task.getFiles().size());
|
|
83
|
+
|
|
84
|
+
return Exec.newNextConfig();
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
public static AWSCredentialsProvider getCredentialsProvider(PluginTask task)
|
|
88
|
+
{
|
|
89
|
+
final AWSCredentials cred = new BasicAWSCredentials(
|
|
90
|
+
task.getAccessKeyId(), task.getSecretAccessKey());
|
|
91
|
+
return new AWSCredentialsProvider() {
|
|
92
|
+
public AWSCredentials getCredentials()
|
|
93
|
+
{
|
|
94
|
+
return cred;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
public void refresh()
|
|
98
|
+
{
|
|
99
|
+
}
|
|
100
|
+
};
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
private static AmazonS3Client newS3Client(PluginTask task)
|
|
104
|
+
{
|
|
105
|
+
AWSCredentialsProvider credentials = getCredentialsProvider(task);
|
|
106
|
+
AmazonS3Client client = newS3Client(credentials, task.getEndpoint());
|
|
107
|
+
return client;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
private static AmazonS3Client newS3Client(AWSCredentialsProvider credentials,
|
|
111
|
+
Optional<String> endpoint)
|
|
112
|
+
{
|
|
113
|
+
// TODO get config from AmazonS3Task
|
|
114
|
+
ClientConfiguration clientConfig = new ClientConfiguration();
|
|
115
|
+
//clientConfig.setProtocol(Protocol.HTTP);
|
|
116
|
+
clientConfig.setMaxConnections(50); // SDK default: 50
|
|
117
|
+
clientConfig.setMaxErrorRetry(3); // SDK default: 3
|
|
118
|
+
clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
|
|
119
|
+
|
|
120
|
+
AmazonS3Client client = new AmazonS3Client(credentials, clientConfig);
|
|
121
|
+
|
|
122
|
+
if (endpoint.isPresent()) {
|
|
123
|
+
client.setEndpoint(endpoint.get());
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
return client;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
public List<String> listFiles(PluginTask task)
|
|
130
|
+
{
|
|
131
|
+
AmazonS3Client client = newS3Client(task);
|
|
132
|
+
String bucketName = task.getBucket();
|
|
133
|
+
|
|
134
|
+
ImmutableList.Builder<String> builder = ImmutableList.builder();
|
|
135
|
+
for (String prefix : task.getPathPrefixes()) {
|
|
136
|
+
String formatted = formatPath(prefix);
|
|
137
|
+
try {
|
|
138
|
+
log.info("Listing S3 files with prefix '{}'", formatted);
|
|
139
|
+
builder.addAll(listS3FilesByPrefix(client, bucketName, formatted));
|
|
140
|
+
} catch (RuntimeException e) {
|
|
141
|
+
throw new RuntimeException(String.format("Failed get a list of S3 files at '%s'", formatted), e);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
return builder.build();
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
/**
|
|
149
|
+
* Lists S3 filenames filtered by prefix.
|
|
150
|
+
*
|
|
151
|
+
* The resulting list does not include the file that's size == 0.
|
|
152
|
+
*/
|
|
153
|
+
public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName, String prefix)
|
|
154
|
+
{
|
|
155
|
+
// TODO implement retrying
|
|
156
|
+
ImmutableList.Builder<String> builder = ImmutableList.builder();
|
|
157
|
+
|
|
158
|
+
String lastKey = null;
|
|
159
|
+
do {
|
|
160
|
+
ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
|
|
161
|
+
ObjectListing ol = client.listObjects(req);
|
|
162
|
+
for(S3ObjectSummary s : ol.getObjectSummaries()) {
|
|
163
|
+
if (s.getSize() > 0) {
|
|
164
|
+
builder.add(s.getKey());
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
lastKey = ol.getNextMarker();
|
|
168
|
+
} while(lastKey != null);
|
|
169
|
+
|
|
170
|
+
return builder.build();
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
@Override
|
|
174
|
+
public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
|
|
175
|
+
{
|
|
176
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
|
177
|
+
return new S3FileInput(task, processorIndex);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
public static class S3FileInput
|
|
181
|
+
extends InputStreamFileInput
|
|
182
|
+
implements TransactionalFileInput
|
|
183
|
+
{
|
|
184
|
+
// TODO create single-file InputStreamFileInput utility
|
|
185
|
+
private static class SingleFileProvider
|
|
186
|
+
implements InputStreamFileInput.Provider
|
|
187
|
+
{
|
|
188
|
+
private AmazonS3Client client;
|
|
189
|
+
private final String bucket;
|
|
190
|
+
private final String key;
|
|
191
|
+
private boolean opened = false;
|
|
192
|
+
|
|
193
|
+
public SingleFileProvider(PluginTask task, int processorIndex)
|
|
194
|
+
{
|
|
195
|
+
this.client = newS3Client(task);
|
|
196
|
+
this.bucket = task.getBucket();
|
|
197
|
+
this.key = task.getFiles().get(processorIndex);
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
@Override
|
|
201
|
+
public InputStream openNext() throws IOException
|
|
202
|
+
{
|
|
203
|
+
if (opened) {
|
|
204
|
+
return null;
|
|
205
|
+
}
|
|
206
|
+
opened = true;
|
|
207
|
+
GetObjectRequest request = new GetObjectRequest(bucket, key);
|
|
208
|
+
//if (pos > 0) {
|
|
209
|
+
// request.setRange(pos, contentLength);
|
|
210
|
+
//}
|
|
211
|
+
S3Object obj = client.getObject(request);
|
|
212
|
+
//if (pos <= 0) {
|
|
213
|
+
// // first call
|
|
214
|
+
// contentLength = obj.getObjectMetadata().getContentLength();
|
|
215
|
+
//}
|
|
216
|
+
return obj.getObjectContent();
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
@Override
|
|
220
|
+
public void close() { }
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
public S3FileInput(PluginTask task, int processorIndex)
|
|
224
|
+
{
|
|
225
|
+
super(task.getBufferAllocator(), new SingleFileProvider(task, processorIndex));
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
public void abort() { }
|
|
229
|
+
|
|
230
|
+
public CommitReport commit()
|
|
231
|
+
{
|
|
232
|
+
return Exec.newCommitReport();
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
@Override
|
|
236
|
+
public void close() { }
|
|
237
|
+
}
|
|
238
|
+
}
|