embulk 0.3.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -13
- data/.travis.yml +16 -0
- data/Gemfile +0 -1
- data/README.md +37 -19
- data/Rakefile +5 -37
- data/bin/embulk +1 -1
- data/build.gradle +178 -95
- data/embulk-core/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +11 -10
- data/embulk-core/src/main/java/org/embulk/config/ConfigDiff.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigInject.java +14 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +2 -2
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +4 -3
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +5 -3
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +6 -6
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +19 -19
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +61 -36
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +4 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +16 -1
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/{SchemaVisitor.java → ColumnVisitor.java} +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/PluginClassLoader.java +80 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +6 -6
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +6 -6
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +1 -1
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +1 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +2 -2
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +1 -1
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +3 -3
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +4 -4
- data/embulk-docs/Makefile +178 -0
- data/embulk-docs/build.gradle +20 -0
- data/embulk-docs/make.bat +243 -0
- data/embulk-docs/push-gh-pages.sh +29 -0
- data/embulk-docs/src/conf.py +260 -0
- data/embulk-docs/src/index.rst +19 -0
- data/embulk-docs/src/release.rst +14 -0
- data/embulk-docs/src/release/release-0.1.0.rst +8 -0
- data/embulk-docs/src/release/release-0.2.0.rst +16 -0
- data/embulk-docs/src/release/release-0.2.1.rst +19 -0
- data/embulk-docs/src/release/release-0.3.0.rst +34 -0
- data/embulk-docs/src/release/release-0.3.1.rst +11 -0
- data/embulk-docs/src/release/release-0.3.2.rst +15 -0
- data/embulk-docs/src/release/release-0.4.0.rst +74 -0
- data/embulk-standards/build.gradle +0 -1
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +2 -2
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +12 -3
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +2 -2
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +49 -26
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +16 -17
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +4 -4
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +0 -1
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +5 -5
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/buffer.rb +2 -2
- data/lib/embulk/column.rb +6 -6
- data/lib/embulk/command/embulk_example.rb +1 -1
- data/lib/embulk/command/embulk_new_plugin.rb +87 -0
- data/lib/embulk/command/embulk_run.rb +84 -26
- data/lib/embulk/data/bundle/Gemfile +12 -20
- data/lib/embulk/data/bundle/embulk/{filter_example.rb → filter/example.rb} +3 -3
- data/lib/embulk/data/bundle/embulk/{input_example.rb → input/example.rb} +3 -3
- data/lib/embulk/data/bundle/embulk/{output_example.rb → output/example.rb} +3 -3
- data/lib/embulk/data/new/LICENSE.txt +21 -0
- data/lib/embulk/data/new/README.md.erb +75 -0
- data/lib/embulk/data/new/gitignore.erb +12 -0
- data/lib/embulk/data/new/java/build.gradle.erb +57 -0
- data/lib/embulk/data/new/java/decoder.java.erb +40 -0
- data/lib/embulk/data/new/java/encoder.java.erb +40 -0
- data/lib/embulk/data/new/java/file_input.java.erb +64 -0
- data/lib/embulk/data/new/java/file_output.java.erb +66 -0
- data/lib/embulk/data/new/java/filter.java.erb +47 -0
- data/lib/embulk/data/new/java/formatter.java.erb +45 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/lib/embulk/data/new/java/gradlew +164 -0
- data/lib/embulk/data/new/java/gradlew.bat +90 -0
- data/lib/embulk/data/new/java/input.java.erb +69 -0
- data/lib/embulk/data/new/java/output.java.erb +65 -0
- data/lib/embulk/data/new/java/parser.java.erb +51 -0
- data/lib/embulk/data/new/java/plugin_loader.rb.erb +3 -0
- data/lib/embulk/data/new/java/test.java.erb +5 -0
- data/lib/embulk/data/new/ruby/Gemfile +2 -0
- data/lib/embulk/data/new/ruby/Rakefile +1 -0
- data/lib/embulk/data/new/ruby/filter.rb.erb +39 -0
- data/lib/embulk/data/new/ruby/gemspec.erb +19 -0
- data/lib/embulk/data/new/ruby/input.rb.erb +47 -0
- data/lib/embulk/data/new/ruby/output.rb.erb +59 -0
- data/lib/embulk/data/package_data.rb +64 -0
- data/lib/embulk/data_source.rb +2 -2
- data/lib/embulk/decoder_plugin.rb +27 -0
- data/lib/embulk/encoder_plugin.rb +27 -0
- data/lib/embulk/error.rb +3 -0
- data/lib/embulk/file_input_plugin.rb +27 -0
- data/lib/embulk/file_output_plugin.rb +27 -0
- data/lib/embulk/filter_plugin.rb +28 -9
- data/lib/embulk/formatter_plugin.rb +105 -0
- data/lib/embulk/guess_csv.rb +10 -1
- data/lib/embulk/guess_plugin.rb +22 -27
- data/lib/embulk/input_plugin.rb +34 -20
- data/lib/embulk/java/bootstrap.rb +5 -0
- data/lib/embulk/java/imports.rb +7 -0
- data/lib/embulk/java_plugin.rb +84 -0
- data/lib/embulk/output_plugin.rb +35 -19
- data/lib/embulk/page.rb +1 -1
- data/lib/embulk/page_builder.rb +1 -1
- data/lib/embulk/parser_plugin.rb +76 -0
- data/lib/embulk/plugin.rb +130 -65
- data/lib/embulk/plugin_registry.rb +19 -8
- data/lib/embulk/schema.rb +4 -4
- data/lib/embulk/version.rb +1 -1
- data/settings.gradle +1 -0
- metadata +123 -90
- data/ChangeLog +0 -46
- data/embulk-cli/pom.xml +0 -94
- data/embulk-core/pom.xml +0 -148
- data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +0 -26
- data/embulk-standards/pom.xml +0 -68
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +0 -250
- data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +0 -43
- data/pom.xml +0 -541
data/ChangeLog
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
|
2
|
-
2015-02-04 version 0.3.2:
|
3
|
-
|
4
|
-
* Fixed a problem where ruby input plugins can't use timestamp type (reported
|
5
|
-
by @shun0102)
|
6
|
-
* Embulk::Page includes Enumerable to include map, each_with_index, and other
|
7
|
-
a lot of convenient methods (@niku++)
|
8
|
-
* Fixed TimestampType::DEFAULT_FORMAT to use ':' as the separator of times
|
9
|
-
|
10
|
-
|
11
|
-
2015-02-04 version 0.3.1:
|
12
|
-
|
13
|
-
* Added -C, --classpath option to add java classpath
|
14
|
-
|
15
|
-
|
16
|
-
2015-02-03 version 0.3.0:
|
17
|
-
|
18
|
-
* Added resume functionality. InputPlugin and OutputPlugin need to implement
|
19
|
-
resume and cleanup methods.
|
20
|
-
* cli: embulk-run supports -r, --resume-state PATH option.
|
21
|
-
* Added FilterInputPlugin Java API.
|
22
|
-
* Added FilterInputPlugin JRuby API.
|
23
|
-
* Configuration file accepts filters: array entry.
|
24
|
-
* Added gradle-versions-plugin to build.gradle (@seratch++)
|
25
|
-
* Fixed broken dependencies at build.gradle (@thagikura++)
|
26
|
-
|
27
|
-
|
28
|
-
2015-01-29 version 0.2.1:
|
29
|
-
|
30
|
-
* Fixed LineEncoder#finish to flush all remaining buffer (reported by @aibou)
|
31
|
-
* Fixed NextConfig to be merged to in: or out: rather than the top-level
|
32
|
-
(reported by enukane) [#41]
|
33
|
-
* ./bin/embulk shows a warning to run `rake` if ./classpath doesn't exist
|
34
|
-
* Embulk::PageBuilder#add accepts nil
|
35
|
-
|
36
|
-
|
37
|
-
2015-01-26 version 0.2.0:
|
38
|
-
|
39
|
-
* Changed JRuby InputPlugin API to use #run instead of .run
|
40
|
-
* Fixed 'example' subcommand to quote file path for Windows environment
|
41
|
-
|
42
|
-
|
43
|
-
2015-01-26 version 0.1.0:
|
44
|
-
|
45
|
-
* The first release
|
46
|
-
|
data/embulk-cli/pom.xml
DELETED
@@ -1,94 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
3
|
-
<modelVersion>4.0.0</modelVersion>
|
4
|
-
|
5
|
-
<parent>
|
6
|
-
<groupId>org.embulk</groupId>
|
7
|
-
<artifactId>embulk-parent</artifactId>
|
8
|
-
<version>0.3.2-SNAPSHOT</version>
|
9
|
-
</parent>
|
10
|
-
|
11
|
-
<artifactId>embulk-cli</artifactId>
|
12
|
-
<name>embulk-cli</name>
|
13
|
-
|
14
|
-
<dependencies>
|
15
|
-
<dependency>
|
16
|
-
<groupId>org.jruby</groupId>
|
17
|
-
<artifactId>jruby-complete</artifactId>
|
18
|
-
</dependency>
|
19
|
-
|
20
|
-
<dependency>
|
21
|
-
<groupId>org.embulk</groupId>
|
22
|
-
<artifactId>embulk-core</artifactId>
|
23
|
-
</dependency>
|
24
|
-
|
25
|
-
<!-- TODO move embulk-standards to another repository -->
|
26
|
-
<!-- TODO once embulk-standards is moved to another repository, replace this dependency with gem from rubygems-proxy -->
|
27
|
-
<!-- and change Rakefile to include embulk-core instead of embulk-cli and gemspec depends on embulk-standards -->
|
28
|
-
<dependency>
|
29
|
-
<groupId>org.embulk</groupId>
|
30
|
-
<artifactId>embulk-standards</artifactId>
|
31
|
-
</dependency>
|
32
|
-
</dependencies>
|
33
|
-
|
34
|
-
<repositories>
|
35
|
-
<!-- See https://github.com/jruby/jruby/wiki/Jruby-Scripting-container-using-Gems-with-a-Maven-Project -->
|
36
|
-
<repository>
|
37
|
-
<id>rubygems-proxy</id>
|
38
|
-
<name>RubyGems Proxy</name>
|
39
|
-
<url>http://rubygems-proxy.torquebox.org/releases</url>
|
40
|
-
<layout>default</layout>
|
41
|
-
<releases>
|
42
|
-
<enabled>true</enabled>
|
43
|
-
</releases>
|
44
|
-
<snapshots>
|
45
|
-
<enabled>false</enabled>
|
46
|
-
<updatePolicy>never</updatePolicy>
|
47
|
-
</snapshots>
|
48
|
-
</repository>
|
49
|
-
</repositories>
|
50
|
-
|
51
|
-
<build>
|
52
|
-
<plugins>
|
53
|
-
<plugin>
|
54
|
-
<groupId>de.saumya.mojo</groupId>
|
55
|
-
<artifactId>gem-maven-plugin</artifactId>
|
56
|
-
<version>1.0.0</version>
|
57
|
-
<configuration>
|
58
|
-
<includeRubygemsInResources>true</includeRubygemsInResources>
|
59
|
-
</configuration>
|
60
|
-
<executions>
|
61
|
-
<execution>
|
62
|
-
<goals>
|
63
|
-
<goal>initialize</goal>
|
64
|
-
</goals>
|
65
|
-
</execution>
|
66
|
-
</executions>
|
67
|
-
</plugin>
|
68
|
-
|
69
|
-
<plugin>
|
70
|
-
<groupId>org.apache.maven.plugins</groupId>
|
71
|
-
<artifactId>maven-shade-plugin</artifactId>
|
72
|
-
<executions>
|
73
|
-
<execution>
|
74
|
-
<phase>package</phase>
|
75
|
-
<goals>
|
76
|
-
<goal>shade</goal>
|
77
|
-
</goals>
|
78
|
-
<configuration>
|
79
|
-
<shadedArtifactAttached>true</shadedArtifactAttached>
|
80
|
-
<shadedClassifierName>executable</shadedClassifierName>
|
81
|
-
<transformers>
|
82
|
-
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
83
|
-
<manifestEntries>
|
84
|
-
<Main-Class>org.embulk.cli.Main</Main-Class>
|
85
|
-
</manifestEntries>
|
86
|
-
</transformer>
|
87
|
-
</transformers>
|
88
|
-
</configuration>
|
89
|
-
</execution>
|
90
|
-
</executions>
|
91
|
-
</plugin>
|
92
|
-
</plugins>
|
93
|
-
</build>
|
94
|
-
</project>
|
data/embulk-core/pom.xml
DELETED
@@ -1,148 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
3
|
-
<modelVersion>4.0.0</modelVersion>
|
4
|
-
|
5
|
-
<parent>
|
6
|
-
<groupId>org.embulk</groupId>
|
7
|
-
<artifactId>embulk-parent</artifactId>
|
8
|
-
<version>0.3.2-SNAPSHOT</version>
|
9
|
-
</parent>
|
10
|
-
|
11
|
-
<artifactId>embulk-core</artifactId>
|
12
|
-
<name>embulk-core</name>
|
13
|
-
|
14
|
-
<dependencies>
|
15
|
-
<dependency>
|
16
|
-
<groupId>commons-logging</groupId>
|
17
|
-
<artifactId>commons-logging</artifactId>
|
18
|
-
</dependency>
|
19
|
-
|
20
|
-
<dependency>
|
21
|
-
<groupId>com.google.guava</groupId>
|
22
|
-
<artifactId>guava</artifactId>
|
23
|
-
</dependency>
|
24
|
-
|
25
|
-
<dependency>
|
26
|
-
<groupId>com.google.inject</groupId>
|
27
|
-
<artifactId>guice</artifactId>
|
28
|
-
</dependency>
|
29
|
-
|
30
|
-
<dependency>
|
31
|
-
<groupId>com.google.inject.extensions</groupId>
|
32
|
-
<artifactId>guice-multibindings</artifactId>
|
33
|
-
</dependency>
|
34
|
-
|
35
|
-
<dependency>
|
36
|
-
<groupId>javax.inject</groupId>
|
37
|
-
<artifactId>javax.inject</artifactId>
|
38
|
-
</dependency>
|
39
|
-
|
40
|
-
<dependency>
|
41
|
-
<groupId>com.fasterxml.jackson.core</groupId>
|
42
|
-
<artifactId>jackson-databind</artifactId>
|
43
|
-
</dependency>
|
44
|
-
|
45
|
-
<dependency>
|
46
|
-
<groupId>com.fasterxml.jackson.core</groupId>
|
47
|
-
<artifactId>jackson-annotations</artifactId>
|
48
|
-
</dependency>
|
49
|
-
|
50
|
-
<dependency>
|
51
|
-
<groupId>com.fasterxml.jackson.core</groupId>
|
52
|
-
<artifactId>jackson-core</artifactId>
|
53
|
-
</dependency>
|
54
|
-
|
55
|
-
<dependency>
|
56
|
-
<groupId>com.fasterxml.jackson.datatype</groupId>
|
57
|
-
<artifactId>jackson-datatype-guava</artifactId>
|
58
|
-
</dependency>
|
59
|
-
|
60
|
-
<dependency>
|
61
|
-
<groupId>com.fasterxml.jackson.module</groupId>
|
62
|
-
<artifactId>jackson-module-guice</artifactId>
|
63
|
-
</dependency>
|
64
|
-
|
65
|
-
<dependency>
|
66
|
-
<groupId>com.fasterxml.jackson.datatype</groupId>
|
67
|
-
<artifactId>jackson-datatype-joda</artifactId>
|
68
|
-
</dependency>
|
69
|
-
|
70
|
-
<dependency>
|
71
|
-
<groupId>org.slf4j</groupId>
|
72
|
-
<artifactId>slf4j-api</artifactId>
|
73
|
-
</dependency>
|
74
|
-
|
75
|
-
<dependency>
|
76
|
-
<groupId>org.slf4j</groupId>
|
77
|
-
<artifactId>slf4j-log4j12</artifactId>
|
78
|
-
</dependency>
|
79
|
-
|
80
|
-
<dependency>
|
81
|
-
<groupId>log4j</groupId>
|
82
|
-
<artifactId>log4j</artifactId>
|
83
|
-
</dependency>
|
84
|
-
|
85
|
-
<dependency>
|
86
|
-
<groupId>joda-time</groupId>
|
87
|
-
<artifactId>joda-time</artifactId>
|
88
|
-
</dependency>
|
89
|
-
|
90
|
-
<dependency>
|
91
|
-
<groupId>org.yaml</groupId>
|
92
|
-
<artifactId>snakeyaml</artifactId>
|
93
|
-
</dependency>
|
94
|
-
|
95
|
-
<dependency>
|
96
|
-
<groupId>javax.validation</groupId>
|
97
|
-
<artifactId>validation-api</artifactId>
|
98
|
-
</dependency>
|
99
|
-
|
100
|
-
<dependency>
|
101
|
-
<groupId>org.apache.bval</groupId>
|
102
|
-
<artifactId>bval-jsr303</artifactId>
|
103
|
-
</dependency>
|
104
|
-
|
105
|
-
<dependency>
|
106
|
-
<groupId>io.airlift</groupId>
|
107
|
-
<artifactId>slice</artifactId>
|
108
|
-
</dependency>
|
109
|
-
|
110
|
-
<dependency>
|
111
|
-
<groupId>io.netty</groupId>
|
112
|
-
<artifactId>netty-buffer</artifactId>
|
113
|
-
</dependency>
|
114
|
-
|
115
|
-
<dependency>
|
116
|
-
<groupId>org.jruby</groupId>
|
117
|
-
<artifactId>jruby-complete</artifactId>
|
118
|
-
</dependency>
|
119
|
-
|
120
|
-
<dependency>
|
121
|
-
<groupId>com.google.code.findbugs</groupId>
|
122
|
-
<artifactId>annotations</artifactId>
|
123
|
-
</dependency>
|
124
|
-
|
125
|
-
<!-- for guess_charset plugin -->
|
126
|
-
<dependency>
|
127
|
-
<groupId>com.ibm.icu</groupId>
|
128
|
-
<artifactId>icu4j</artifactId>
|
129
|
-
<scope>runtime</scope>
|
130
|
-
</dependency>
|
131
|
-
|
132
|
-
<dependency>
|
133
|
-
<groupId>junit</groupId>
|
134
|
-
<artifactId>junit</artifactId>
|
135
|
-
</dependency>
|
136
|
-
</dependencies>
|
137
|
-
|
138
|
-
<build>
|
139
|
-
<resources>
|
140
|
-
<resource>
|
141
|
-
<directory>src/main/resources</directory>
|
142
|
-
</resource>
|
143
|
-
<resource>
|
144
|
-
<directory>../lib</directory>
|
145
|
-
</resource>
|
146
|
-
</resources>
|
147
|
-
</build>
|
148
|
-
</project>
|
@@ -1,26 +0,0 @@
|
|
1
|
-
package org.embulk.config;
|
2
|
-
|
3
|
-
public interface NextConfig
|
4
|
-
extends DataSource
|
5
|
-
{
|
6
|
-
@Override
|
7
|
-
public NextConfig getNested(String attrName);
|
8
|
-
|
9
|
-
@Override
|
10
|
-
public NextConfig getNestedOrSetEmpty(String attrName);
|
11
|
-
|
12
|
-
@Override
|
13
|
-
public NextConfig set(String attrName, Object v);
|
14
|
-
|
15
|
-
@Override
|
16
|
-
public NextConfig setNested(String attrName, DataSource v);
|
17
|
-
|
18
|
-
@Override
|
19
|
-
public NextConfig setAll(DataSource other);
|
20
|
-
|
21
|
-
@Override
|
22
|
-
public NextConfig deepCopy();
|
23
|
-
|
24
|
-
@Override
|
25
|
-
public NextConfig merge(DataSource other);
|
26
|
-
}
|
data/embulk-standards/pom.xml
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
3
|
-
<modelVersion>4.0.0</modelVersion>
|
4
|
-
|
5
|
-
<parent>
|
6
|
-
<groupId>org.embulk</groupId>
|
7
|
-
<artifactId>embulk-parent</artifactId>
|
8
|
-
<version>0.3.2-SNAPSHOT</version>
|
9
|
-
</parent>
|
10
|
-
|
11
|
-
<artifactId>embulk-standards</artifactId>
|
12
|
-
<name>embulk-standards</name>
|
13
|
-
|
14
|
-
<dependencies>
|
15
|
-
<dependency>
|
16
|
-
<groupId>org.embulk</groupId>
|
17
|
-
<artifactId>embulk-core</artifactId>
|
18
|
-
</dependency>
|
19
|
-
|
20
|
-
<dependency>
|
21
|
-
<groupId>org.embulk</groupId>
|
22
|
-
<artifactId>embulk-core</artifactId>
|
23
|
-
<type>test-jar</type>
|
24
|
-
<scope>test</scope>
|
25
|
-
</dependency>
|
26
|
-
|
27
|
-
<dependency>
|
28
|
-
<groupId>com.google.guava</groupId>
|
29
|
-
<artifactId>guava</artifactId>
|
30
|
-
</dependency>
|
31
|
-
|
32
|
-
<dependency>
|
33
|
-
<groupId>com.google.inject</groupId>
|
34
|
-
<artifactId>guice</artifactId>
|
35
|
-
</dependency>
|
36
|
-
|
37
|
-
<dependency>
|
38
|
-
<groupId>javax.validation</groupId>
|
39
|
-
<artifactId>validation-api</artifactId>
|
40
|
-
</dependency>
|
41
|
-
|
42
|
-
<dependency>
|
43
|
-
<groupId>com.fasterxml.jackson.core</groupId>
|
44
|
-
<artifactId>jackson-databind</artifactId>
|
45
|
-
</dependency>
|
46
|
-
|
47
|
-
<dependency>
|
48
|
-
<groupId>org.slf4j</groupId>
|
49
|
-
<artifactId>slf4j-api</artifactId>
|
50
|
-
</dependency>
|
51
|
-
|
52
|
-
<dependency>
|
53
|
-
<groupId>com.amazonaws</groupId>
|
54
|
-
<artifactId>aws-java-sdk</artifactId>
|
55
|
-
<version>1.5.2</version>
|
56
|
-
</dependency>
|
57
|
-
|
58
|
-
<dependency>
|
59
|
-
<groupId>junit</groupId>
|
60
|
-
<artifactId>junit</artifactId>
|
61
|
-
</dependency>
|
62
|
-
|
63
|
-
<dependency>
|
64
|
-
<groupId>org.mockito</groupId>
|
65
|
-
<artifactId>mockito-core</artifactId>
|
66
|
-
</dependency>
|
67
|
-
</dependencies>
|
68
|
-
</project>
|
@@ -1,250 +0,0 @@
|
|
1
|
-
package org.embulk.standards;
|
2
|
-
|
3
|
-
import java.util.List;
|
4
|
-
import java.io.IOException;
|
5
|
-
import java.io.InputStream;
|
6
|
-
|
7
|
-
import com.amazonaws.AmazonClientException;
|
8
|
-
import com.amazonaws.AmazonServiceException;
|
9
|
-
import com.google.common.collect.ImmutableList;
|
10
|
-
import com.google.common.base.Optional;
|
11
|
-
import com.fasterxml.jackson.annotation.JacksonInject;
|
12
|
-
import com.amazonaws.auth.AWSCredentials;
|
13
|
-
import com.amazonaws.auth.AWSCredentialsProvider;
|
14
|
-
import com.amazonaws.auth.BasicAWSCredentials;
|
15
|
-
import com.amazonaws.services.s3.AmazonS3Client;
|
16
|
-
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
17
|
-
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
18
|
-
import com.amazonaws.services.s3.model.ObjectListing;
|
19
|
-
import com.amazonaws.services.s3.model.GetObjectRequest;
|
20
|
-
import com.amazonaws.services.s3.model.S3Object;
|
21
|
-
import com.amazonaws.ClientConfiguration;
|
22
|
-
import org.embulk.config.Config;
|
23
|
-
import org.embulk.config.Task;
|
24
|
-
import org.embulk.config.TaskSource;
|
25
|
-
import org.embulk.config.ConfigSource;
|
26
|
-
import org.embulk.config.NextConfig;
|
27
|
-
import org.embulk.config.CommitReport;
|
28
|
-
import org.embulk.spi.BufferAllocator;
|
29
|
-
import org.embulk.spi.Exec;
|
30
|
-
import org.embulk.spi.FileInputPlugin;
|
31
|
-
import org.embulk.spi.TransactionalFileInput;
|
32
|
-
import org.embulk.spi.util.InputStreamFileInput;
|
33
|
-
import org.slf4j.Logger;
|
34
|
-
|
35
|
-
import static org.embulk.spi.util.Inputs.formatPath;
|
36
|
-
|
37
|
-
public class S3FileInputPlugin
|
38
|
-
implements FileInputPlugin
|
39
|
-
{
|
40
|
-
public interface PluginTask
|
41
|
-
extends Task
|
42
|
-
{
|
43
|
-
@Config("bucket")
|
44
|
-
public String getBucket();
|
45
|
-
|
46
|
-
@Config("paths")
|
47
|
-
public List<String> getPathPrefixes();
|
48
|
-
|
49
|
-
@Config("endpoint")
|
50
|
-
public Optional<String> getEndpoint();
|
51
|
-
|
52
|
-
// TODO timeout, ssl, etc
|
53
|
-
|
54
|
-
@Config("access_key_id")
|
55
|
-
public String getAccessKeyId();
|
56
|
-
|
57
|
-
@Config("secret_access_key")
|
58
|
-
public String getSecretAccessKey();
|
59
|
-
|
60
|
-
// TODO support more options such as STS
|
61
|
-
|
62
|
-
public List<String> getFiles();
|
63
|
-
public void setFiles(List<String> files);
|
64
|
-
|
65
|
-
@JacksonInject
|
66
|
-
public BufferAllocator getBufferAllocator();
|
67
|
-
}
|
68
|
-
|
69
|
-
private final Logger log = Exec.getLogger(getClass());
|
70
|
-
|
71
|
-
@Override
|
72
|
-
public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
|
73
|
-
{
|
74
|
-
PluginTask task = config.loadConfig(PluginTask.class);
|
75
|
-
|
76
|
-
// list files recursively
|
77
|
-
task.setFiles(listFiles(task));
|
78
|
-
|
79
|
-
// number of processors is same with number of files
|
80
|
-
int processorCount = task.getFiles().size();
|
81
|
-
return resume(task.dump(), processorCount, control);
|
82
|
-
}
|
83
|
-
|
84
|
-
@Override
|
85
|
-
public NextConfig resume(TaskSource taskSource,
|
86
|
-
int processorCount,
|
87
|
-
FileInputPlugin.Control control)
|
88
|
-
{
|
89
|
-
control.run(taskSource, processorCount);
|
90
|
-
return Exec.newNextConfig();
|
91
|
-
}
|
92
|
-
|
93
|
-
@Override
|
94
|
-
public void cleanup(TaskSource taskSource,
|
95
|
-
int processorCount,
|
96
|
-
List<CommitReport> successCommitReports)
|
97
|
-
{ }
|
98
|
-
|
99
|
-
public static AWSCredentialsProvider getCredentialsProvider(PluginTask task)
|
100
|
-
{
|
101
|
-
final AWSCredentials cred = new BasicAWSCredentials(
|
102
|
-
task.getAccessKeyId(), task.getSecretAccessKey());
|
103
|
-
return new AWSCredentialsProvider() {
|
104
|
-
public AWSCredentials getCredentials()
|
105
|
-
{
|
106
|
-
return cred;
|
107
|
-
}
|
108
|
-
|
109
|
-
public void refresh()
|
110
|
-
{
|
111
|
-
}
|
112
|
-
};
|
113
|
-
}
|
114
|
-
|
115
|
-
private static AmazonS3Client newS3Client(PluginTask task)
|
116
|
-
{
|
117
|
-
AWSCredentialsProvider credentials = getCredentialsProvider(task);
|
118
|
-
AmazonS3Client client = newS3Client(credentials, task.getEndpoint());
|
119
|
-
return client;
|
120
|
-
}
|
121
|
-
|
122
|
-
private static AmazonS3Client newS3Client(AWSCredentialsProvider credentials,
|
123
|
-
Optional<String> endpoint)
|
124
|
-
{
|
125
|
-
// TODO get config from AmazonS3Task
|
126
|
-
ClientConfiguration clientConfig = new ClientConfiguration();
|
127
|
-
//clientConfig.setProtocol(Protocol.HTTP);
|
128
|
-
clientConfig.setMaxConnections(50); // SDK default: 50
|
129
|
-
clientConfig.setMaxErrorRetry(3); // SDK default: 3
|
130
|
-
clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
|
131
|
-
|
132
|
-
AmazonS3Client client = new AmazonS3Client(credentials, clientConfig);
|
133
|
-
|
134
|
-
if (endpoint.isPresent()) {
|
135
|
-
client.setEndpoint(endpoint.get());
|
136
|
-
}
|
137
|
-
|
138
|
-
return client;
|
139
|
-
}
|
140
|
-
|
141
|
-
public List<String> listFiles(PluginTask task)
|
142
|
-
{
|
143
|
-
AmazonS3Client client = newS3Client(task);
|
144
|
-
String bucketName = task.getBucket();
|
145
|
-
|
146
|
-
ImmutableList.Builder<String> builder = ImmutableList.builder();
|
147
|
-
for (String prefix : task.getPathPrefixes()) {
|
148
|
-
String formatted = formatPath(prefix);
|
149
|
-
try {
|
150
|
-
log.info("Listing S3 files with prefix '{}'", formatted);
|
151
|
-
builder.addAll(listS3FilesByPrefix(client, bucketName, formatted));
|
152
|
-
} catch (RuntimeException e) {
|
153
|
-
throw new RuntimeException(String.format("Failed get a list of S3 files at '%s'", formatted), e);
|
154
|
-
}
|
155
|
-
}
|
156
|
-
|
157
|
-
return builder.build();
|
158
|
-
}
|
159
|
-
|
160
|
-
/**
|
161
|
-
* Lists S3 filenames filtered by prefix.
|
162
|
-
*
|
163
|
-
* The resulting list does not include files whose size is 0.
|
164
|
-
*/
|
165
|
-
public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName, String prefix)
|
166
|
-
{
|
167
|
-
// TODO implement retrying
|
168
|
-
ImmutableList.Builder<String> builder = ImmutableList.builder();
|
169
|
-
|
170
|
-
String lastKey = null;
|
171
|
-
do {
|
172
|
-
ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
|
173
|
-
ObjectListing ol = client.listObjects(req);
|
174
|
-
for(S3ObjectSummary s : ol.getObjectSummaries()) {
|
175
|
-
if (s.getSize() > 0) {
|
176
|
-
builder.add(s.getKey());
|
177
|
-
}
|
178
|
-
}
|
179
|
-
lastKey = ol.getNextMarker();
|
180
|
-
} while(lastKey != null);
|
181
|
-
|
182
|
-
return builder.build();
|
183
|
-
}
|
184
|
-
|
185
|
-
@Override
|
186
|
-
public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
|
187
|
-
{
|
188
|
-
PluginTask task = taskSource.loadTask(PluginTask.class);
|
189
|
-
return new S3FileInput(task, processorIndex);
|
190
|
-
}
|
191
|
-
|
192
|
-
public static class S3FileInput
|
193
|
-
extends InputStreamFileInput
|
194
|
-
implements TransactionalFileInput
|
195
|
-
{
|
196
|
-
// TODO create single-file InputStreamFileInput utility
|
197
|
-
private static class SingleFileProvider
|
198
|
-
implements InputStreamFileInput.Provider
|
199
|
-
{
|
200
|
-
private AmazonS3Client client;
|
201
|
-
private final String bucket;
|
202
|
-
private final String key;
|
203
|
-
private boolean opened = false;
|
204
|
-
|
205
|
-
public SingleFileProvider(PluginTask task, int processorIndex)
|
206
|
-
{
|
207
|
-
this.client = newS3Client(task);
|
208
|
-
this.bucket = task.getBucket();
|
209
|
-
this.key = task.getFiles().get(processorIndex);
|
210
|
-
}
|
211
|
-
|
212
|
-
@Override
|
213
|
-
public InputStream openNext() throws IOException
|
214
|
-
{
|
215
|
-
if (opened) {
|
216
|
-
return null;
|
217
|
-
}
|
218
|
-
opened = true;
|
219
|
-
GetObjectRequest request = new GetObjectRequest(bucket, key);
|
220
|
-
//if (pos > 0) {
|
221
|
-
// request.setRange(pos, contentLength);
|
222
|
-
//}
|
223
|
-
S3Object obj = client.getObject(request);
|
224
|
-
//if (pos <= 0) {
|
225
|
-
// // first call
|
226
|
-
// contentLength = obj.getObjectMetadata().getContentLength();
|
227
|
-
//}
|
228
|
-
return obj.getObjectContent();
|
229
|
-
}
|
230
|
-
|
231
|
-
@Override
|
232
|
-
public void close() { }
|
233
|
-
}
|
234
|
-
|
235
|
-
public S3FileInput(PluginTask task, int processorIndex)
|
236
|
-
{
|
237
|
-
super(task.getBufferAllocator(), new SingleFileProvider(task, processorIndex));
|
238
|
-
}
|
239
|
-
|
240
|
-
public void abort() { }
|
241
|
-
|
242
|
-
public CommitReport commit()
|
243
|
-
{
|
244
|
-
return Exec.newCommitReport();
|
245
|
-
}
|
246
|
-
|
247
|
-
@Override
|
248
|
-
public void close() { }
|
249
|
-
}
|
250
|
-
}
|