embulk 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/.travis.yml +16 -0
- data/Gemfile +0 -1
- data/README.md +37 -19
- data/Rakefile +5 -37
- data/bin/embulk +1 -1
- data/build.gradle +178 -95
- data/embulk-core/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +11 -10
- data/embulk-core/src/main/java/org/embulk/config/ConfigDiff.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigInject.java +14 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +2 -2
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +4 -3
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +5 -3
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +6 -6
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +19 -19
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +61 -36
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +4 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +16 -1
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/{SchemaVisitor.java → ColumnVisitor.java} +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/PluginClassLoader.java +80 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +6 -6
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +6 -6
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +1 -1
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +1 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +2 -2
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +1 -1
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +3 -3
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +4 -4
- data/embulk-docs/Makefile +178 -0
- data/embulk-docs/build.gradle +20 -0
- data/embulk-docs/make.bat +243 -0
- data/embulk-docs/push-gh-pages.sh +29 -0
- data/embulk-docs/src/conf.py +260 -0
- data/embulk-docs/src/index.rst +19 -0
- data/embulk-docs/src/release.rst +14 -0
- data/embulk-docs/src/release/release-0.1.0.rst +8 -0
- data/embulk-docs/src/release/release-0.2.0.rst +16 -0
- data/embulk-docs/src/release/release-0.2.1.rst +19 -0
- data/embulk-docs/src/release/release-0.3.0.rst +34 -0
- data/embulk-docs/src/release/release-0.3.1.rst +11 -0
- data/embulk-docs/src/release/release-0.3.2.rst +15 -0
- data/embulk-docs/src/release/release-0.4.0.rst +74 -0
- data/embulk-standards/build.gradle +0 -1
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +2 -2
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +12 -3
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +2 -2
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +49 -26
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +16 -17
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +4 -4
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +0 -1
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +5 -5
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/buffer.rb +2 -2
- data/lib/embulk/column.rb +6 -6
- data/lib/embulk/command/embulk_example.rb +1 -1
- data/lib/embulk/command/embulk_new_plugin.rb +87 -0
- data/lib/embulk/command/embulk_run.rb +84 -26
- data/lib/embulk/data/bundle/Gemfile +12 -20
- data/lib/embulk/data/bundle/embulk/{filter_example.rb → filter/example.rb} +3 -3
- data/lib/embulk/data/bundle/embulk/{input_example.rb → input/example.rb} +3 -3
- data/lib/embulk/data/bundle/embulk/{output_example.rb → output/example.rb} +3 -3
- data/lib/embulk/data/new/LICENSE.txt +21 -0
- data/lib/embulk/data/new/README.md.erb +75 -0
- data/lib/embulk/data/new/gitignore.erb +12 -0
- data/lib/embulk/data/new/java/build.gradle.erb +57 -0
- data/lib/embulk/data/new/java/decoder.java.erb +40 -0
- data/lib/embulk/data/new/java/encoder.java.erb +40 -0
- data/lib/embulk/data/new/java/file_input.java.erb +64 -0
- data/lib/embulk/data/new/java/file_output.java.erb +66 -0
- data/lib/embulk/data/new/java/filter.java.erb +47 -0
- data/lib/embulk/data/new/java/formatter.java.erb +45 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/lib/embulk/data/new/java/gradlew +164 -0
- data/lib/embulk/data/new/java/gradlew.bat +90 -0
- data/lib/embulk/data/new/java/input.java.erb +69 -0
- data/lib/embulk/data/new/java/output.java.erb +65 -0
- data/lib/embulk/data/new/java/parser.java.erb +51 -0
- data/lib/embulk/data/new/java/plugin_loader.rb.erb +3 -0
- data/lib/embulk/data/new/java/test.java.erb +5 -0
- data/lib/embulk/data/new/ruby/Gemfile +2 -0
- data/lib/embulk/data/new/ruby/Rakefile +1 -0
- data/lib/embulk/data/new/ruby/filter.rb.erb +39 -0
- data/lib/embulk/data/new/ruby/gemspec.erb +19 -0
- data/lib/embulk/data/new/ruby/input.rb.erb +47 -0
- data/lib/embulk/data/new/ruby/output.rb.erb +59 -0
- data/lib/embulk/data/package_data.rb +64 -0
- data/lib/embulk/data_source.rb +2 -2
- data/lib/embulk/decoder_plugin.rb +27 -0
- data/lib/embulk/encoder_plugin.rb +27 -0
- data/lib/embulk/error.rb +3 -0
- data/lib/embulk/file_input_plugin.rb +27 -0
- data/lib/embulk/file_output_plugin.rb +27 -0
- data/lib/embulk/filter_plugin.rb +28 -9
- data/lib/embulk/formatter_plugin.rb +105 -0
- data/lib/embulk/guess_csv.rb +10 -1
- data/lib/embulk/guess_plugin.rb +22 -27
- data/lib/embulk/input_plugin.rb +34 -20
- data/lib/embulk/java/bootstrap.rb +5 -0
- data/lib/embulk/java/imports.rb +7 -0
- data/lib/embulk/java_plugin.rb +84 -0
- data/lib/embulk/output_plugin.rb +35 -19
- data/lib/embulk/page.rb +1 -1
- data/lib/embulk/page_builder.rb +1 -1
- data/lib/embulk/parser_plugin.rb +76 -0
- data/lib/embulk/plugin.rb +130 -65
- data/lib/embulk/plugin_registry.rb +19 -8
- data/lib/embulk/schema.rb +4 -4
- data/lib/embulk/version.rb +1 -1
- data/settings.gradle +1 -0
- metadata +123 -90
- data/ChangeLog +0 -46
- data/embulk-cli/pom.xml +0 -94
- data/embulk-core/pom.xml +0 -148
- data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +0 -26
- data/embulk-standards/pom.xml +0 -68
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +0 -250
- data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +0 -43
- data/pom.xml +0 -541
data/ChangeLog
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
2015-02-04 version 0.3.2:
|
|
3
|
-
|
|
4
|
-
* Fixed a problem where ruby input plugins can't use timestamp type (reported
|
|
5
|
-
by @shun0102)
|
|
6
|
-
* Embulk::Page includes Enumerable to include map, each_with_index, and other
|
|
7
|
-
a lot of convenient methods (@niku++)
|
|
8
|
-
* Fixed TimestampType::DEFAULT_FORMAT to use ':' as the separator of times
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
2015-02-04 version 0.3.1:
|
|
12
|
-
|
|
13
|
-
* Added -C, --classpath option to add java classpath
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
2015-02-03 version 0.3.0:
|
|
17
|
-
|
|
18
|
-
* Added resume functionality. InputPlugin and OutputPlugin need to implement
|
|
19
|
-
resume and cleanup methods.
|
|
20
|
-
* cli: embulk-run supports -r, --resume-state PATH option.
|
|
21
|
-
* Added FilterInputPlugin Java API.
|
|
22
|
-
* Added FilterInputPlugin JRuby API.
|
|
23
|
-
* Configuration file accepts filters: array entry.
|
|
24
|
-
* Added gradle-versions-plugin to build.gradle (@seratch++)
|
|
25
|
-
* Fixed broken dependencies at build.gradle (@thagikura++)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
2015-01-29 version 0.2.1:
|
|
29
|
-
|
|
30
|
-
* Fixed LineEncoder#finish to flush all remaining buffer (reported by @aibou)
|
|
31
|
-
* Fixed NextConfig to be merged to in: or out: rather than the top-level
|
|
32
|
-
(reported by enukane) [#41]
|
|
33
|
-
* ./bin/embulk shows a warning to run `rake` if ./classpath doesn't exist
|
|
34
|
-
* Embulk::PageBuilder#add accepts nil
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
2015-01-26 version 0.2.0:
|
|
38
|
-
|
|
39
|
-
* Changed JRuby InputPlugin API to use #run instead of .run
|
|
40
|
-
* Fixed 'example' subcommand to quote file path for Windows environment
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
2015-01-26 version 0.1.0:
|
|
44
|
-
|
|
45
|
-
* The first release
|
|
46
|
-
|
data/embulk-cli/pom.xml
DELETED
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
3
|
-
<modelVersion>4.0.0</modelVersion>
|
|
4
|
-
|
|
5
|
-
<parent>
|
|
6
|
-
<groupId>org.embulk</groupId>
|
|
7
|
-
<artifactId>embulk-parent</artifactId>
|
|
8
|
-
<version>0.3.2-SNAPSHOT</version>
|
|
9
|
-
</parent>
|
|
10
|
-
|
|
11
|
-
<artifactId>embulk-cli</artifactId>
|
|
12
|
-
<name>embulk-cli</name>
|
|
13
|
-
|
|
14
|
-
<dependencies>
|
|
15
|
-
<dependency>
|
|
16
|
-
<groupId>org.jruby</groupId>
|
|
17
|
-
<artifactId>jruby-complete</artifactId>
|
|
18
|
-
</dependency>
|
|
19
|
-
|
|
20
|
-
<dependency>
|
|
21
|
-
<groupId>org.embulk</groupId>
|
|
22
|
-
<artifactId>embulk-core</artifactId>
|
|
23
|
-
</dependency>
|
|
24
|
-
|
|
25
|
-
<!-- TODO move embulk-standards to another repository -->
|
|
26
|
-
<!-- TODO once embulk-standards is moved to another repository, replace this dependency with gem from rubygems-proxy -->
|
|
27
|
-
<!-- and change Rakefile to include embulk-core instead of embulk-cli and gemspec depends on embulk-standards -->
|
|
28
|
-
<dependency>
|
|
29
|
-
<groupId>org.embulk</groupId>
|
|
30
|
-
<artifactId>embulk-standards</artifactId>
|
|
31
|
-
</dependency>
|
|
32
|
-
</dependencies>
|
|
33
|
-
|
|
34
|
-
<repositories>
|
|
35
|
-
<!-- See https://github.com/jruby/jruby/wiki/Jruby-Scripting-container-using-Gems-with-a-Maven-Project -->
|
|
36
|
-
<repository>
|
|
37
|
-
<id>rubygems-proxy</id>
|
|
38
|
-
<name>RubyGems Proxy</name>
|
|
39
|
-
<url>http://rubygems-proxy.torquebox.org/releases</url>
|
|
40
|
-
<layout>default</layout>
|
|
41
|
-
<releases>
|
|
42
|
-
<enabled>true</enabled>
|
|
43
|
-
</releases>
|
|
44
|
-
<snapshots>
|
|
45
|
-
<enabled>false</enabled>
|
|
46
|
-
<updatePolicy>never</updatePolicy>
|
|
47
|
-
</snapshots>
|
|
48
|
-
</repository>
|
|
49
|
-
</repositories>
|
|
50
|
-
|
|
51
|
-
<build>
|
|
52
|
-
<plugins>
|
|
53
|
-
<plugin>
|
|
54
|
-
<groupId>de.saumya.mojo</groupId>
|
|
55
|
-
<artifactId>gem-maven-plugin</artifactId>
|
|
56
|
-
<version>1.0.0</version>
|
|
57
|
-
<configuration>
|
|
58
|
-
<includeRubygemsInResources>true</includeRubygemsInResources>
|
|
59
|
-
</configuration>
|
|
60
|
-
<executions>
|
|
61
|
-
<execution>
|
|
62
|
-
<goals>
|
|
63
|
-
<goal>initialize</goal>
|
|
64
|
-
</goals>
|
|
65
|
-
</execution>
|
|
66
|
-
</executions>
|
|
67
|
-
</plugin>
|
|
68
|
-
|
|
69
|
-
<plugin>
|
|
70
|
-
<groupId>org.apache.maven.plugins</groupId>
|
|
71
|
-
<artifactId>maven-shade-plugin</artifactId>
|
|
72
|
-
<executions>
|
|
73
|
-
<execution>
|
|
74
|
-
<phase>package</phase>
|
|
75
|
-
<goals>
|
|
76
|
-
<goal>shade</goal>
|
|
77
|
-
</goals>
|
|
78
|
-
<configuration>
|
|
79
|
-
<shadedArtifactAttached>true</shadedArtifactAttached>
|
|
80
|
-
<shadedClassifierName>executable</shadedClassifierName>
|
|
81
|
-
<transformers>
|
|
82
|
-
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
|
83
|
-
<manifestEntries>
|
|
84
|
-
<Main-Class>org.embulk.cli.Main</Main-Class>
|
|
85
|
-
</manifestEntries>
|
|
86
|
-
</transformer>
|
|
87
|
-
</transformers>
|
|
88
|
-
</configuration>
|
|
89
|
-
</execution>
|
|
90
|
-
</executions>
|
|
91
|
-
</plugin>
|
|
92
|
-
</plugins>
|
|
93
|
-
</build>
|
|
94
|
-
</project>
|
data/embulk-core/pom.xml
DELETED
|
@@ -1,148 +0,0 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
3
|
-
<modelVersion>4.0.0</modelVersion>
|
|
4
|
-
|
|
5
|
-
<parent>
|
|
6
|
-
<groupId>org.embulk</groupId>
|
|
7
|
-
<artifactId>embulk-parent</artifactId>
|
|
8
|
-
<version>0.3.2-SNAPSHOT</version>
|
|
9
|
-
</parent>
|
|
10
|
-
|
|
11
|
-
<artifactId>embulk-core</artifactId>
|
|
12
|
-
<name>embulk-core</name>
|
|
13
|
-
|
|
14
|
-
<dependencies>
|
|
15
|
-
<dependency>
|
|
16
|
-
<groupId>commons-logging</groupId>
|
|
17
|
-
<artifactId>commons-logging</artifactId>
|
|
18
|
-
</dependency>
|
|
19
|
-
|
|
20
|
-
<dependency>
|
|
21
|
-
<groupId>com.google.guava</groupId>
|
|
22
|
-
<artifactId>guava</artifactId>
|
|
23
|
-
</dependency>
|
|
24
|
-
|
|
25
|
-
<dependency>
|
|
26
|
-
<groupId>com.google.inject</groupId>
|
|
27
|
-
<artifactId>guice</artifactId>
|
|
28
|
-
</dependency>
|
|
29
|
-
|
|
30
|
-
<dependency>
|
|
31
|
-
<groupId>com.google.inject.extensions</groupId>
|
|
32
|
-
<artifactId>guice-multibindings</artifactId>
|
|
33
|
-
</dependency>
|
|
34
|
-
|
|
35
|
-
<dependency>
|
|
36
|
-
<groupId>javax.inject</groupId>
|
|
37
|
-
<artifactId>javax.inject</artifactId>
|
|
38
|
-
</dependency>
|
|
39
|
-
|
|
40
|
-
<dependency>
|
|
41
|
-
<groupId>com.fasterxml.jackson.core</groupId>
|
|
42
|
-
<artifactId>jackson-databind</artifactId>
|
|
43
|
-
</dependency>
|
|
44
|
-
|
|
45
|
-
<dependency>
|
|
46
|
-
<groupId>com.fasterxml.jackson.core</groupId>
|
|
47
|
-
<artifactId>jackson-annotations</artifactId>
|
|
48
|
-
</dependency>
|
|
49
|
-
|
|
50
|
-
<dependency>
|
|
51
|
-
<groupId>com.fasterxml.jackson.core</groupId>
|
|
52
|
-
<artifactId>jackson-core</artifactId>
|
|
53
|
-
</dependency>
|
|
54
|
-
|
|
55
|
-
<dependency>
|
|
56
|
-
<groupId>com.fasterxml.jackson.datatype</groupId>
|
|
57
|
-
<artifactId>jackson-datatype-guava</artifactId>
|
|
58
|
-
</dependency>
|
|
59
|
-
|
|
60
|
-
<dependency>
|
|
61
|
-
<groupId>com.fasterxml.jackson.module</groupId>
|
|
62
|
-
<artifactId>jackson-module-guice</artifactId>
|
|
63
|
-
</dependency>
|
|
64
|
-
|
|
65
|
-
<dependency>
|
|
66
|
-
<groupId>com.fasterxml.jackson.datatype</groupId>
|
|
67
|
-
<artifactId>jackson-datatype-joda</artifactId>
|
|
68
|
-
</dependency>
|
|
69
|
-
|
|
70
|
-
<dependency>
|
|
71
|
-
<groupId>org.slf4j</groupId>
|
|
72
|
-
<artifactId>slf4j-api</artifactId>
|
|
73
|
-
</dependency>
|
|
74
|
-
|
|
75
|
-
<dependency>
|
|
76
|
-
<groupId>org.slf4j</groupId>
|
|
77
|
-
<artifactId>slf4j-log4j12</artifactId>
|
|
78
|
-
</dependency>
|
|
79
|
-
|
|
80
|
-
<dependency>
|
|
81
|
-
<groupId>log4j</groupId>
|
|
82
|
-
<artifactId>log4j</artifactId>
|
|
83
|
-
</dependency>
|
|
84
|
-
|
|
85
|
-
<dependency>
|
|
86
|
-
<groupId>joda-time</groupId>
|
|
87
|
-
<artifactId>joda-time</artifactId>
|
|
88
|
-
</dependency>
|
|
89
|
-
|
|
90
|
-
<dependency>
|
|
91
|
-
<groupId>org.yaml</groupId>
|
|
92
|
-
<artifactId>snakeyaml</artifactId>
|
|
93
|
-
</dependency>
|
|
94
|
-
|
|
95
|
-
<dependency>
|
|
96
|
-
<groupId>javax.validation</groupId>
|
|
97
|
-
<artifactId>validation-api</artifactId>
|
|
98
|
-
</dependency>
|
|
99
|
-
|
|
100
|
-
<dependency>
|
|
101
|
-
<groupId>org.apache.bval</groupId>
|
|
102
|
-
<artifactId>bval-jsr303</artifactId>
|
|
103
|
-
</dependency>
|
|
104
|
-
|
|
105
|
-
<dependency>
|
|
106
|
-
<groupId>io.airlift</groupId>
|
|
107
|
-
<artifactId>slice</artifactId>
|
|
108
|
-
</dependency>
|
|
109
|
-
|
|
110
|
-
<dependency>
|
|
111
|
-
<groupId>io.netty</groupId>
|
|
112
|
-
<artifactId>netty-buffer</artifactId>
|
|
113
|
-
</dependency>
|
|
114
|
-
|
|
115
|
-
<dependency>
|
|
116
|
-
<groupId>org.jruby</groupId>
|
|
117
|
-
<artifactId>jruby-complete</artifactId>
|
|
118
|
-
</dependency>
|
|
119
|
-
|
|
120
|
-
<dependency>
|
|
121
|
-
<groupId>com.google.code.findbugs</groupId>
|
|
122
|
-
<artifactId>annotations</artifactId>
|
|
123
|
-
</dependency>
|
|
124
|
-
|
|
125
|
-
<!-- for guess_charset plugin -->
|
|
126
|
-
<dependency>
|
|
127
|
-
<groupId>com.ibm.icu</groupId>
|
|
128
|
-
<artifactId>icu4j</artifactId>
|
|
129
|
-
<scope>runtime</scope>
|
|
130
|
-
</dependency>
|
|
131
|
-
|
|
132
|
-
<dependency>
|
|
133
|
-
<groupId>junit</groupId>
|
|
134
|
-
<artifactId>junit</artifactId>
|
|
135
|
-
</dependency>
|
|
136
|
-
</dependencies>
|
|
137
|
-
|
|
138
|
-
<build>
|
|
139
|
-
<resources>
|
|
140
|
-
<resource>
|
|
141
|
-
<directory>src/main/resources</directory>
|
|
142
|
-
</resource>
|
|
143
|
-
<resource>
|
|
144
|
-
<directory>../lib</directory>
|
|
145
|
-
</resource>
|
|
146
|
-
</resources>
|
|
147
|
-
</build>
|
|
148
|
-
</project>
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
package org.embulk.config;
|
|
2
|
-
|
|
3
|
-
public interface NextConfig
|
|
4
|
-
extends DataSource
|
|
5
|
-
{
|
|
6
|
-
@Override
|
|
7
|
-
public NextConfig getNested(String attrName);
|
|
8
|
-
|
|
9
|
-
@Override
|
|
10
|
-
public NextConfig getNestedOrSetEmpty(String attrName);
|
|
11
|
-
|
|
12
|
-
@Override
|
|
13
|
-
public NextConfig set(String attrName, Object v);
|
|
14
|
-
|
|
15
|
-
@Override
|
|
16
|
-
public NextConfig setNested(String attrName, DataSource v);
|
|
17
|
-
|
|
18
|
-
@Override
|
|
19
|
-
public NextConfig setAll(DataSource other);
|
|
20
|
-
|
|
21
|
-
@Override
|
|
22
|
-
public NextConfig deepCopy();
|
|
23
|
-
|
|
24
|
-
@Override
|
|
25
|
-
public NextConfig merge(DataSource other);
|
|
26
|
-
}
|
data/embulk-standards/pom.xml
DELETED
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
3
|
-
<modelVersion>4.0.0</modelVersion>
|
|
4
|
-
|
|
5
|
-
<parent>
|
|
6
|
-
<groupId>org.embulk</groupId>
|
|
7
|
-
<artifactId>embulk-parent</artifactId>
|
|
8
|
-
<version>0.3.2-SNAPSHOT</version>
|
|
9
|
-
</parent>
|
|
10
|
-
|
|
11
|
-
<artifactId>embulk-standards</artifactId>
|
|
12
|
-
<name>embulk-standards</name>
|
|
13
|
-
|
|
14
|
-
<dependencies>
|
|
15
|
-
<dependency>
|
|
16
|
-
<groupId>org.embulk</groupId>
|
|
17
|
-
<artifactId>embulk-core</artifactId>
|
|
18
|
-
</dependency>
|
|
19
|
-
|
|
20
|
-
<dependency>
|
|
21
|
-
<groupId>org.embulk</groupId>
|
|
22
|
-
<artifactId>embulk-core</artifactId>
|
|
23
|
-
<type>test-jar</type>
|
|
24
|
-
<scope>test</scope>
|
|
25
|
-
</dependency>
|
|
26
|
-
|
|
27
|
-
<dependency>
|
|
28
|
-
<groupId>com.google.guava</groupId>
|
|
29
|
-
<artifactId>guava</artifactId>
|
|
30
|
-
</dependency>
|
|
31
|
-
|
|
32
|
-
<dependency>
|
|
33
|
-
<groupId>com.google.inject</groupId>
|
|
34
|
-
<artifactId>guice</artifactId>
|
|
35
|
-
</dependency>
|
|
36
|
-
|
|
37
|
-
<dependency>
|
|
38
|
-
<groupId>javax.validation</groupId>
|
|
39
|
-
<artifactId>validation-api</artifactId>
|
|
40
|
-
</dependency>
|
|
41
|
-
|
|
42
|
-
<dependency>
|
|
43
|
-
<groupId>com.fasterxml.jackson.core</groupId>
|
|
44
|
-
<artifactId>jackson-databind</artifactId>
|
|
45
|
-
</dependency>
|
|
46
|
-
|
|
47
|
-
<dependency>
|
|
48
|
-
<groupId>org.slf4j</groupId>
|
|
49
|
-
<artifactId>slf4j-api</artifactId>
|
|
50
|
-
</dependency>
|
|
51
|
-
|
|
52
|
-
<dependency>
|
|
53
|
-
<groupId>com.amazonaws</groupId>
|
|
54
|
-
<artifactId>aws-java-sdk</artifactId>
|
|
55
|
-
<version>1.5.2</version>
|
|
56
|
-
</dependency>
|
|
57
|
-
|
|
58
|
-
<dependency>
|
|
59
|
-
<groupId>junit</groupId>
|
|
60
|
-
<artifactId>junit</artifactId>
|
|
61
|
-
</dependency>
|
|
62
|
-
|
|
63
|
-
<dependency>
|
|
64
|
-
<groupId>org.mockito</groupId>
|
|
65
|
-
<artifactId>mockito-core</artifactId>
|
|
66
|
-
</dependency>
|
|
67
|
-
</dependencies>
|
|
68
|
-
</project>
|
|
@@ -1,250 +0,0 @@
|
|
|
1
|
-
package org.embulk.standards;
|
|
2
|
-
|
|
3
|
-
import java.util.List;
|
|
4
|
-
import java.io.IOException;
|
|
5
|
-
import java.io.InputStream;
|
|
6
|
-
|
|
7
|
-
import com.amazonaws.AmazonClientException;
|
|
8
|
-
import com.amazonaws.AmazonServiceException;
|
|
9
|
-
import com.google.common.collect.ImmutableList;
|
|
10
|
-
import com.google.common.base.Optional;
|
|
11
|
-
import com.fasterxml.jackson.annotation.JacksonInject;
|
|
12
|
-
import com.amazonaws.auth.AWSCredentials;
|
|
13
|
-
import com.amazonaws.auth.AWSCredentialsProvider;
|
|
14
|
-
import com.amazonaws.auth.BasicAWSCredentials;
|
|
15
|
-
import com.amazonaws.services.s3.AmazonS3Client;
|
|
16
|
-
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
|
17
|
-
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
|
18
|
-
import com.amazonaws.services.s3.model.ObjectListing;
|
|
19
|
-
import com.amazonaws.services.s3.model.GetObjectRequest;
|
|
20
|
-
import com.amazonaws.services.s3.model.S3Object;
|
|
21
|
-
import com.amazonaws.ClientConfiguration;
|
|
22
|
-
import org.embulk.config.Config;
|
|
23
|
-
import org.embulk.config.Task;
|
|
24
|
-
import org.embulk.config.TaskSource;
|
|
25
|
-
import org.embulk.config.ConfigSource;
|
|
26
|
-
import org.embulk.config.NextConfig;
|
|
27
|
-
import org.embulk.config.CommitReport;
|
|
28
|
-
import org.embulk.spi.BufferAllocator;
|
|
29
|
-
import org.embulk.spi.Exec;
|
|
30
|
-
import org.embulk.spi.FileInputPlugin;
|
|
31
|
-
import org.embulk.spi.TransactionalFileInput;
|
|
32
|
-
import org.embulk.spi.util.InputStreamFileInput;
|
|
33
|
-
import org.slf4j.Logger;
|
|
34
|
-
|
|
35
|
-
import static org.embulk.spi.util.Inputs.formatPath;
|
|
36
|
-
|
|
37
|
-
public class S3FileInputPlugin
|
|
38
|
-
implements FileInputPlugin
|
|
39
|
-
{
|
|
40
|
-
public interface PluginTask
|
|
41
|
-
extends Task
|
|
42
|
-
{
|
|
43
|
-
@Config("bucket")
|
|
44
|
-
public String getBucket();
|
|
45
|
-
|
|
46
|
-
@Config("paths")
|
|
47
|
-
public List<String> getPathPrefixes();
|
|
48
|
-
|
|
49
|
-
@Config("endpoint")
|
|
50
|
-
public Optional<String> getEndpoint();
|
|
51
|
-
|
|
52
|
-
// TODO timeout, ssl, etc
|
|
53
|
-
|
|
54
|
-
@Config("access_key_id")
|
|
55
|
-
public String getAccessKeyId();
|
|
56
|
-
|
|
57
|
-
@Config("secret_access_key")
|
|
58
|
-
public String getSecretAccessKey();
|
|
59
|
-
|
|
60
|
-
// TODO support more options such as STS
|
|
61
|
-
|
|
62
|
-
public List<String> getFiles();
|
|
63
|
-
public void setFiles(List<String> files);
|
|
64
|
-
|
|
65
|
-
@JacksonInject
|
|
66
|
-
public BufferAllocator getBufferAllocator();
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
private final Logger log = Exec.getLogger(getClass());
|
|
70
|
-
|
|
71
|
-
@Override
|
|
72
|
-
public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
|
|
73
|
-
{
|
|
74
|
-
PluginTask task = config.loadConfig(PluginTask.class);
|
|
75
|
-
|
|
76
|
-
// list files recursively
|
|
77
|
-
task.setFiles(listFiles(task));
|
|
78
|
-
|
|
79
|
-
// the number of processors is the same as the number of files
|
|
80
|
-
int processorCount = task.getFiles().size();
|
|
81
|
-
return resume(task.dump(), processorCount, control);
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
@Override
|
|
85
|
-
public NextConfig resume(TaskSource taskSource,
|
|
86
|
-
int processorCount,
|
|
87
|
-
FileInputPlugin.Control control)
|
|
88
|
-
{
|
|
89
|
-
control.run(taskSource, processorCount);
|
|
90
|
-
return Exec.newNextConfig();
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
@Override
|
|
94
|
-
public void cleanup(TaskSource taskSource,
|
|
95
|
-
int processorCount,
|
|
96
|
-
List<CommitReport> successCommitReports)
|
|
97
|
-
{ }
|
|
98
|
-
|
|
99
|
-
public static AWSCredentialsProvider getCredentialsProvider(PluginTask task)
|
|
100
|
-
{
|
|
101
|
-
final AWSCredentials cred = new BasicAWSCredentials(
|
|
102
|
-
task.getAccessKeyId(), task.getSecretAccessKey());
|
|
103
|
-
return new AWSCredentialsProvider() {
|
|
104
|
-
public AWSCredentials getCredentials()
|
|
105
|
-
{
|
|
106
|
-
return cred;
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
public void refresh()
|
|
110
|
-
{
|
|
111
|
-
}
|
|
112
|
-
};
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
private static AmazonS3Client newS3Client(PluginTask task)
|
|
116
|
-
{
|
|
117
|
-
AWSCredentialsProvider credentials = getCredentialsProvider(task);
|
|
118
|
-
AmazonS3Client client = newS3Client(credentials, task.getEndpoint());
|
|
119
|
-
return client;
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
private static AmazonS3Client newS3Client(AWSCredentialsProvider credentials,
|
|
123
|
-
Optional<String> endpoint)
|
|
124
|
-
{
|
|
125
|
-
// TODO get config from AmazonS3Task
|
|
126
|
-
ClientConfiguration clientConfig = new ClientConfiguration();
|
|
127
|
-
//clientConfig.setProtocol(Protocol.HTTP);
|
|
128
|
-
clientConfig.setMaxConnections(50); // SDK default: 50
|
|
129
|
-
clientConfig.setMaxErrorRetry(3); // SDK default: 3
|
|
130
|
-
clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
|
|
131
|
-
|
|
132
|
-
AmazonS3Client client = new AmazonS3Client(credentials, clientConfig);
|
|
133
|
-
|
|
134
|
-
if (endpoint.isPresent()) {
|
|
135
|
-
client.setEndpoint(endpoint.get());
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
return client;
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
public List<String> listFiles(PluginTask task)
|
|
142
|
-
{
|
|
143
|
-
AmazonS3Client client = newS3Client(task);
|
|
144
|
-
String bucketName = task.getBucket();
|
|
145
|
-
|
|
146
|
-
ImmutableList.Builder<String> builder = ImmutableList.builder();
|
|
147
|
-
for (String prefix : task.getPathPrefixes()) {
|
|
148
|
-
String formatted = formatPath(prefix);
|
|
149
|
-
try {
|
|
150
|
-
log.info("Listing S3 files with prefix '{}'", formatted);
|
|
151
|
-
builder.addAll(listS3FilesByPrefix(client, bucketName, formatted));
|
|
152
|
-
} catch (RuntimeException e) {
|
|
153
|
-
throw new RuntimeException(String.format("Failed get a list of S3 files at '%s'", formatted), e);
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
return builder.build();
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
/**
|
|
161
|
-
* Lists S3 filenames filtered by prefix.
|
|
162
|
-
*
|
|
163
|
-
* The resulting list does not include files whose size is 0.
|
|
164
|
-
*/
|
|
165
|
-
public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName, String prefix)
|
|
166
|
-
{
|
|
167
|
-
// TODO implement retrying
|
|
168
|
-
ImmutableList.Builder<String> builder = ImmutableList.builder();
|
|
169
|
-
|
|
170
|
-
String lastKey = null;
|
|
171
|
-
do {
|
|
172
|
-
ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
|
|
173
|
-
ObjectListing ol = client.listObjects(req);
|
|
174
|
-
for(S3ObjectSummary s : ol.getObjectSummaries()) {
|
|
175
|
-
if (s.getSize() > 0) {
|
|
176
|
-
builder.add(s.getKey());
|
|
177
|
-
}
|
|
178
|
-
}
|
|
179
|
-
lastKey = ol.getNextMarker();
|
|
180
|
-
} while(lastKey != null);
|
|
181
|
-
|
|
182
|
-
return builder.build();
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
@Override
|
|
186
|
-
public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
|
|
187
|
-
{
|
|
188
|
-
PluginTask task = taskSource.loadTask(PluginTask.class);
|
|
189
|
-
return new S3FileInput(task, processorIndex);
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
public static class S3FileInput
|
|
193
|
-
extends InputStreamFileInput
|
|
194
|
-
implements TransactionalFileInput
|
|
195
|
-
{
|
|
196
|
-
// TODO create single-file InputStreamFileInput utility
|
|
197
|
-
private static class SingleFileProvider
|
|
198
|
-
implements InputStreamFileInput.Provider
|
|
199
|
-
{
|
|
200
|
-
private AmazonS3Client client;
|
|
201
|
-
private final String bucket;
|
|
202
|
-
private final String key;
|
|
203
|
-
private boolean opened = false;
|
|
204
|
-
|
|
205
|
-
public SingleFileProvider(PluginTask task, int processorIndex)
|
|
206
|
-
{
|
|
207
|
-
this.client = newS3Client(task);
|
|
208
|
-
this.bucket = task.getBucket();
|
|
209
|
-
this.key = task.getFiles().get(processorIndex);
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
@Override
|
|
213
|
-
public InputStream openNext() throws IOException
|
|
214
|
-
{
|
|
215
|
-
if (opened) {
|
|
216
|
-
return null;
|
|
217
|
-
}
|
|
218
|
-
opened = true;
|
|
219
|
-
GetObjectRequest request = new GetObjectRequest(bucket, key);
|
|
220
|
-
//if (pos > 0) {
|
|
221
|
-
// request.setRange(pos, contentLength);
|
|
222
|
-
//}
|
|
223
|
-
S3Object obj = client.getObject(request);
|
|
224
|
-
//if (pos <= 0) {
|
|
225
|
-
// // first call
|
|
226
|
-
// contentLength = obj.getObjectMetadata().getContentLength();
|
|
227
|
-
//}
|
|
228
|
-
return obj.getObjectContent();
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
@Override
|
|
232
|
-
public void close() { }
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
public S3FileInput(PluginTask task, int processorIndex)
|
|
236
|
-
{
|
|
237
|
-
super(task.getBufferAllocator(), new SingleFileProvider(task, processorIndex));
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
public void abort() { }
|
|
241
|
-
|
|
242
|
-
public CommitReport commit()
|
|
243
|
-
{
|
|
244
|
-
return Exec.newCommitReport();
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
@Override
|
|
248
|
-
public void close() { }
|
|
249
|
-
}
|
|
250
|
-
}
|