embulk 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138)
  1. checksums.yaml +5 -13
  2. data/.travis.yml +16 -0
  3. data/Gemfile +0 -1
  4. data/README.md +37 -19
  5. data/Rakefile +5 -37
  6. data/bin/embulk +1 -1
  7. data/build.gradle +178 -95
  8. data/embulk-core/build.gradle +1 -1
  9. data/embulk-core/src/main/java/org/embulk/command/Runner.java +11 -10
  10. data/embulk-core/src/main/java/org/embulk/config/ConfigDiff.java +26 -0
  11. data/embulk-core/src/main/java/org/embulk/config/ConfigInject.java +14 -0
  12. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +2 -2
  13. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +4 -3
  14. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +5 -3
  15. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -0
  16. data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +6 -6
  17. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +19 -19
  18. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +61 -36
  19. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +4 -0
  20. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +16 -1
  21. data/embulk-core/src/main/java/org/embulk/spi/Column.java +1 -1
  22. data/embulk-core/src/main/java/org/embulk/spi/{SchemaVisitor.java → ColumnVisitor.java} +1 -1
  23. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +3 -3
  24. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +2 -2
  25. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +3 -3
  26. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +3 -3
  27. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +3 -3
  28. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +3 -3
  29. data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +1 -1
  30. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +2 -2
  31. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +3 -3
  32. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +3 -3
  33. data/embulk-core/src/main/java/org/embulk/spi/PluginClassLoader.java +80 -0
  34. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +1 -1
  35. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +2 -2
  36. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +2 -2
  37. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +1 -1
  38. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +2 -2
  39. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +6 -6
  40. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +6 -6
  41. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +1 -1
  42. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +1 -0
  43. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +2 -2
  44. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +1 -1
  45. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +3 -3
  46. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +4 -4
  47. data/embulk-docs/Makefile +178 -0
  48. data/embulk-docs/build.gradle +20 -0
  49. data/embulk-docs/make.bat +243 -0
  50. data/embulk-docs/push-gh-pages.sh +29 -0
  51. data/embulk-docs/src/conf.py +260 -0
  52. data/embulk-docs/src/index.rst +19 -0
  53. data/embulk-docs/src/release.rst +14 -0
  54. data/embulk-docs/src/release/release-0.1.0.rst +8 -0
  55. data/embulk-docs/src/release/release-0.2.0.rst +16 -0
  56. data/embulk-docs/src/release/release-0.2.1.rst +19 -0
  57. data/embulk-docs/src/release/release-0.3.0.rst +34 -0
  58. data/embulk-docs/src/release/release-0.3.1.rst +11 -0
  59. data/embulk-docs/src/release/release-0.3.2.rst +15 -0
  60. data/embulk-docs/src/release/release-0.4.0.rst +74 -0
  61. data/embulk-standards/build.gradle +0 -1
  62. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +2 -2
  63. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +12 -3
  64. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +2 -2
  65. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +49 -26
  66. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +16 -17
  67. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +4 -4
  68. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +0 -1
  69. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +5 -5
  70. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  71. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  72. data/lib/embulk/buffer.rb +2 -2
  73. data/lib/embulk/column.rb +6 -6
  74. data/lib/embulk/command/embulk_example.rb +1 -1
  75. data/lib/embulk/command/embulk_new_plugin.rb +87 -0
  76. data/lib/embulk/command/embulk_run.rb +84 -26
  77. data/lib/embulk/data/bundle/Gemfile +12 -20
  78. data/lib/embulk/data/bundle/embulk/{filter_example.rb → filter/example.rb} +3 -3
  79. data/lib/embulk/data/bundle/embulk/{input_example.rb → input/example.rb} +3 -3
  80. data/lib/embulk/data/bundle/embulk/{output_example.rb → output/example.rb} +3 -3
  81. data/lib/embulk/data/new/LICENSE.txt +21 -0
  82. data/lib/embulk/data/new/README.md.erb +75 -0
  83. data/lib/embulk/data/new/gitignore.erb +12 -0
  84. data/lib/embulk/data/new/java/build.gradle.erb +57 -0
  85. data/lib/embulk/data/new/java/decoder.java.erb +40 -0
  86. data/lib/embulk/data/new/java/encoder.java.erb +40 -0
  87. data/lib/embulk/data/new/java/file_input.java.erb +64 -0
  88. data/lib/embulk/data/new/java/file_output.java.erb +66 -0
  89. data/lib/embulk/data/new/java/filter.java.erb +47 -0
  90. data/lib/embulk/data/new/java/formatter.java.erb +45 -0
  91. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  92. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +6 -0
  93. data/lib/embulk/data/new/java/gradlew +164 -0
  94. data/lib/embulk/data/new/java/gradlew.bat +90 -0
  95. data/lib/embulk/data/new/java/input.java.erb +69 -0
  96. data/lib/embulk/data/new/java/output.java.erb +65 -0
  97. data/lib/embulk/data/new/java/parser.java.erb +51 -0
  98. data/lib/embulk/data/new/java/plugin_loader.rb.erb +3 -0
  99. data/lib/embulk/data/new/java/test.java.erb +5 -0
  100. data/lib/embulk/data/new/ruby/Gemfile +2 -0
  101. data/lib/embulk/data/new/ruby/Rakefile +1 -0
  102. data/lib/embulk/data/new/ruby/filter.rb.erb +39 -0
  103. data/lib/embulk/data/new/ruby/gemspec.erb +19 -0
  104. data/lib/embulk/data/new/ruby/input.rb.erb +47 -0
  105. data/lib/embulk/data/new/ruby/output.rb.erb +59 -0
  106. data/lib/embulk/data/package_data.rb +64 -0
  107. data/lib/embulk/data_source.rb +2 -2
  108. data/lib/embulk/decoder_plugin.rb +27 -0
  109. data/lib/embulk/encoder_plugin.rb +27 -0
  110. data/lib/embulk/error.rb +3 -0
  111. data/lib/embulk/file_input_plugin.rb +27 -0
  112. data/lib/embulk/file_output_plugin.rb +27 -0
  113. data/lib/embulk/filter_plugin.rb +28 -9
  114. data/lib/embulk/formatter_plugin.rb +105 -0
  115. data/lib/embulk/guess_csv.rb +10 -1
  116. data/lib/embulk/guess_plugin.rb +22 -27
  117. data/lib/embulk/input_plugin.rb +34 -20
  118. data/lib/embulk/java/bootstrap.rb +5 -0
  119. data/lib/embulk/java/imports.rb +7 -0
  120. data/lib/embulk/java_plugin.rb +84 -0
  121. data/lib/embulk/output_plugin.rb +35 -19
  122. data/lib/embulk/page.rb +1 -1
  123. data/lib/embulk/page_builder.rb +1 -1
  124. data/lib/embulk/parser_plugin.rb +76 -0
  125. data/lib/embulk/plugin.rb +130 -65
  126. data/lib/embulk/plugin_registry.rb +19 -8
  127. data/lib/embulk/schema.rb +4 -4
  128. data/lib/embulk/version.rb +1 -1
  129. data/settings.gradle +1 -0
  130. metadata +123 -90
  131. data/ChangeLog +0 -46
  132. data/embulk-cli/pom.xml +0 -94
  133. data/embulk-core/pom.xml +0 -148
  134. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +0 -26
  135. data/embulk-standards/pom.xml +0 -68
  136. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +0 -250
  137. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +0 -43
  138. data/pom.xml +0 -541
data/ChangeLog DELETED
@@ -1,46 +0,0 @@
1
-
2
- 2015-02-04 version 0.3.2:
3
-
4
- * Fixed a problem where ruby input plugins can't use timestamp type (reported
5
- by @shun0102)
6
- * Embulk::Page includes Enumerable to include map, each_with_index, and other
7
- a lot of convenient methods (@niku++)
8
- * Fixed TimestampType::DEFAULT_FORMAT to use ':' as the separator of times
9
-
10
-
11
- 2015-02-04 version 0.3.1:
12
-
13
- * Added -C, --classpath option to add java classpath
14
-
15
-
16
- 2015-02-03 version 0.3.0:
17
-
18
- * Added resume functionality. InputPlugin and OutputPlugin needs to implement
19
- resume and cleanup methods.
20
- * cli: embulk-run supports -r, --resume-state PATH option.
21
- * Added FilterInputPlugin Java API.
22
- * Added FilterInputPlugin JRuby API.
23
- * Configuration file accepts filters: array entry.
24
- * Added gradle-versions-plugin to build.gradle (@seratch++)
25
- * Fixed broken dependencies at build.gradle (@thagikura++)
26
-
27
-
28
- 2015-01-29 version 0.2.1:
29
-
30
- * Fixed LineEncoder#finish to flush all remaining buffer (reported by @aibou)
31
- * Fixed NextConfig to be merged to in: or out: rather than the top-level
32
- (reported by enukane) [#41]
33
- * ./bin/embulk shows warns to run `rake` if ./classpath doesn't exist
34
- * Embulk::PageBuilder#add accepts nil
35
-
36
-
37
- 2015-01-26 version 0.2.0:
38
-
39
- * Changed JRuby InputPlugin API to use #run instead of .run
40
- * Fixed 'example' subcommand to quote file path for Windows environment
41
-
42
-
43
- 2015-01-26 version 0.1.0:
44
-
45
- * The first release
46
-
data/embulk-cli/pom.xml DELETED
@@ -1,94 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3
- <modelVersion>4.0.0</modelVersion>
4
-
5
- <parent>
6
- <groupId>org.embulk</groupId>
7
- <artifactId>embulk-parent</artifactId>
8
- <version>0.3.2-SNAPSHOT</version>
9
- </parent>
10
-
11
- <artifactId>embulk-cli</artifactId>
12
- <name>embulk-cli</name>
13
-
14
- <dependencies>
15
- <dependency>
16
- <groupId>org.jruby</groupId>
17
- <artifactId>jruby-complete</artifactId>
18
- </dependency>
19
-
20
- <dependency>
21
- <groupId>org.embulk</groupId>
22
- <artifactId>embulk-core</artifactId>
23
- </dependency>
24
-
25
- <!-- TODO move embulk-standards to another repository -->
26
- <!-- TODO once embulk-standards is moved to another repository, replace this dependency with gem from rubygems-proxy -->
27
- <!-- and change Rakefile to include embulk-core instead of embulk-cli and gemspec depends on embulk-standards -->
28
- <dependency>
29
- <groupId>org.embulk</groupId>
30
- <artifactId>embulk-standards</artifactId>
31
- </dependency>
32
- </dependencies>
33
-
34
- <repositories>
35
- <!-- See https://github.com/jruby/jruby/wiki/Jruby-Scripting-container-using-Gems-with-a-Maven-Project -->
36
- <repository>
37
- <id>rubygems-proxy</id>
38
- <name>RubyGems Proxy</name>
39
- <url>http://rubygems-proxy.torquebox.org/releases</url>
40
- <layout>default</layout>
41
- <releases>
42
- <enabled>true</enabled>
43
- </releases>
44
- <snapshots>
45
- <enabled>false</enabled>
46
- <updatePolicy>never</updatePolicy>
47
- </snapshots>
48
- </repository>
49
- </repositories>
50
-
51
- <build>
52
- <plugins>
53
- <plugin>
54
- <groupId>de.saumya.mojo</groupId>
55
- <artifactId>gem-maven-plugin</artifactId>
56
- <version>1.0.0</version>
57
- <configuration>
58
- <includeRubygemsInResources>true</includeRubygemsInResources>
59
- </configuration>
60
- <executions>
61
- <execution>
62
- <goals>
63
- <goal>initialize</goal>
64
- </goals>
65
- </execution>
66
- </executions>
67
- </plugin>
68
-
69
- <plugin>
70
- <groupId>org.apache.maven.plugins</groupId>
71
- <artifactId>maven-shade-plugin</artifactId>
72
- <executions>
73
- <execution>
74
- <phase>package</phase>
75
- <goals>
76
- <goal>shade</goal>
77
- </goals>
78
- <configuration>
79
- <shadedArtifactAttached>true</shadedArtifactAttached>
80
- <shadedClassifierName>executable</shadedClassifierName>
81
- <transformers>
82
- <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
83
- <manifestEntries>
84
- <Main-Class>org.embulk.cli.Main</Main-Class>
85
- </manifestEntries>
86
- </transformer>
87
- </transformers>
88
- </configuration>
89
- </execution>
90
- </executions>
91
- </plugin>
92
- </plugins>
93
- </build>
94
- </project>
data/embulk-core/pom.xml DELETED
@@ -1,148 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3
- <modelVersion>4.0.0</modelVersion>
4
-
5
- <parent>
6
- <groupId>org.embulk</groupId>
7
- <artifactId>embulk-parent</artifactId>
8
- <version>0.3.2-SNAPSHOT</version>
9
- </parent>
10
-
11
- <artifactId>embulk-core</artifactId>
12
- <name>embulk-core</name>
13
-
14
- <dependencies>
15
- <dependency>
16
- <groupId>commons-logging</groupId>
17
- <artifactId>commons-logging</artifactId>
18
- </dependency>
19
-
20
- <dependency>
21
- <groupId>com.google.guava</groupId>
22
- <artifactId>guava</artifactId>
23
- </dependency>
24
-
25
- <dependency>
26
- <groupId>com.google.inject</groupId>
27
- <artifactId>guice</artifactId>
28
- </dependency>
29
-
30
- <dependency>
31
- <groupId>com.google.inject.extensions</groupId>
32
- <artifactId>guice-multibindings</artifactId>
33
- </dependency>
34
-
35
- <dependency>
36
- <groupId>javax.inject</groupId>
37
- <artifactId>javax.inject</artifactId>
38
- </dependency>
39
-
40
- <dependency>
41
- <groupId>com.fasterxml.jackson.core</groupId>
42
- <artifactId>jackson-databind</artifactId>
43
- </dependency>
44
-
45
- <dependency>
46
- <groupId>com.fasterxml.jackson.core</groupId>
47
- <artifactId>jackson-annotations</artifactId>
48
- </dependency>
49
-
50
- <dependency>
51
- <groupId>com.fasterxml.jackson.core</groupId>
52
- <artifactId>jackson-core</artifactId>
53
- </dependency>
54
-
55
- <dependency>
56
- <groupId>com.fasterxml.jackson.datatype</groupId>
57
- <artifactId>jackson-datatype-guava</artifactId>
58
- </dependency>
59
-
60
- <dependency>
61
- <groupId>com.fasterxml.jackson.module</groupId>
62
- <artifactId>jackson-module-guice</artifactId>
63
- </dependency>
64
-
65
- <dependency>
66
- <groupId>com.fasterxml.jackson.datatype</groupId>
67
- <artifactId>jackson-datatype-joda</artifactId>
68
- </dependency>
69
-
70
- <dependency>
71
- <groupId>org.slf4j</groupId>
72
- <artifactId>slf4j-api</artifactId>
73
- </dependency>
74
-
75
- <dependency>
76
- <groupId>org.slf4j</groupId>
77
- <artifactId>slf4j-log4j12</artifactId>
78
- </dependency>
79
-
80
- <dependency>
81
- <groupId>log4j</groupId>
82
- <artifactId>log4j</artifactId>
83
- </dependency>
84
-
85
- <dependency>
86
- <groupId>joda-time</groupId>
87
- <artifactId>joda-time</artifactId>
88
- </dependency>
89
-
90
- <dependency>
91
- <groupId>org.yaml</groupId>
92
- <artifactId>snakeyaml</artifactId>
93
- </dependency>
94
-
95
- <dependency>
96
- <groupId>javax.validation</groupId>
97
- <artifactId>validation-api</artifactId>
98
- </dependency>
99
-
100
- <dependency>
101
- <groupId>org.apache.bval</groupId>
102
- <artifactId>bval-jsr303</artifactId>
103
- </dependency>
104
-
105
- <dependency>
106
- <groupId>io.airlift</groupId>
107
- <artifactId>slice</artifactId>
108
- </dependency>
109
-
110
- <dependency>
111
- <groupId>io.netty</groupId>
112
- <artifactId>netty-buffer</artifactId>
113
- </dependency>
114
-
115
- <dependency>
116
- <groupId>org.jruby</groupId>
117
- <artifactId>jruby-complete</artifactId>
118
- </dependency>
119
-
120
- <dependency>
121
- <groupId>com.google.code.findbugs</groupId>
122
- <artifactId>annotations</artifactId>
123
- </dependency>
124
-
125
- <!-- for guess_charset plugin -->
126
- <dependency>
127
- <groupId>com.ibm.icu</groupId>
128
- <artifactId>icu4j</artifactId>
129
- <scope>runtime</scope>
130
- </dependency>
131
-
132
- <dependency>
133
- <groupId>junit</groupId>
134
- <artifactId>junit</artifactId>
135
- </dependency>
136
- </dependencies>
137
-
138
- <build>
139
- <resources>
140
- <resource>
141
- <directory>src/main/resources</directory>
142
- </resource>
143
- <resource>
144
- <directory>../lib</directory>
145
- </resource>
146
- </resources>
147
- </build>
148
- </project>
data/embulk-core/src/main/java/org/embulk/config/NextConfig.java DELETED
@@ -1,26 +0,0 @@
1
- package org.embulk.config;
2
-
3
- public interface NextConfig
4
- extends DataSource
5
- {
6
- @Override
7
- public NextConfig getNested(String attrName);
8
-
9
- @Override
10
- public NextConfig getNestedOrSetEmpty(String attrName);
11
-
12
- @Override
13
- public NextConfig set(String attrName, Object v);
14
-
15
- @Override
16
- public NextConfig setNested(String attrName, DataSource v);
17
-
18
- @Override
19
- public NextConfig setAll(DataSource other);
20
-
21
- @Override
22
- public NextConfig deepCopy();
23
-
24
- @Override
25
- public NextConfig merge(DataSource other);
26
- }
data/embulk-standards/pom.xml DELETED
@@ -1,68 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3
- <modelVersion>4.0.0</modelVersion>
4
-
5
- <parent>
6
- <groupId>org.embulk</groupId>
7
- <artifactId>embulk-parent</artifactId>
8
- <version>0.3.2-SNAPSHOT</version>
9
- </parent>
10
-
11
- <artifactId>embulk-standards</artifactId>
12
- <name>embulk-standards</name>
13
-
14
- <dependencies>
15
- <dependency>
16
- <groupId>org.embulk</groupId>
17
- <artifactId>embulk-core</artifactId>
18
- </dependency>
19
-
20
- <dependency>
21
- <groupId>org.embulk</groupId>
22
- <artifactId>embulk-core</artifactId>
23
- <type>test-jar</type>
24
- <scope>test</scope>
25
- </dependency>
26
-
27
- <dependency>
28
- <groupId>com.google.guava</groupId>
29
- <artifactId>guava</artifactId>
30
- </dependency>
31
-
32
- <dependency>
33
- <groupId>com.google.inject</groupId>
34
- <artifactId>guice</artifactId>
35
- </dependency>
36
-
37
- <dependency>
38
- <groupId>javax.validation</groupId>
39
- <artifactId>validation-api</artifactId>
40
- </dependency>
41
-
42
- <dependency>
43
- <groupId>com.fasterxml.jackson.core</groupId>
44
- <artifactId>jackson-databind</artifactId>
45
- </dependency>
46
-
47
- <dependency>
48
- <groupId>org.slf4j</groupId>
49
- <artifactId>slf4j-api</artifactId>
50
- </dependency>
51
-
52
- <dependency>
53
- <groupId>com.amazonaws</groupId>
54
- <artifactId>aws-java-sdk</artifactId>
55
- <version>1.5.2</version>
56
- </dependency>
57
-
58
- <dependency>
59
- <groupId>junit</groupId>
60
- <artifactId>junit</artifactId>
61
- </dependency>
62
-
63
- <dependency>
64
- <groupId>org.mockito</groupId>
65
- <artifactId>mockito-core</artifactId>
66
- </dependency>
67
- </dependencies>
68
- </project>
data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java DELETED
@@ -1,250 +0,0 @@
1
- package org.embulk.standards;
2
-
3
- import java.util.List;
4
- import java.io.IOException;
5
- import java.io.InputStream;
6
-
7
- import com.amazonaws.AmazonClientException;
8
- import com.amazonaws.AmazonServiceException;
9
- import com.google.common.collect.ImmutableList;
10
- import com.google.common.base.Optional;
11
- import com.fasterxml.jackson.annotation.JacksonInject;
12
- import com.amazonaws.auth.AWSCredentials;
13
- import com.amazonaws.auth.AWSCredentialsProvider;
14
- import com.amazonaws.auth.BasicAWSCredentials;
15
- import com.amazonaws.services.s3.AmazonS3Client;
16
- import com.amazonaws.services.s3.model.ListObjectsRequest;
17
- import com.amazonaws.services.s3.model.S3ObjectSummary;
18
- import com.amazonaws.services.s3.model.ObjectListing;
19
- import com.amazonaws.services.s3.model.GetObjectRequest;
20
- import com.amazonaws.services.s3.model.S3Object;
21
- import com.amazonaws.ClientConfiguration;
22
- import org.embulk.config.Config;
23
- import org.embulk.config.Task;
24
- import org.embulk.config.TaskSource;
25
- import org.embulk.config.ConfigSource;
26
- import org.embulk.config.NextConfig;
27
- import org.embulk.config.CommitReport;
28
- import org.embulk.spi.BufferAllocator;
29
- import org.embulk.spi.Exec;
30
- import org.embulk.spi.FileInputPlugin;
31
- import org.embulk.spi.TransactionalFileInput;
32
- import org.embulk.spi.util.InputStreamFileInput;
33
- import org.slf4j.Logger;
34
-
35
- import static org.embulk.spi.util.Inputs.formatPath;
36
-
37
- public class S3FileInputPlugin
38
- implements FileInputPlugin
39
- {
40
- public interface PluginTask
41
- extends Task
42
- {
43
- @Config("bucket")
44
- public String getBucket();
45
-
46
- @Config("paths")
47
- public List<String> getPathPrefixes();
48
-
49
- @Config("endpoint")
50
- public Optional<String> getEndpoint();
51
-
52
- // TODO timeout, ssl, etc
53
-
54
- @Config("access_key_id")
55
- public String getAccessKeyId();
56
-
57
- @Config("secret_access_key")
58
- public String getSecretAccessKey();
59
-
60
- // TODO support more options such as STS
61
-
62
- public List<String> getFiles();
63
- public void setFiles(List<String> files);
64
-
65
- @JacksonInject
66
- public BufferAllocator getBufferAllocator();
67
- }
68
-
69
- private final Logger log = Exec.getLogger(getClass());
70
-
71
- @Override
72
- public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
73
- {
74
- PluginTask task = config.loadConfig(PluginTask.class);
75
-
76
- // list files recursively
77
- task.setFiles(listFiles(task));
78
-
79
- // number of processors is same with number of files
80
- int processorCount = task.getFiles().size();
81
- return resume(task.dump(), processorCount, control);
82
- }
83
-
84
- @Override
85
- public NextConfig resume(TaskSource taskSource,
86
- int processorCount,
87
- FileInputPlugin.Control control)
88
- {
89
- control.run(taskSource, processorCount);
90
- return Exec.newNextConfig();
91
- }
92
-
93
- @Override
94
- public void cleanup(TaskSource taskSource,
95
- int processorCount,
96
- List<CommitReport> successCommitReports)
97
- { }
98
-
99
- public static AWSCredentialsProvider getCredentialsProvider(PluginTask task)
100
- {
101
- final AWSCredentials cred = new BasicAWSCredentials(
102
- task.getAccessKeyId(), task.getSecretAccessKey());
103
- return new AWSCredentialsProvider() {
104
- public AWSCredentials getCredentials()
105
- {
106
- return cred;
107
- }
108
-
109
- public void refresh()
110
- {
111
- }
112
- };
113
- }
114
-
115
- private static AmazonS3Client newS3Client(PluginTask task)
116
- {
117
- AWSCredentialsProvider credentials = getCredentialsProvider(task);
118
- AmazonS3Client client = newS3Client(credentials, task.getEndpoint());
119
- return client;
120
- }
121
-
122
- private static AmazonS3Client newS3Client(AWSCredentialsProvider credentials,
123
- Optional<String> endpoint)
124
- {
125
- // TODO get config from AmazonS3Task
126
- ClientConfiguration clientConfig = new ClientConfiguration();
127
- //clientConfig.setProtocol(Protocol.HTTP);
128
- clientConfig.setMaxConnections(50); // SDK default: 50
129
- clientConfig.setMaxErrorRetry(3); // SDK default: 3
130
- clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
131
-
132
- AmazonS3Client client = new AmazonS3Client(credentials, clientConfig);
133
-
134
- if (endpoint.isPresent()) {
135
- client.setEndpoint(endpoint.get());
136
- }
137
-
138
- return client;
139
- }
140
-
141
- public List<String> listFiles(PluginTask task)
142
- {
143
- AmazonS3Client client = newS3Client(task);
144
- String bucketName = task.getBucket();
145
-
146
- ImmutableList.Builder<String> builder = ImmutableList.builder();
147
- for (String prefix : task.getPathPrefixes()) {
148
- String formatted = formatPath(prefix);
149
- try {
150
- log.info("Listing S3 files with prefix '{}'", formatted);
151
- builder.addAll(listS3FilesByPrefix(client, bucketName, formatted));
152
- } catch (RuntimeException e) {
153
- throw new RuntimeException(String.format("Failed get a list of S3 files at '%s'", formatted), e);
154
- }
155
- }
156
-
157
- return builder.build();
158
- }
159
-
160
- /**
161
- * Lists S3 filenames filtered by prefix.
162
- *
163
- * The resulting list does not include the file that's size == 0.
164
- */
165
- public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName, String prefix)
166
- {
167
- // TODO implement retrying
168
- ImmutableList.Builder<String> builder = ImmutableList.builder();
169
-
170
- String lastKey = null;
171
- do {
172
- ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
173
- ObjectListing ol = client.listObjects(req);
174
- for(S3ObjectSummary s : ol.getObjectSummaries()) {
175
- if (s.getSize() > 0) {
176
- builder.add(s.getKey());
177
- }
178
- }
179
- lastKey = ol.getNextMarker();
180
- } while(lastKey != null);
181
-
182
- return builder.build();
183
- }
184
-
185
- @Override
186
- public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
187
- {
188
- PluginTask task = taskSource.loadTask(PluginTask.class);
189
- return new S3FileInput(task, processorIndex);
190
- }
191
-
192
- public static class S3FileInput
193
- extends InputStreamFileInput
194
- implements TransactionalFileInput
195
- {
196
- // TODO create single-file InputStreamFileInput utility
197
- private static class SingleFileProvider
198
- implements InputStreamFileInput.Provider
199
- {
200
- private AmazonS3Client client;
201
- private final String bucket;
202
- private final String key;
203
- private boolean opened = false;
204
-
205
- public SingleFileProvider(PluginTask task, int processorIndex)
206
- {
207
- this.client = newS3Client(task);
208
- this.bucket = task.getBucket();
209
- this.key = task.getFiles().get(processorIndex);
210
- }
211
-
212
- @Override
213
- public InputStream openNext() throws IOException
214
- {
215
- if (opened) {
216
- return null;
217
- }
218
- opened = true;
219
- GetObjectRequest request = new GetObjectRequest(bucket, key);
220
- //if (pos > 0) {
221
- // request.setRange(pos, contentLength);
222
- //}
223
- S3Object obj = client.getObject(request);
224
- //if (pos <= 0) {
225
- // // first call
226
- // contentLength = obj.getObjectMetadata().getContentLength();
227
- //}
228
- return obj.getObjectContent();
229
- }
230
-
231
- @Override
232
- public void close() { }
233
- }
234
-
235
- public S3FileInput(PluginTask task, int processorIndex)
236
- {
237
- super(task.getBufferAllocator(), new SingleFileProvider(task, processorIndex));
238
- }
239
-
240
- public void abort() { }
241
-
242
- public CommitReport commit()
243
- {
244
- return Exec.newCommitReport();
245
- }
246
-
247
- @Override
248
- public void close() { }
249
- }
250
- }