embulk 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. checksums.yaml +5 -13
  2. data/.travis.yml +16 -0
  3. data/Gemfile +0 -1
  4. data/README.md +37 -19
  5. data/Rakefile +5 -37
  6. data/bin/embulk +1 -1
  7. data/build.gradle +178 -95
  8. data/embulk-core/build.gradle +1 -1
  9. data/embulk-core/src/main/java/org/embulk/command/Runner.java +11 -10
  10. data/embulk-core/src/main/java/org/embulk/config/ConfigDiff.java +26 -0
  11. data/embulk-core/src/main/java/org/embulk/config/ConfigInject.java +14 -0
  12. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +2 -2
  13. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +4 -3
  14. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +5 -3
  15. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -0
  16. data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +6 -6
  17. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +19 -19
  18. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +61 -36
  19. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +4 -0
  20. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +16 -1
  21. data/embulk-core/src/main/java/org/embulk/spi/Column.java +1 -1
  22. data/embulk-core/src/main/java/org/embulk/spi/{SchemaVisitor.java → ColumnVisitor.java} +1 -1
  23. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +3 -3
  24. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +2 -2
  25. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +3 -3
  26. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +3 -3
  27. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +3 -3
  28. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +3 -3
  29. data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +1 -1
  30. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +2 -2
  31. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +3 -3
  32. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +3 -3
  33. data/embulk-core/src/main/java/org/embulk/spi/PluginClassLoader.java +80 -0
  34. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +1 -1
  35. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +2 -2
  36. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +2 -2
  37. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +1 -1
  38. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +2 -2
  39. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +6 -6
  40. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +6 -6
  41. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +1 -1
  42. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +1 -0
  43. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +2 -2
  44. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +1 -1
  45. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +3 -3
  46. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +4 -4
  47. data/embulk-docs/Makefile +178 -0
  48. data/embulk-docs/build.gradle +20 -0
  49. data/embulk-docs/make.bat +243 -0
  50. data/embulk-docs/push-gh-pages.sh +29 -0
  51. data/embulk-docs/src/conf.py +260 -0
  52. data/embulk-docs/src/index.rst +19 -0
  53. data/embulk-docs/src/release.rst +14 -0
  54. data/embulk-docs/src/release/release-0.1.0.rst +8 -0
  55. data/embulk-docs/src/release/release-0.2.0.rst +16 -0
  56. data/embulk-docs/src/release/release-0.2.1.rst +19 -0
  57. data/embulk-docs/src/release/release-0.3.0.rst +34 -0
  58. data/embulk-docs/src/release/release-0.3.1.rst +11 -0
  59. data/embulk-docs/src/release/release-0.3.2.rst +15 -0
  60. data/embulk-docs/src/release/release-0.4.0.rst +74 -0
  61. data/embulk-standards/build.gradle +0 -1
  62. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +2 -2
  63. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +12 -3
  64. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +2 -2
  65. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +49 -26
  66. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +16 -17
  67. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +4 -4
  68. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +0 -1
  69. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +5 -5
  70. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  71. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  72. data/lib/embulk/buffer.rb +2 -2
  73. data/lib/embulk/column.rb +6 -6
  74. data/lib/embulk/command/embulk_example.rb +1 -1
  75. data/lib/embulk/command/embulk_new_plugin.rb +87 -0
  76. data/lib/embulk/command/embulk_run.rb +84 -26
  77. data/lib/embulk/data/bundle/Gemfile +12 -20
  78. data/lib/embulk/data/bundle/embulk/{filter_example.rb → filter/example.rb} +3 -3
  79. data/lib/embulk/data/bundle/embulk/{input_example.rb → input/example.rb} +3 -3
  80. data/lib/embulk/data/bundle/embulk/{output_example.rb → output/example.rb} +3 -3
  81. data/lib/embulk/data/new/LICENSE.txt +21 -0
  82. data/lib/embulk/data/new/README.md.erb +75 -0
  83. data/lib/embulk/data/new/gitignore.erb +12 -0
  84. data/lib/embulk/data/new/java/build.gradle.erb +57 -0
  85. data/lib/embulk/data/new/java/decoder.java.erb +40 -0
  86. data/lib/embulk/data/new/java/encoder.java.erb +40 -0
  87. data/lib/embulk/data/new/java/file_input.java.erb +64 -0
  88. data/lib/embulk/data/new/java/file_output.java.erb +66 -0
  89. data/lib/embulk/data/new/java/filter.java.erb +47 -0
  90. data/lib/embulk/data/new/java/formatter.java.erb +45 -0
  91. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  92. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +6 -0
  93. data/lib/embulk/data/new/java/gradlew +164 -0
  94. data/lib/embulk/data/new/java/gradlew.bat +90 -0
  95. data/lib/embulk/data/new/java/input.java.erb +69 -0
  96. data/lib/embulk/data/new/java/output.java.erb +65 -0
  97. data/lib/embulk/data/new/java/parser.java.erb +51 -0
  98. data/lib/embulk/data/new/java/plugin_loader.rb.erb +3 -0
  99. data/lib/embulk/data/new/java/test.java.erb +5 -0
  100. data/lib/embulk/data/new/ruby/Gemfile +2 -0
  101. data/lib/embulk/data/new/ruby/Rakefile +1 -0
  102. data/lib/embulk/data/new/ruby/filter.rb.erb +39 -0
  103. data/lib/embulk/data/new/ruby/gemspec.erb +19 -0
  104. data/lib/embulk/data/new/ruby/input.rb.erb +47 -0
  105. data/lib/embulk/data/new/ruby/output.rb.erb +59 -0
  106. data/lib/embulk/data/package_data.rb +64 -0
  107. data/lib/embulk/data_source.rb +2 -2
  108. data/lib/embulk/decoder_plugin.rb +27 -0
  109. data/lib/embulk/encoder_plugin.rb +27 -0
  110. data/lib/embulk/error.rb +3 -0
  111. data/lib/embulk/file_input_plugin.rb +27 -0
  112. data/lib/embulk/file_output_plugin.rb +27 -0
  113. data/lib/embulk/filter_plugin.rb +28 -9
  114. data/lib/embulk/formatter_plugin.rb +105 -0
  115. data/lib/embulk/guess_csv.rb +10 -1
  116. data/lib/embulk/guess_plugin.rb +22 -27
  117. data/lib/embulk/input_plugin.rb +34 -20
  118. data/lib/embulk/java/bootstrap.rb +5 -0
  119. data/lib/embulk/java/imports.rb +7 -0
  120. data/lib/embulk/java_plugin.rb +84 -0
  121. data/lib/embulk/output_plugin.rb +35 -19
  122. data/lib/embulk/page.rb +1 -1
  123. data/lib/embulk/page_builder.rb +1 -1
  124. data/lib/embulk/parser_plugin.rb +76 -0
  125. data/lib/embulk/plugin.rb +130 -65
  126. data/lib/embulk/plugin_registry.rb +19 -8
  127. data/lib/embulk/schema.rb +4 -4
  128. data/lib/embulk/version.rb +1 -1
  129. data/settings.gradle +1 -0
  130. metadata +123 -90
  131. data/ChangeLog +0 -46
  132. data/embulk-cli/pom.xml +0 -94
  133. data/embulk-core/pom.xml +0 -148
  134. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +0 -26
  135. data/embulk-standards/pom.xml +0 -68
  136. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +0 -250
  137. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +0 -43
  138. data/pom.xml +0 -541
data/ChangeLog DELETED
@@ -1,46 +0,0 @@
1
-
2
- 2015-02-04 version 0.3.2:
3
-
4
- * Fixed a problem where ruby input plugins can't use timestamp type (reported
5
- by @shun0102)
6
- * Embulk::Page includes Enumerable to include map, each_with_index, and other
7
- a lot of convenient methods (@niku++)
8
- * Fixed TimestampType::DEFAULT_FORMAT to use ':' as the separator of times
9
-
10
-
11
- 2015-02-04 version 0.3.1:
12
-
13
- * Added -C, --classpath option to add java classpath
14
-
15
-
16
- 2015-02-03 version 0.3.0:
17
-
18
- * Added resume functionality. InputPlugin and OutputPlugin needs to implement
19
- resume and cleanup methods.
20
- * cli: embulk-run supports -r, --resume-state PATH option.
21
- * Added FilterInputPlugin Java API.
22
- * Added FilterInputPlugin JRuby API.
23
- * Configuration file accepts filters: array entry.
24
- * Added gradle-versions-plugin to build.gradle (@seratch++)
25
- * Fixed broken dependencies at build.gradle (@thagikura++)
26
-
27
-
28
- 2015-01-29 version 0.2.1:
29
-
30
- * Fixed LineEncoder#finish to flush all remaining buffer (reported by @aibou)
31
- * Fixed NextConfig to be merged to in: or out: rather than the top-level
32
- (reported by enukane) [#41]
33
- * ./bin/embulk shows warns to run `rake` if ./classpath doesn't exist
34
- * Embulk::PageBuilder#add accepts nil
35
-
36
-
37
- 2015-01-26 version 0.2.0:
38
-
39
- * Changed JRuby InputPlugin API to use #run instead of .run
40
- * Fixed 'example' subcommand to quote file path for Windows environment
41
-
42
-
43
- 2015-01-26 version 0.1.0:
44
-
45
- * The first release
46
-
data/embulk-cli/pom.xml DELETED
@@ -1,94 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3
- <modelVersion>4.0.0</modelVersion>
4
-
5
- <parent>
6
- <groupId>org.embulk</groupId>
7
- <artifactId>embulk-parent</artifactId>
8
- <version>0.3.2-SNAPSHOT</version>
9
- </parent>
10
-
11
- <artifactId>embulk-cli</artifactId>
12
- <name>embulk-cli</name>
13
-
14
- <dependencies>
15
- <dependency>
16
- <groupId>org.jruby</groupId>
17
- <artifactId>jruby-complete</artifactId>
18
- </dependency>
19
-
20
- <dependency>
21
- <groupId>org.embulk</groupId>
22
- <artifactId>embulk-core</artifactId>
23
- </dependency>
24
-
25
- <!-- TODO move embulk-standards to another repository -->
26
- <!-- TODO once embulk-standards is moved to another repository, replace this dependency with gem from rubygems-proxy -->
27
- <!-- and change Rakefile to include embulk-core instead of embulk-cli and gemspec depends on embulk-standards -->
28
- <dependency>
29
- <groupId>org.embulk</groupId>
30
- <artifactId>embulk-standards</artifactId>
31
- </dependency>
32
- </dependencies>
33
-
34
- <repositories>
35
- <!-- See https://github.com/jruby/jruby/wiki/Jruby-Scripting-container-using-Gems-with-a-Maven-Project -->
36
- <repository>
37
- <id>rubygems-proxy</id>
38
- <name>RubyGems Proxy</name>
39
- <url>http://rubygems-proxy.torquebox.org/releases</url>
40
- <layout>default</layout>
41
- <releases>
42
- <enabled>true</enabled>
43
- </releases>
44
- <snapshots>
45
- <enabled>false</enabled>
46
- <updatePolicy>never</updatePolicy>
47
- </snapshots>
48
- </repository>
49
- </repositories>
50
-
51
- <build>
52
- <plugins>
53
- <plugin>
54
- <groupId>de.saumya.mojo</groupId>
55
- <artifactId>gem-maven-plugin</artifactId>
56
- <version>1.0.0</version>
57
- <configuration>
58
- <includeRubygemsInResources>true</includeRubygemsInResources>
59
- </configuration>
60
- <executions>
61
- <execution>
62
- <goals>
63
- <goal>initialize</goal>
64
- </goals>
65
- </execution>
66
- </executions>
67
- </plugin>
68
-
69
- <plugin>
70
- <groupId>org.apache.maven.plugins</groupId>
71
- <artifactId>maven-shade-plugin</artifactId>
72
- <executions>
73
- <execution>
74
- <phase>package</phase>
75
- <goals>
76
- <goal>shade</goal>
77
- </goals>
78
- <configuration>
79
- <shadedArtifactAttached>true</shadedArtifactAttached>
80
- <shadedClassifierName>executable</shadedClassifierName>
81
- <transformers>
82
- <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
83
- <manifestEntries>
84
- <Main-Class>org.embulk.cli.Main</Main-Class>
85
- </manifestEntries>
86
- </transformer>
87
- </transformers>
88
- </configuration>
89
- </execution>
90
- </executions>
91
- </plugin>
92
- </plugins>
93
- </build>
94
- </project>
data/embulk-core/pom.xml DELETED
@@ -1,148 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3
- <modelVersion>4.0.0</modelVersion>
4
-
5
- <parent>
6
- <groupId>org.embulk</groupId>
7
- <artifactId>embulk-parent</artifactId>
8
- <version>0.3.2-SNAPSHOT</version>
9
- </parent>
10
-
11
- <artifactId>embulk-core</artifactId>
12
- <name>embulk-core</name>
13
-
14
- <dependencies>
15
- <dependency>
16
- <groupId>commons-logging</groupId>
17
- <artifactId>commons-logging</artifactId>
18
- </dependency>
19
-
20
- <dependency>
21
- <groupId>com.google.guava</groupId>
22
- <artifactId>guava</artifactId>
23
- </dependency>
24
-
25
- <dependency>
26
- <groupId>com.google.inject</groupId>
27
- <artifactId>guice</artifactId>
28
- </dependency>
29
-
30
- <dependency>
31
- <groupId>com.google.inject.extensions</groupId>
32
- <artifactId>guice-multibindings</artifactId>
33
- </dependency>
34
-
35
- <dependency>
36
- <groupId>javax.inject</groupId>
37
- <artifactId>javax.inject</artifactId>
38
- </dependency>
39
-
40
- <dependency>
41
- <groupId>com.fasterxml.jackson.core</groupId>
42
- <artifactId>jackson-databind</artifactId>
43
- </dependency>
44
-
45
- <dependency>
46
- <groupId>com.fasterxml.jackson.core</groupId>
47
- <artifactId>jackson-annotations</artifactId>
48
- </dependency>
49
-
50
- <dependency>
51
- <groupId>com.fasterxml.jackson.core</groupId>
52
- <artifactId>jackson-core</artifactId>
53
- </dependency>
54
-
55
- <dependency>
56
- <groupId>com.fasterxml.jackson.datatype</groupId>
57
- <artifactId>jackson-datatype-guava</artifactId>
58
- </dependency>
59
-
60
- <dependency>
61
- <groupId>com.fasterxml.jackson.module</groupId>
62
- <artifactId>jackson-module-guice</artifactId>
63
- </dependency>
64
-
65
- <dependency>
66
- <groupId>com.fasterxml.jackson.datatype</groupId>
67
- <artifactId>jackson-datatype-joda</artifactId>
68
- </dependency>
69
-
70
- <dependency>
71
- <groupId>org.slf4j</groupId>
72
- <artifactId>slf4j-api</artifactId>
73
- </dependency>
74
-
75
- <dependency>
76
- <groupId>org.slf4j</groupId>
77
- <artifactId>slf4j-log4j12</artifactId>
78
- </dependency>
79
-
80
- <dependency>
81
- <groupId>log4j</groupId>
82
- <artifactId>log4j</artifactId>
83
- </dependency>
84
-
85
- <dependency>
86
- <groupId>joda-time</groupId>
87
- <artifactId>joda-time</artifactId>
88
- </dependency>
89
-
90
- <dependency>
91
- <groupId>org.yaml</groupId>
92
- <artifactId>snakeyaml</artifactId>
93
- </dependency>
94
-
95
- <dependency>
96
- <groupId>javax.validation</groupId>
97
- <artifactId>validation-api</artifactId>
98
- </dependency>
99
-
100
- <dependency>
101
- <groupId>org.apache.bval</groupId>
102
- <artifactId>bval-jsr303</artifactId>
103
- </dependency>
104
-
105
- <dependency>
106
- <groupId>io.airlift</groupId>
107
- <artifactId>slice</artifactId>
108
- </dependency>
109
-
110
- <dependency>
111
- <groupId>io.netty</groupId>
112
- <artifactId>netty-buffer</artifactId>
113
- </dependency>
114
-
115
- <dependency>
116
- <groupId>org.jruby</groupId>
117
- <artifactId>jruby-complete</artifactId>
118
- </dependency>
119
-
120
- <dependency>
121
- <groupId>com.google.code.findbugs</groupId>
122
- <artifactId>annotations</artifactId>
123
- </dependency>
124
-
125
- <!-- for guess_charset plugin -->
126
- <dependency>
127
- <groupId>com.ibm.icu</groupId>
128
- <artifactId>icu4j</artifactId>
129
- <scope>runtime</scope>
130
- </dependency>
131
-
132
- <dependency>
133
- <groupId>junit</groupId>
134
- <artifactId>junit</artifactId>
135
- </dependency>
136
- </dependencies>
137
-
138
- <build>
139
- <resources>
140
- <resource>
141
- <directory>src/main/resources</directory>
142
- </resource>
143
- <resource>
144
- <directory>../lib</directory>
145
- </resource>
146
- </resources>
147
- </build>
148
- </project>
data/embulk-core/src/main/java/org/embulk/config/NextConfig.java DELETED
@@ -1,26 +0,0 @@
1
- package org.embulk.config;
2
-
3
- public interface NextConfig
4
- extends DataSource
5
- {
6
- @Override
7
- public NextConfig getNested(String attrName);
8
-
9
- @Override
10
- public NextConfig getNestedOrSetEmpty(String attrName);
11
-
12
- @Override
13
- public NextConfig set(String attrName, Object v);
14
-
15
- @Override
16
- public NextConfig setNested(String attrName, DataSource v);
17
-
18
- @Override
19
- public NextConfig setAll(DataSource other);
20
-
21
- @Override
22
- public NextConfig deepCopy();
23
-
24
- @Override
25
- public NextConfig merge(DataSource other);
26
- }
data/embulk-standards/pom.xml DELETED
@@ -1,68 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3
- <modelVersion>4.0.0</modelVersion>
4
-
5
- <parent>
6
- <groupId>org.embulk</groupId>
7
- <artifactId>embulk-parent</artifactId>
8
- <version>0.3.2-SNAPSHOT</version>
9
- </parent>
10
-
11
- <artifactId>embulk-standards</artifactId>
12
- <name>embulk-standards</name>
13
-
14
- <dependencies>
15
- <dependency>
16
- <groupId>org.embulk</groupId>
17
- <artifactId>embulk-core</artifactId>
18
- </dependency>
19
-
20
- <dependency>
21
- <groupId>org.embulk</groupId>
22
- <artifactId>embulk-core</artifactId>
23
- <type>test-jar</type>
24
- <scope>test</scope>
25
- </dependency>
26
-
27
- <dependency>
28
- <groupId>com.google.guava</groupId>
29
- <artifactId>guava</artifactId>
30
- </dependency>
31
-
32
- <dependency>
33
- <groupId>com.google.inject</groupId>
34
- <artifactId>guice</artifactId>
35
- </dependency>
36
-
37
- <dependency>
38
- <groupId>javax.validation</groupId>
39
- <artifactId>validation-api</artifactId>
40
- </dependency>
41
-
42
- <dependency>
43
- <groupId>com.fasterxml.jackson.core</groupId>
44
- <artifactId>jackson-databind</artifactId>
45
- </dependency>
46
-
47
- <dependency>
48
- <groupId>org.slf4j</groupId>
49
- <artifactId>slf4j-api</artifactId>
50
- </dependency>
51
-
52
- <dependency>
53
- <groupId>com.amazonaws</groupId>
54
- <artifactId>aws-java-sdk</artifactId>
55
- <version>1.5.2</version>
56
- </dependency>
57
-
58
- <dependency>
59
- <groupId>junit</groupId>
60
- <artifactId>junit</artifactId>
61
- </dependency>
62
-
63
- <dependency>
64
- <groupId>org.mockito</groupId>
65
- <artifactId>mockito-core</artifactId>
66
- </dependency>
67
- </dependencies>
68
- </project>
data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java DELETED
@@ -1,250 +0,0 @@
1
- package org.embulk.standards;
2
-
3
- import java.util.List;
4
- import java.io.IOException;
5
- import java.io.InputStream;
6
-
7
- import com.amazonaws.AmazonClientException;
8
- import com.amazonaws.AmazonServiceException;
9
- import com.google.common.collect.ImmutableList;
10
- import com.google.common.base.Optional;
11
- import com.fasterxml.jackson.annotation.JacksonInject;
12
- import com.amazonaws.auth.AWSCredentials;
13
- import com.amazonaws.auth.AWSCredentialsProvider;
14
- import com.amazonaws.auth.BasicAWSCredentials;
15
- import com.amazonaws.services.s3.AmazonS3Client;
16
- import com.amazonaws.services.s3.model.ListObjectsRequest;
17
- import com.amazonaws.services.s3.model.S3ObjectSummary;
18
- import com.amazonaws.services.s3.model.ObjectListing;
19
- import com.amazonaws.services.s3.model.GetObjectRequest;
20
- import com.amazonaws.services.s3.model.S3Object;
21
- import com.amazonaws.ClientConfiguration;
22
- import org.embulk.config.Config;
23
- import org.embulk.config.Task;
24
- import org.embulk.config.TaskSource;
25
- import org.embulk.config.ConfigSource;
26
- import org.embulk.config.NextConfig;
27
- import org.embulk.config.CommitReport;
28
- import org.embulk.spi.BufferAllocator;
29
- import org.embulk.spi.Exec;
30
- import org.embulk.spi.FileInputPlugin;
31
- import org.embulk.spi.TransactionalFileInput;
32
- import org.embulk.spi.util.InputStreamFileInput;
33
- import org.slf4j.Logger;
34
-
35
- import static org.embulk.spi.util.Inputs.formatPath;
36
-
37
- public class S3FileInputPlugin
38
- implements FileInputPlugin
39
- {
40
- public interface PluginTask
41
- extends Task
42
- {
43
- @Config("bucket")
44
- public String getBucket();
45
-
46
- @Config("paths")
47
- public List<String> getPathPrefixes();
48
-
49
- @Config("endpoint")
50
- public Optional<String> getEndpoint();
51
-
52
- // TODO timeout, ssl, etc
53
-
54
- @Config("access_key_id")
55
- public String getAccessKeyId();
56
-
57
- @Config("secret_access_key")
58
- public String getSecretAccessKey();
59
-
60
- // TODO support more options such as STS
61
-
62
- public List<String> getFiles();
63
- public void setFiles(List<String> files);
64
-
65
- @JacksonInject
66
- public BufferAllocator getBufferAllocator();
67
- }
68
-
69
- private final Logger log = Exec.getLogger(getClass());
70
-
71
- @Override
72
- public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
73
- {
74
- PluginTask task = config.loadConfig(PluginTask.class);
75
-
76
- // list files recursively
77
- task.setFiles(listFiles(task));
78
-
79
- // number of processors is same with number of files
80
- int processorCount = task.getFiles().size();
81
- return resume(task.dump(), processorCount, control);
82
- }
83
-
84
- @Override
85
- public NextConfig resume(TaskSource taskSource,
86
- int processorCount,
87
- FileInputPlugin.Control control)
88
- {
89
- control.run(taskSource, processorCount);
90
- return Exec.newNextConfig();
91
- }
92
-
93
- @Override
94
- public void cleanup(TaskSource taskSource,
95
- int processorCount,
96
- List<CommitReport> successCommitReports)
97
- { }
98
-
99
- public static AWSCredentialsProvider getCredentialsProvider(PluginTask task)
100
- {
101
- final AWSCredentials cred = new BasicAWSCredentials(
102
- task.getAccessKeyId(), task.getSecretAccessKey());
103
- return new AWSCredentialsProvider() {
104
- public AWSCredentials getCredentials()
105
- {
106
- return cred;
107
- }
108
-
109
- public void refresh()
110
- {
111
- }
112
- };
113
- }
114
-
115
- private static AmazonS3Client newS3Client(PluginTask task)
116
- {
117
- AWSCredentialsProvider credentials = getCredentialsProvider(task);
118
- AmazonS3Client client = newS3Client(credentials, task.getEndpoint());
119
- return client;
120
- }
121
-
122
- private static AmazonS3Client newS3Client(AWSCredentialsProvider credentials,
123
- Optional<String> endpoint)
124
- {
125
- // TODO get config from AmazonS3Task
126
- ClientConfiguration clientConfig = new ClientConfiguration();
127
- //clientConfig.setProtocol(Protocol.HTTP);
128
- clientConfig.setMaxConnections(50); // SDK default: 50
129
- clientConfig.setMaxErrorRetry(3); // SDK default: 3
130
- clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
131
-
132
- AmazonS3Client client = new AmazonS3Client(credentials, clientConfig);
133
-
134
- if (endpoint.isPresent()) {
135
- client.setEndpoint(endpoint.get());
136
- }
137
-
138
- return client;
139
- }
140
-
141
- public List<String> listFiles(PluginTask task)
142
- {
143
- AmazonS3Client client = newS3Client(task);
144
- String bucketName = task.getBucket();
145
-
146
- ImmutableList.Builder<String> builder = ImmutableList.builder();
147
- for (String prefix : task.getPathPrefixes()) {
148
- String formatted = formatPath(prefix);
149
- try {
150
- log.info("Listing S3 files with prefix '{}'", formatted);
151
- builder.addAll(listS3FilesByPrefix(client, bucketName, formatted));
152
- } catch (RuntimeException e) {
153
- throw new RuntimeException(String.format("Failed get a list of S3 files at '%s'", formatted), e);
154
- }
155
- }
156
-
157
- return builder.build();
158
- }
159
-
160
- /**
161
- * Lists S3 filenames filtered by prefix.
162
- *
163
- * The resulting list does not include the file that's size == 0.
164
- */
165
- public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName, String prefix)
166
- {
167
- // TODO implement retrying
168
- ImmutableList.Builder<String> builder = ImmutableList.builder();
169
-
170
- String lastKey = null;
171
- do {
172
- ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
173
- ObjectListing ol = client.listObjects(req);
174
- for(S3ObjectSummary s : ol.getObjectSummaries()) {
175
- if (s.getSize() > 0) {
176
- builder.add(s.getKey());
177
- }
178
- }
179
- lastKey = ol.getNextMarker();
180
- } while(lastKey != null);
181
-
182
- return builder.build();
183
- }
184
-
185
- @Override
186
- public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
187
- {
188
- PluginTask task = taskSource.loadTask(PluginTask.class);
189
- return new S3FileInput(task, processorIndex);
190
- }
191
-
192
- public static class S3FileInput
193
- extends InputStreamFileInput
194
- implements TransactionalFileInput
195
- {
196
- // TODO create single-file InputStreamFileInput utility
197
- private static class SingleFileProvider
198
- implements InputStreamFileInput.Provider
199
- {
200
- private AmazonS3Client client;
201
- private final String bucket;
202
- private final String key;
203
- private boolean opened = false;
204
-
205
- public SingleFileProvider(PluginTask task, int processorIndex)
206
- {
207
- this.client = newS3Client(task);
208
- this.bucket = task.getBucket();
209
- this.key = task.getFiles().get(processorIndex);
210
- }
211
-
212
- @Override
213
- public InputStream openNext() throws IOException
214
- {
215
- if (opened) {
216
- return null;
217
- }
218
- opened = true;
219
- GetObjectRequest request = new GetObjectRequest(bucket, key);
220
- //if (pos > 0) {
221
- // request.setRange(pos, contentLength);
222
- //}
223
- S3Object obj = client.getObject(request);
224
- //if (pos <= 0) {
225
- // // first call
226
- // contentLength = obj.getObjectMetadata().getContentLength();
227
- //}
228
- return obj.getObjectContent();
229
- }
230
-
231
- @Override
232
- public void close() { }
233
- }
234
-
235
- public S3FileInput(PluginTask task, int processorIndex)
236
- {
237
- super(task.getBufferAllocator(), new SingleFileProvider(task, processorIndex));
238
- }
239
-
240
- public void abort() { }
241
-
242
- public CommitReport commit()
243
- {
244
- return Exec.newCommitReport();
245
- }
246
-
247
- @Override
248
- public void close() { }
249
- }
250
- }