embulk-output-s3_parquet 0.0.2

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.scalafmt.conf +9 -0
  4. data/CHANGELOG.md +9 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +122 -0
  7. data/build.gradle +101 -0
  8. data/example/config.yml +25 -0
  9. data/example/data.tsv +5 -0
  10. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  11. data/gradle/wrapper/gradle-wrapper.properties +5 -0
  12. data/gradlew +172 -0
  13. data/gradlew.bat +84 -0
  14. data/lib/embulk/output/s3_parquet.rb +3 -0
  15. data/settings.gradle +1 -0
  16. data/src/main/scala/org/embulk/output/s3_parquet/S3ParquetOutputPlugin.scala +199 -0
  17. data/src/main/scala/org/embulk/output/s3_parquet/S3ParquetPageOutput.scala +65 -0
  18. data/src/main/scala/org/embulk/output/s3_parquet/aws/Aws.scala +45 -0
  19. data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsClientConfiguration.scala +34 -0
  20. data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsCredentials.scala +128 -0
  21. data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsEndpointConfiguration.scala +49 -0
  22. data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsS3Configuration.scala +56 -0
  23. data/src/main/scala/org/embulk/output/s3_parquet/aws/HttpProxy.scala +56 -0
  24. data/src/main/scala/org/embulk/output/s3_parquet/parquet/EmbulkMessageType.scala +59 -0
  25. data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriteSupport.scala +33 -0
  26. data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriter.scala +125 -0
  27. data/src/test/resources/org/embulk/output/s3_parquet/in1.csv +6 -0
  28. data/src/test/resources/org/embulk/output/s3_parquet/out1.tsv +5 -0
  29. data/src/test/scala/org/embulk/output/s3_parquet/TestS3ParquetOutputPlugin.scala +140 -0
  30. metadata +184 -0
data/gradlew.bat ADDED
@@ -0,0 +1,84 @@
+ @if "%DEBUG%" == "" @echo off
+ @rem ##########################################################################
+ @rem
+ @rem Gradle startup script for Windows
+ @rem
+ @rem ##########################################################################
+
+ @rem Set local scope for the variables with windows NT shell
+ if "%OS%"=="Windows_NT" setlocal
+
+ set DIRNAME=%~dp0
+ if "%DIRNAME%" == "" set DIRNAME=.
+ set APP_BASE_NAME=%~n0
+ set APP_HOME=%DIRNAME%
+
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+ set DEFAULT_JVM_OPTS=
+
+ @rem Find java.exe
+ if defined JAVA_HOME goto findJavaFromJavaHome
+
+ set JAVA_EXE=java.exe
+ %JAVA_EXE% -version >NUL 2>&1
+ if "%ERRORLEVEL%" == "0" goto init
+
+ echo.
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+ echo.
+ echo Please set the JAVA_HOME variable in your environment to match the
+ echo location of your Java installation.
+
+ goto fail
+
+ :findJavaFromJavaHome
+ set JAVA_HOME=%JAVA_HOME:"=%
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+ if exist "%JAVA_EXE%" goto init
+
+ echo.
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+ echo.
+ echo Please set the JAVA_HOME variable in your environment to match the
+ echo location of your Java installation.
+
+ goto fail
+
+ :init
+ @rem Get command-line arguments, handling Windows variants
+
+ if not "%OS%" == "Windows_NT" goto win9xME_args
+
+ :win9xME_args
+ @rem Slurp the command line arguments.
+ set CMD_LINE_ARGS=
+ set _SKIP=2
+
+ :win9xME_args_slurp
+ if "x%~1" == "x" goto execute
+
+ set CMD_LINE_ARGS=%*
+
+ :execute
+ @rem Setup the command line
+
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+ @rem Execute Gradle
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
+
+ :end
+ @rem End local scope for the variables with windows NT shell
+ if "%ERRORLEVEL%"=="0" goto mainEnd
+
+ :fail
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+ rem the _cmd.exe /c_ return code!
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+ exit /b 1
+
+ :mainEnd
+ if "%OS%"=="Windows_NT" endlocal
+
+ :omega
data/lib/embulk/output/s3_parquet.rb ADDED
@@ -0,0 +1,3 @@
+ Embulk::JavaPlugin.register_output(
+   "s3_parquet", "org.embulk.output.s3_parquet.S3ParquetOutputPlugin",
+   File.expand_path('../../../../classpath', __FILE__))
data/settings.gradle ADDED
@@ -0,0 +1 @@
+ rootProject.name = 'embulk-output-s3_parquet'
data/src/main/scala/org/embulk/output/s3_parquet/S3ParquetOutputPlugin.scala ADDED
@@ -0,0 +1,199 @@
+ package org.embulk.output.s3_parquet
+
+
+ import java.nio.file.{Files, Paths}
+ import java.util.{IllegalFormatException, Locale, Optional, List => JList, Map => JMap}
+
+ import com.amazonaws.services.s3.model.CannedAccessControlList
+ import org.apache.parquet.column.ParquetProperties
+ import org.apache.parquet.hadoop.ParquetWriter
+ import org.apache.parquet.hadoop.metadata.CompressionCodecName
+ import org.embulk.config.{Config, ConfigDefault, ConfigDiff, ConfigException, ConfigSource, Task, TaskReport, TaskSource}
+ import org.embulk.output.s3_parquet.S3ParquetOutputPlugin.PluginTask
+ import org.embulk.output.s3_parquet.aws.Aws
+ import org.embulk.output.s3_parquet.parquet.ParquetFileWriter
+ import org.embulk.spi.{Exec, OutputPlugin, PageReader, Schema, TransactionalPageOutput}
+ import org.embulk.spi.time.TimestampFormatter
+ import org.embulk.spi.time.TimestampFormatter.TimestampColumnOption
+ import org.embulk.spi.util.Timestamps
+ import org.slf4j.Logger
+
+ object S3ParquetOutputPlugin {
+
+   trait PluginTask
+     extends Task
+       with TimestampFormatter.Task
+       with Aws.Task {
+
+     @Config("bucket")
+     def getBucket: String
+
+     @Config("path_prefix")
+     @ConfigDefault("\"\"")
+     def getPathPrefix: String
+
+     @Config("sequence_format")
+     @ConfigDefault("\"%03d.%02d.\"")
+     def getSequenceFormat: String
+
+     @Config("file_ext")
+     @ConfigDefault("\"parquet\"")
+     def getFileExt: String
+
+     @Config("compression_codec")
+     @ConfigDefault("\"uncompressed\"")
+     def getCompressionCodecString: String
+
+     def setCompressionCodec(v: CompressionCodecName): Unit
+
+     def getCompressionCodec: CompressionCodecName
+
+     @Config("column_options")
+     @ConfigDefault("{}")
+     def getColumnOptions: JMap[String, TimestampColumnOption]
+
+     @Config("canned_acl")
+     @ConfigDefault("\"private\"")
+     def getCannedAclString: String
+
+     def setCannedAcl(v: CannedAccessControlList): Unit
+
+     def getCannedAcl: CannedAccessControlList
+
+     @Config("block_size")
+     @ConfigDefault("null")
+     def getBlockSize: Optional[Int]
+
+     @Config("page_size")
+     @ConfigDefault("null")
+     def getPageSize: Optional[Int]
+
+     @Config("max_padding_size")
+     @ConfigDefault("null")
+     def getMaxPaddingSize: Optional[Int]
+
+     @Config("enable_dictionary_encoding")
+     @ConfigDefault("null")
+     def getEnableDictionaryEncoding: Optional[Boolean]
+
+     @Config("buffer_dir")
+     @ConfigDefault("null")
+     def getBufferDir: Optional[String]
+
+   }
+
+ }
+
+ class S3ParquetOutputPlugin
+   extends OutputPlugin {
+
+   val logger: Logger = Exec.getLogger(classOf[S3ParquetOutputPlugin])
+
+   private def withPluginContextClassLoader[A](f: => A): A = {
+     val original: ClassLoader = Thread.currentThread.getContextClassLoader
+     Thread.currentThread.setContextClassLoader(classOf[S3ParquetOutputPlugin].getClassLoader)
+     try f
+     finally Thread.currentThread.setContextClassLoader(original)
+   }
+
+   override def transaction(config: ConfigSource,
+                            schema: Schema,
+                            taskCount: Int,
+                            control: OutputPlugin.Control): ConfigDiff = {
+     val task: PluginTask = config.loadConfig(classOf[PluginTask])
+
+     withPluginContextClassLoader {
+       configure(task, schema)
+       control.run(task.dump)
+     }
+
+     Exec.newConfigDiff
+   }
+
+   private def configure(task: PluginTask,
+                         schema: Schema): Unit = {
+     // sequence_format
+     try String.format(task.getSequenceFormat, 0: Integer, 0: Integer)
+     catch {
+       case e: IllegalFormatException => throw new ConfigException(s"Invalid sequence_format: ${task.getSequenceFormat}", e)
+     }
+
+     // compression_codec
+     CompressionCodecName.values().find(v => v.name().toLowerCase(Locale.ENGLISH).equals(task.getCompressionCodecString)) match {
+       case Some(v) => task.setCompressionCodec(v)
+       case None =>
+         val unsupported: String = task.getCompressionCodecString
+         val supported: String = CompressionCodecName.values().map(v => s"'${v.name().toLowerCase}'").mkString(", ")
+         throw new ConfigException(s"'$unsupported' is unsupported: `compression_codec` must be one of [$supported].")
+     }
+
+     // column_options
+     task.getColumnOptions.forEach { (k: String,
+                                      _) =>
+       val c = schema.lookupColumn(k)
+       if (!c.getType.getName.equals("timestamp")) throw new ConfigException(s"column:$k is not 'timestamp' type.")
+     }
+
+     // canned_acl
+     CannedAccessControlList.values().find(v => v.toString.equals(task.getCannedAclString)) match {
+       case Some(v) => task.setCannedAcl(v)
+       case None =>
+         val unsupported: String = task.getCannedAclString
+         val supported: String = CannedAccessControlList.values().map(v => s"'${v.toString}'").mkString(", ")
+         throw new ConfigException(s"'$unsupported' is unsupported: `canned_acl` must be one of [$supported].")
+     }
+   }
+
+   override def resume(taskSource: TaskSource,
+                       schema: Schema,
+                       taskCount: Int,
+                       control: OutputPlugin.Control): ConfigDiff = {
+     throw new UnsupportedOperationException("s3_parquet output plugin does not support resuming")
+   }
+
+   override def cleanup(taskSource: TaskSource,
+                        schema: Schema,
+                        taskCount: Int,
+                        successTaskReports: JList[TaskReport]): Unit = {
+     successTaskReports.forEach { tr =>
+       logger.info(
+         s"Created: s3://${tr.get(classOf[String], "bucket")}/${tr.get(classOf[String], "key")}, "
+           + s"version_id: ${tr.get(classOf[String], "version_id", null)}, "
+           + s"etag: ${tr.get(classOf[String], "etag", null)}")
+     }
+   }
+
+   override def open(taskSource: TaskSource,
+                     schema: Schema,
+                     taskIndex: Int): TransactionalPageOutput = {
+     val task = taskSource.loadTask(classOf[PluginTask])
+     val bufferDir: String = task.getBufferDir.orElse(Files.createTempDirectory("embulk-output-s3_parquet-").toString)
+     val bufferFile: String = Paths.get(bufferDir, s"embulk-output-s3_parquet-task-$taskIndex-0.parquet").toString
+     val destS3bucket: String = task.getBucket
+     val destS3Key: String = task.getPathPrefix + String.format(task.getSequenceFormat, taskIndex: Integer, 0: Integer) + task.getFileExt
+
+
+     val pageReader: PageReader = new PageReader(schema)
+     val aws: Aws = Aws(task)
+     val timestampFormatters: Seq[TimestampFormatter] = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions)
+     val parquetWriter: ParquetWriter[PageReader] = ParquetFileWriter.builder()
+       .withPath(bufferFile)
+       .withSchema(schema)
+       .withTimestampFormatters(timestampFormatters)
+       .withCompressionCodec(task.getCompressionCodec)
+       .withDictionaryEncoding(task.getEnableDictionaryEncoding.orElse(ParquetProperties.DEFAULT_IS_DICTIONARY_ENABLED))
+       .withDictionaryPageSize(task.getPageSize.orElse(ParquetProperties.DEFAULT_DICTIONARY_PAGE_SIZE))
+       .withMaxPaddingSize(task.getMaxPaddingSize.orElse(ParquetWriter.MAX_PADDING_SIZE_DEFAULT))
+       .withPageSize(task.getPageSize.orElse(ParquetProperties.DEFAULT_PAGE_SIZE))
+       .withRowGroupSize(task.getBlockSize.orElse(ParquetWriter.DEFAULT_BLOCK_SIZE))
+       .withValidation(ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED)
+       .withWriteMode(org.apache.parquet.hadoop.ParquetFileWriter.Mode.CREATE)
+       .withWriterVersion(ParquetProperties.DEFAULT_WRITER_VERSION)
+       .build()
+
+     logger.info(s"Local Buffer File: $bufferFile, Destination: s3://$destS3bucket/$destS3Key")
+
+     S3ParquetPageOutput(bufferFile, pageReader, parquetWriter, aws, destS3bucket, destS3Key)
+   }
+
+ }
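
Note on the destination key built in open above: it is the plain concatenation of path_prefix, the formatted sequence_format, and file_ext. A minimal sketch with hypothetical values (the defaults come from the @ConfigDefault annotations in this file):

    // Hypothetical values; "%03d.%02d." is the default sequence_format (taskIndex, then 0).
    val pathPrefix = "logs/"                                      // task.getPathPrefix
    val seq = String.format("%03d.%02d.", 7: Integer, 0: Integer) // "007.00."
    val destS3Key = pathPrefix + seq + "parquet"                  // "logs/007.00.parquet"
    // => uploaded to s3://<bucket>/logs/007.00.parquet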
data/src/main/scala/org/embulk/output/s3_parquet/S3ParquetPageOutput.scala ADDED
@@ -0,0 +1,65 @@
+ package org.embulk.output.s3_parquet
+
+
+ import java.io.File
+ import java.nio.file.{Files, Paths}
+
+ import com.amazonaws.services.s3.transfer.{TransferManager, Upload}
+ import com.amazonaws.services.s3.transfer.model.UploadResult
+ import org.apache.parquet.hadoop.ParquetWriter
+ import org.embulk.config.TaskReport
+ import org.embulk.output.s3_parquet.aws.Aws
+ import org.embulk.spi.{Exec, Page, PageReader, TransactionalPageOutput}
+
+ case class S3ParquetPageOutput(outputLocalFile: String,
+                                reader: PageReader,
+                                writer: ParquetWriter[PageReader],
+                                aws: Aws,
+                                destBucket: String,
+                                destKey: String)
+   extends TransactionalPageOutput {
+
+   private var isClosed: Boolean = false
+
+   override def add(page: Page): Unit = {
+     reader.setPage(page)
+     while (reader.nextRecord()) {
+       writer.write(reader)
+     }
+   }
+
+   override def finish(): Unit = {
+   }
+
+   override def close(): Unit = {
+     synchronized {
+       if (!isClosed) {
+         writer.close()
+         isClosed = true
+       }
+     }
+   }
+
+   override def abort(): Unit = {
+     close()
+     cleanup()
+   }
+
+   override def commit(): TaskReport = {
+     close()
+     val result: UploadResult = aws.withTransferManager { xfer: TransferManager =>
+       val upload: Upload = xfer.upload(destBucket, destKey, new File(outputLocalFile))
+       upload.waitForUploadResult()
+     }
+     cleanup()
+     Exec.newTaskReport()
+       .set("bucket", result.getBucketName)
+       .set("key", result.getKey)
+       .set("etag", result.getETag)
+       .set("version_id", result.getVersionId)
+   }
+
+   private def cleanup(): Unit = {
+     Files.delete(Paths.get(outputLocalFile))
+   }
+ }
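
For orientation, here is the order in which Embulk drives these callbacks. This is a simplified sketch, not plugin code: plugin, taskSource, schema, and pages are assumed to exist, and the real loop lives inside the Embulk framework.

    val output = plugin.open(taskSource, schema, 0)
    try {
      pages.foreach(output.add) // each page's records go through writer.write(reader)
      output.finish()
      output.commit()           // close() the writer, upload the buffer file, delete it
    } catch {
      case e: Throwable => output.abort(); throw e // abort() also deletes the local buffer file
    } finally {
      output.close()            // safe to call twice: guarded by the isClosed flag
    }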
data/src/main/scala/org/embulk/output/s3_parquet/aws/Aws.scala ADDED
@@ -0,0 +1,45 @@
+ package org.embulk.output.s3_parquet.aws
+
+
+ import com.amazonaws.client.builder.AwsClientBuilder
+ import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}
+ import com.amazonaws.services.s3.transfer.{TransferManager, TransferManagerBuilder}
+
+ object Aws {
+
+   trait Task
+     extends AwsCredentials.Task
+       with AwsEndpointConfiguration.Task
+       with AwsClientConfiguration.Task
+       with AwsS3Configuration.Task
+
+   def apply(task: Task): Aws = new Aws(task)
+
+ }
+
+ class Aws(task: Aws.Task) {
+
+   def withS3[A](f: AmazonS3 => A): A = {
+     val builder: AmazonS3ClientBuilder = AmazonS3ClientBuilder.standard()
+     AwsS3Configuration(task).configureAmazonS3ClientBuilder(builder)
+     val svc = createService(builder)
+     try f(svc)
+     finally svc.shutdown()
+   }
+
+   def withTransferManager[A](f: TransferManager => A): A = {
+     withS3 { s3 =>
+       val svc = TransferManagerBuilder.standard().withS3Client(s3).build()
+       try f(svc)
+       finally svc.shutdownNow(false)
+     }
+   }
+
+   def createService[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): T = {
+     AwsEndpointConfiguration(task).configureAwsClientBuilder(builder)
+     AwsClientConfiguration(task).configureAwsClientBuilder(builder)
+     builder.setCredentials(AwsCredentials(task).createAwsCredentialsProvider)
+
+     builder.build()
+   }
+ }
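
Both helpers use the loan pattern: build the client, pass it to f, and release it in finally. A minimal usage sketch; the bucket name is hypothetical, and doesBucketExistV2 is a standard AmazonS3 method from the AWS SDK for Java v1:

    val exists: Boolean = Aws(task).withS3 { s3 =>
      s3.doesBucketExistV2("my-bucket") // the client is shut down once this block returns
    }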
data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsClientConfiguration.scala ADDED
@@ -0,0 +1,34 @@
+ package org.embulk.output.s3_parquet.aws
+
+
+ import java.util.Optional
+
+ import com.amazonaws.ClientConfiguration
+ import com.amazonaws.client.builder.AwsClientBuilder
+ import org.embulk.config.{Config, ConfigDefault}
+ import org.embulk.output.s3_parquet.aws.AwsClientConfiguration.Task
+
+ object AwsClientConfiguration {
+
+   trait Task {
+
+     @Config("http_proxy")
+     @ConfigDefault("null")
+     def getHttpProxy: Optional[HttpProxy.Task]
+
+   }
+
+   def apply(task: Task): AwsClientConfiguration = new AwsClientConfiguration(task)
+ }
+
+ class AwsClientConfiguration(task: Task) {
+
+   def configureAwsClientBuilder[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): Unit = {
+     task.getHttpProxy.ifPresent { v =>
+       val cc = new ClientConfiguration
+       HttpProxy(v).configureClientConfiguration(cc)
+       builder.setClientConfiguration(cc)
+     }
+   }
+
+ }
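
HttpProxy.scala is listed in this release but its body is not shown in this section. As a labeled assumption, configureClientConfiguration plausibly maps the proxy settings onto standard ClientConfiguration setters, roughly like this; the field names host, port, and useHttps are illustrative, not taken from this diff:

    import com.amazonaws.{ClientConfiguration, Protocol}

    // Sketch under assumptions: host, port, and useHttps are hypothetical task fields.
    def configureClientConfiguration(cc: ClientConfiguration): Unit = {
      cc.setProxyHost(host)
      cc.setProxyPort(port)
      cc.setProtocol(if (useHttps) Protocol.HTTPS else Protocol.HTTP)
    }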
data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsCredentials.scala ADDED
@@ -0,0 +1,128 @@
+ package org.embulk.output.s3_parquet.aws
+
+
+ import java.util.Optional
+
+ import com.amazonaws.auth.{AnonymousAWSCredentials, AWSCredentialsProvider, AWSStaticCredentialsProvider, BasicAWSCredentials, BasicSessionCredentials, DefaultAWSCredentialsProviderChain, EC2ContainerCredentialsProviderWrapper, EnvironmentVariableCredentialsProvider, STSAssumeRoleSessionCredentialsProvider, SystemPropertiesCredentialsProvider}
+ import com.amazonaws.auth.profile.{ProfileCredentialsProvider, ProfilesConfigFile}
+ import org.embulk.config.{Config, ConfigDefault, ConfigException}
+ import org.embulk.output.s3_parquet.aws.AwsCredentials.Task
+ import org.embulk.spi.unit.LocalFile
+
+ object AwsCredentials {
+
+   trait Task {
+
+     @Config("auth_method")
+     @ConfigDefault("\"default\"")
+     def getAuthMethod: String
+
+     @Config("access_key_id")
+     @ConfigDefault("null")
+     def getAccessKeyId: Optional[String]
+
+     @Config("secret_access_key")
+     @ConfigDefault("null")
+     def getSecretAccessKey: Optional[String]
+
+     @Config("session_token")
+     @ConfigDefault("null")
+     def getSessionToken: Optional[String]
+
+     @Config("profile_file")
+     @ConfigDefault("null")
+     def getProfileFile: Optional[LocalFile]
+
+     @Config("profile_name")
+     @ConfigDefault("\"default\"")
+     def getProfileName: String
+
+     @Config("role_arn")
+     @ConfigDefault("null")
+     def getRoleArn: Optional[String]
+
+     @Config("role_session_name")
+     @ConfigDefault("null")
+     def getRoleSessionName: Optional[String]
+
+     @Config("role_external_id")
+     @ConfigDefault("null")
+     def getRoleExternalId: Optional[String]
+
+     @Config("role_session_duration_seconds")
+     @ConfigDefault("null")
+     def getRoleSessionDurationSeconds: Optional[Int]
+
+     @Config("scope_down_policy")
+     @ConfigDefault("null")
+     def getScopeDownPolicy: Optional[String]
+
+   }
+
+   def apply(task: Task): AwsCredentials = new AwsCredentials(task)
+ }
+
+ class AwsCredentials(task: Task) {
+
+   def createAwsCredentialsProvider: AWSCredentialsProvider = {
+     task.getAuthMethod match {
+       case "basic" =>
+         new AWSStaticCredentialsProvider(new BasicAWSCredentials(
+           getRequiredOption(task.getAccessKeyId, "access_key_id"),
+ getRequiredOption(task.getAccessKeyId, "secret_access_key")
73
+ ))
74
+
75
+ case "env" =>
76
+ new EnvironmentVariableCredentialsProvider
77
+
78
+ case "instance" =>
79
+ // NOTE: combination of InstanceProfileCredentialsProvider and ContainerCredentialsProvider
80
+ new EC2ContainerCredentialsProviderWrapper
81
+
82
+ case "profile" =>
83
+ if (task.getProfileFile.isPresent) {
84
+ val pf: ProfilesConfigFile = new ProfilesConfigFile(task.getProfileFile.get().getFile)
85
+ new ProfileCredentialsProvider(pf, task.getProfileName)
86
+ }
87
+ else new ProfileCredentialsProvider(task.getProfileName)
88
+
89
+ case "properties" =>
90
+ new SystemPropertiesCredentialsProvider
91
+
92
+ case "anonymous" =>
93
+ new AWSStaticCredentialsProvider(new AnonymousAWSCredentials)
94
+
95
+ case "session" =>
96
+ new AWSStaticCredentialsProvider(new BasicSessionCredentials(
97
+ getRequiredOption(task.getAccessKeyId, "access_key_id"),
98
+ getRequiredOption(task.getSecretAccessKey, "secret_access_key"),
99
+ getRequiredOption(task.getSessionToken, "session_token")
100
+ ))
101
+
102
+ case "assume_role" =>
103
+ // NOTE: Are http_proxy, endpoint, region required when assuming role?
104
+ val builder = new STSAssumeRoleSessionCredentialsProvider.Builder(
105
+ getRequiredOption(task.getRoleArn, "role_arn"),
106
+ getRequiredOption(task.getRoleSessionName, "role_session_name")
107
+ )
108
+ task.getRoleExternalId.ifPresent(v => builder.withExternalId(v))
109
+ task.getRoleSessionDurationSeconds.ifPresent(v => builder.withRoleSessionDurationSeconds(v))
110
+ task.getScopeDownPolicy.ifPresent(v => builder.withScopeDownPolicy(v))
111
+
112
+ builder.build()
113
+
114
+ case "default" =>
115
+ new DefaultAWSCredentialsProviderChain
116
+
117
+ case am =>
118
+ throw new ConfigException(s"'$am' is unsupported: `auth_method` must be one of ['basic', 'env', 'instance', 'profile', 'properties', 'anonymous', 'session', 'assume_role', 'default'].")
119
+ }
120
+ }
121
+
122
+ private def getRequiredOption[A](o: Optional[A],
123
+ name: String): A = {
124
+ o.orElseThrow(() => new ConfigException(s"`$name` must be set when `auth_method` is ${task.getAuthMethod}."))
125
+ }
126
+
127
+
128
+ }
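
A minimal sketch of how the resulting provider is consumed; in this plugin that happens inside Aws.createService, and task here would come from config.loadConfig as in S3ParquetOutputPlugin.transaction:

    // Sketch only: `task` is assumed to be an already-loaded AwsCredentials.Task.
    val provider: AWSCredentialsProvider = AwsCredentials(task).createAwsCredentialsProvider
    val credentials = provider.getCredentials // with auth_method "default", resolved via DefaultAWSCredentialsProviderChain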