embulk-output-s3_parquet 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -11,55 +11,63 @@ import org.embulk.config.TaskReport
11
11
  import org.embulk.output.s3_parquet.aws.Aws
12
12
  import org.embulk.spi.{Exec, Page, PageReader, TransactionalPageOutput}
13
13
 
14
+
14
15
  case class S3ParquetPageOutput(outputLocalFile: String,
15
16
  reader: PageReader,
16
17
  writer: ParquetWriter[PageReader],
17
18
  aws: Aws,
18
19
  destBucket: String,
19
20
  destKey: String)
20
- extends TransactionalPageOutput {
21
+ extends TransactionalPageOutput
22
+ {
21
23
 
22
- private var isClosed: Boolean = false
24
+ private var isClosed: Boolean = false
23
25
 
24
- override def add(page: Page): Unit = {
25
- reader.setPage(page)
26
- while (reader.nextRecord()) {
27
- writer.write(reader)
26
+ override def add(page: Page): Unit =
27
+ {
28
+ reader.setPage(page)
29
+ while (reader.nextRecord()) {
30
+ writer.write(reader)
31
+ }
28
32
  }
29
- }
30
33
 
31
- override def finish(): Unit = {
32
- }
34
+ override def finish(): Unit =
35
+ {
36
+ }
33
37
 
34
- override def close(): Unit = {
35
- synchronized {
36
- if (!isClosed) {
37
- writer.close()
38
- isClosed = true
39
- }
38
+ override def close(): Unit =
39
+ {
40
+ synchronized {
41
+ if (!isClosed) {
42
+ writer.close()
43
+ isClosed = true
44
+ }
45
+ }
40
46
  }
41
- }
42
47
 
43
- override def abort(): Unit = {
44
- close()
45
- cleanup()
46
- }
48
+ override def abort(): Unit =
49
+ {
50
+ close()
51
+ cleanup()
52
+ }
47
53
 
48
- override def commit(): TaskReport = {
49
- close()
50
- val result: UploadResult = aws.withTransferManager { xfer: TransferManager =>
51
- val upload: Upload = xfer.upload(destBucket, destKey, new File(outputLocalFile))
52
- upload.waitForUploadResult()
54
+ override def commit(): TaskReport =
55
+ {
56
+ close()
57
+ val result: UploadResult = aws.withTransferManager { xfer: TransferManager =>
58
+ val upload: Upload = xfer.upload(destBucket, destKey, new File(outputLocalFile))
59
+ upload.waitForUploadResult()
60
+ }
61
+ cleanup()
62
+ Exec.newTaskReport()
63
+ .set("bucket", result.getBucketName)
64
+ .set("key", result.getKey)
65
+ .set("etag", result.getETag)
66
+ .set("version_id", result.getVersionId)
53
67
  }
54
- cleanup()
55
- Exec.newTaskReport()
56
- .set("bucket", result.getBucketName)
57
- .set("key", result.getKey)
58
- .set("etag", result.getETag)
59
- .set("version_id", result.getVersionId)
60
- }
61
68
 
62
- private def cleanup(): Unit = {
63
- Files.delete(Paths.get(outputLocalFile))
64
- }
69
+ private def cleanup(): Unit =
70
+ {
71
+ Files.delete(Paths.get(outputLocalFile))
72
+ }
65
73
  }
@@ -2,44 +2,62 @@ package org.embulk.output.s3_parquet.aws
2
2
 
3
3
 
4
4
  import com.amazonaws.client.builder.AwsClientBuilder
5
+ import com.amazonaws.services.glue.{AWSGlue, AWSGlueClientBuilder}
5
6
  import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}
6
7
  import com.amazonaws.services.s3.transfer.{TransferManager, TransferManagerBuilder}
7
8
 
8
- object Aws {
9
9
 
10
- trait Task
11
- extends AwsCredentials.Task
12
- with AwsEndpointConfiguration.Task
13
- with AwsClientConfiguration.Task
14
- with AwsS3Configuration.Task
10
+ object Aws
11
+ {
15
12
 
16
- def apply(task: Task): Aws = new Aws(task)
13
+ trait Task
14
+ extends AwsCredentials.Task
15
+ with AwsEndpointConfiguration.Task
16
+ with AwsClientConfiguration.Task
17
+ with AwsS3Configuration.Task
18
+
19
+ def apply(task: Task): Aws =
20
+ {
21
+ new Aws(task)
22
+ }
17
23
 
18
24
  }
19
25
 
20
- class Aws(task: Aws.Task) {
21
-
22
- def withS3[A](f: AmazonS3 => A): A = {
23
- val builder: AmazonS3ClientBuilder = AmazonS3ClientBuilder.standard()
24
- AwsS3Configuration(task).configureAmazonS3ClientBuilder(builder)
25
- val svc = createService(builder)
26
- try f(svc)
27
- finally svc.shutdown()
28
- }
29
-
30
- def withTransferManager[A](f: TransferManager => A): A = {
31
- withS3 { s3 =>
32
- val svc = TransferManagerBuilder.standard().withS3Client(s3).build()
33
- try f(svc)
34
- finally svc.shutdownNow(false)
26
+ class Aws(task: Aws.Task)
27
+ {
28
+
29
+ def withS3[A](f: AmazonS3 => A): A =
30
+ {
31
+ val builder: AmazonS3ClientBuilder = AmazonS3ClientBuilder.standard()
32
+ AwsS3Configuration(task).configureAmazonS3ClientBuilder(builder)
33
+ val svc = createService(builder)
34
+ try f(svc)
35
+ finally svc.shutdown()
36
+ }
37
+
38
+ def withTransferManager[A](f: TransferManager => A): A =
39
+ {
40
+ withS3 { s3 =>
41
+ val svc = TransferManagerBuilder.standard().withS3Client(s3).build()
42
+ try f(svc)
43
+ finally svc.shutdownNow(false)
44
+ }
45
+ }
46
+
47
+ def withGlue[A](f: AWSGlue => A): A =
48
+ {
49
+ val builder: AWSGlueClientBuilder = AWSGlueClientBuilder.standard()
50
+ val svc = createService(builder)
51
+ try f(svc)
52
+ finally svc.shutdown()
35
53
  }
36
- }
37
54
 
38
- def createService[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): T = {
39
- AwsEndpointConfiguration(task).configureAwsClientBuilder(builder)
40
- AwsClientConfiguration(task).configureAwsClientBuilder(builder)
41
- builder.setCredentials(AwsCredentials(task).createAwsCredentialsProvider)
55
+ def createService[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): T =
56
+ {
57
+ AwsEndpointConfiguration(task).configureAwsClientBuilder(builder)
58
+ AwsClientConfiguration(task).configureAwsClientBuilder(builder)
59
+ builder.setCredentials(AwsCredentials(task).createAwsCredentialsProvider)
42
60
 
43
- builder.build()
44
- }
61
+ builder.build()
62
+ }
45
63
  }
@@ -8,27 +8,35 @@ import com.amazonaws.client.builder.AwsClientBuilder
8
8
  import org.embulk.config.{Config, ConfigDefault}
9
9
  import org.embulk.output.s3_parquet.aws.AwsClientConfiguration.Task
10
10
 
11
- object AwsClientConfiguration {
12
11
 
13
- trait Task {
12
+ object AwsClientConfiguration
13
+ {
14
14
 
15
- @Config("http_proxy")
16
- @ConfigDefault("null")
17
- def getHttpProxy: Optional[HttpProxy.Task]
15
+ trait Task
16
+ {
18
17
 
19
- }
18
+ @Config("http_proxy")
19
+ @ConfigDefault("null")
20
+ def getHttpProxy: Optional[HttpProxy.Task]
20
21
 
21
- def apply(task: Task): AwsClientConfiguration = new AwsClientConfiguration(task)
22
+ }
23
+
24
+ def apply(task: Task): AwsClientConfiguration =
25
+ {
26
+ new AwsClientConfiguration(task)
27
+ }
22
28
  }
23
29
 
24
- class AwsClientConfiguration(task: Task) {
30
+ class AwsClientConfiguration(task: Task)
31
+ {
25
32
 
26
- def configureAwsClientBuilder[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): Unit = {
27
- task.getHttpProxy.ifPresent { v =>
28
- val cc = new ClientConfiguration
29
- HttpProxy(v).configureClientConfiguration(cc)
30
- builder.setClientConfiguration(cc)
33
+ def configureAwsClientBuilder[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): Unit =
34
+ {
35
+ task.getHttpProxy.ifPresent { v =>
36
+ val cc = new ClientConfiguration
37
+ HttpProxy(v).configureClientConfiguration(cc)
38
+ builder.setClientConfiguration(cc)
39
+ }
31
40
  }
32
- }
33
41
 
34
42
  }
@@ -9,120 +9,129 @@ import org.embulk.config.{Config, ConfigDefault, ConfigException}
9
9
  import org.embulk.output.s3_parquet.aws.AwsCredentials.Task
10
10
  import org.embulk.spi.unit.LocalFile
11
11
 
12
- object AwsCredentials {
13
12
 
14
- trait Task {
13
+ object AwsCredentials
14
+ {
15
15
 
16
- @Config("auth_method")
17
- @ConfigDefault("\"default\"")
18
- def getAuthMethod: String
16
+ trait Task
17
+ {
19
18
 
20
- @Config("access_key_id")
21
- @ConfigDefault("null")
22
- def getAccessKeyId: Optional[String]
19
+ @Config("auth_method")
20
+ @ConfigDefault("\"default\"")
21
+ def getAuthMethod: String
23
22
 
24
- @Config("secret_access_key")
25
- @ConfigDefault("null")
26
- def getSecretAccessKey: Optional[String]
23
+ @Config("access_key_id")
24
+ @ConfigDefault("null")
25
+ def getAccessKeyId: Optional[String]
27
26
 
28
- @Config("session_token")
29
- @ConfigDefault("null")
30
- def getSessionToken: Optional[String]
27
+ @Config("secret_access_key")
28
+ @ConfigDefault("null")
29
+ def getSecretAccessKey: Optional[String]
31
30
 
32
- @Config("profile_file")
33
- @ConfigDefault("null")
34
- def getProfileFile: Optional[LocalFile]
31
+ @Config("session_token")
32
+ @ConfigDefault("null")
33
+ def getSessionToken: Optional[String]
35
34
 
36
- @Config("profile_name")
37
- @ConfigDefault("\"default\"")
38
- def getProfileName: String
35
+ @Config("profile_file")
36
+ @ConfigDefault("null")
37
+ def getProfileFile: Optional[LocalFile]
39
38
 
40
- @Config("role_arn")
41
- @ConfigDefault("null")
42
- def getRoleArn: Optional[String]
39
+ @Config("profile_name")
40
+ @ConfigDefault("\"default\"")
41
+ def getProfileName: String
43
42
 
44
- @Config("role_session_name")
45
- @ConfigDefault("null")
46
- def getRoleSessionName: Optional[String]
43
+ @Config("role_arn")
44
+ @ConfigDefault("null")
45
+ def getRoleArn: Optional[String]
47
46
 
48
- @Config("role_external_id")
49
- @ConfigDefault("null")
50
- def getRoleExternalId: Optional[String]
47
+ @Config("role_session_name")
48
+ @ConfigDefault("null")
49
+ def getRoleSessionName: Optional[String]
51
50
 
52
- @Config("role_session_duration_seconds")
53
- @ConfigDefault("null")
54
- def getRoleSessionDurationSeconds: Optional[Int]
51
+ @Config("role_external_id")
52
+ @ConfigDefault("null")
53
+ def getRoleExternalId: Optional[String]
55
54
 
56
- @Config("scope_down_policy")
57
- @ConfigDefault("null")
58
- def getScopeDownPolicy: Optional[String]
55
+ @Config("role_session_duration_seconds")
56
+ @ConfigDefault("null")
57
+ def getRoleSessionDurationSeconds: Optional[Int]
59
58
 
60
- }
59
+ @Config("scope_down_policy")
60
+ @ConfigDefault("null")
61
+ def getScopeDownPolicy: Optional[String]
61
62
 
62
- def apply(task: Task): AwsCredentials = new AwsCredentials(task)
63
- }
64
-
65
- class AwsCredentials(task: Task) {
66
-
67
- def createAwsCredentialsProvider: AWSCredentialsProvider = {
68
- task.getAuthMethod match {
69
- case "basic" =>
70
- new AWSStaticCredentialsProvider(new BasicAWSCredentials(
71
- getRequiredOption(task.getAccessKeyId, "access_key_id"),
72
- getRequiredOption(task.getAccessKeyId, "secret_access_key")
73
- ))
74
-
75
- case "env" =>
76
- new EnvironmentVariableCredentialsProvider
63
+ }
77
64
 
78
- case "instance" =>
79
- // NOTE: combination of InstanceProfileCredentialsProvider and ContainerCredentialsProvider
80
- new EC2ContainerCredentialsProviderWrapper
65
+ def apply(task: Task): AwsCredentials =
66
+ {
67
+ new AwsCredentials(task)
68
+ }
69
+ }
81
70
 
82
- case "profile" =>
83
- if (task.getProfileFile.isPresent) {
84
- val pf: ProfilesConfigFile = new ProfilesConfigFile(task.getProfileFile.get().getFile)
85
- new ProfileCredentialsProvider(pf, task.getProfileName)
71
+ class AwsCredentials(task: Task)
72
+ {
73
+
74
+ def createAwsCredentialsProvider: AWSCredentialsProvider =
75
+ {
76
+ task.getAuthMethod match {
77
+ case "basic" =>
78
+ new AWSStaticCredentialsProvider(new BasicAWSCredentials(
79
+ getRequiredOption(task.getAccessKeyId, "access_key_id"),
80
+ getRequiredOption(task.getAccessKeyId, "secret_access_key")
81
+ ))
82
+
83
+ case "env" =>
84
+ new EnvironmentVariableCredentialsProvider
85
+
86
+ case "instance" =>
87
+ // NOTE: combination of InstanceProfileCredentialsProvider and ContainerCredentialsProvider
88
+ new EC2ContainerCredentialsProviderWrapper
89
+
90
+ case "profile" =>
91
+ if (task.getProfileFile.isPresent) {
92
+ val pf: ProfilesConfigFile = new ProfilesConfigFile(task.getProfileFile.get().getFile)
93
+ new ProfileCredentialsProvider(pf, task.getProfileName)
94
+ }
95
+ else new ProfileCredentialsProvider(task.getProfileName)
96
+
97
+ case "properties" =>
98
+ new SystemPropertiesCredentialsProvider
99
+
100
+ case "anonymous" =>
101
+ new AWSStaticCredentialsProvider(new AnonymousAWSCredentials)
102
+
103
+ case "session" =>
104
+ new AWSStaticCredentialsProvider(new BasicSessionCredentials(
105
+ getRequiredOption(task.getAccessKeyId, "access_key_id"),
106
+ getRequiredOption(task.getSecretAccessKey, "secret_access_key"),
107
+ getRequiredOption(task.getSessionToken, "session_token")
108
+ ))
109
+
110
+ case "assume_role" =>
111
+ // NOTE: Are http_proxy, endpoint, region required when assuming role?
112
+ val builder = new STSAssumeRoleSessionCredentialsProvider.Builder(
113
+ getRequiredOption(task.getRoleArn, "role_arn"),
114
+ getRequiredOption(task.getRoleSessionName, "role_session_name")
115
+ )
116
+ task.getRoleExternalId.ifPresent(v => builder.withExternalId(v))
117
+ task.getRoleSessionDurationSeconds.ifPresent(v => builder.withRoleSessionDurationSeconds(v))
118
+ task.getScopeDownPolicy.ifPresent(v => builder.withScopeDownPolicy(v))
119
+
120
+ builder.build()
121
+
122
+ case "default" =>
123
+ new DefaultAWSCredentialsProviderChain
124
+
125
+ case am =>
126
+ throw new ConfigException(s"'$am' is unsupported: `auth_method` must be one of ['basic', 'env', 'instance', 'profile', 'properties', 'anonymous', 'session', 'assume_role', 'default'].")
86
127
  }
87
- else new ProfileCredentialsProvider(task.getProfileName)
88
-
89
- case "properties" =>
90
- new SystemPropertiesCredentialsProvider
91
-
92
- case "anonymous" =>
93
- new AWSStaticCredentialsProvider(new AnonymousAWSCredentials)
94
-
95
- case "session" =>
96
- new AWSStaticCredentialsProvider(new BasicSessionCredentials(
97
- getRequiredOption(task.getAccessKeyId, "access_key_id"),
98
- getRequiredOption(task.getSecretAccessKey, "secret_access_key"),
99
- getRequiredOption(task.getSessionToken, "session_token")
100
- ))
101
-
102
- case "assume_role" =>
103
- // NOTE: Are http_proxy, endpoint, region required when assuming role?
104
- val builder = new STSAssumeRoleSessionCredentialsProvider.Builder(
105
- getRequiredOption(task.getRoleArn, "role_arn"),
106
- getRequiredOption(task.getRoleSessionName, "role_session_name")
107
- )
108
- task.getRoleExternalId.ifPresent(v => builder.withExternalId(v))
109
- task.getRoleSessionDurationSeconds.ifPresent(v => builder.withRoleSessionDurationSeconds(v))
110
- task.getScopeDownPolicy.ifPresent(v => builder.withScopeDownPolicy(v))
111
-
112
- builder.build()
113
-
114
- case "default" =>
115
- new DefaultAWSCredentialsProviderChain
116
-
117
- case am =>
118
- throw new ConfigException(s"'$am' is unsupported: `auth_method` must be one of ['basic', 'env', 'instance', 'profile', 'properties', 'anonymous', 'session', 'assume_role', 'default'].")
119
128
  }
120
- }
121
129
 
122
- private def getRequiredOption[A](o: Optional[A],
123
- name: String): A = {
124
- o.orElseThrow(() => new ConfigException(s"`$name` must be set when `auth_method` is ${task.getAuthMethod}."))
125
- }
130
+ private def getRequiredOption[A](o: Optional[A],
131
+ name: String): A =
132
+ {
133
+ o.orElseThrow(() => new ConfigException(s"`$name` must be set when `auth_method` is ${task.getAuthMethod}."))
134
+ }
126
135
 
127
136
 
128
137
  }