embulk-output-s3_parquet 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,55 +11,63 @@ import org.embulk.config.TaskReport
11
11
  import org.embulk.output.s3_parquet.aws.Aws
12
12
  import org.embulk.spi.{Exec, Page, PageReader, TransactionalPageOutput}
13
13
 
14
+
14
15
  case class S3ParquetPageOutput(outputLocalFile: String,
15
16
  reader: PageReader,
16
17
  writer: ParquetWriter[PageReader],
17
18
  aws: Aws,
18
19
  destBucket: String,
19
20
  destKey: String)
20
- extends TransactionalPageOutput {
21
+ extends TransactionalPageOutput
22
+ {
21
23
 
22
- private var isClosed: Boolean = false
24
+ private var isClosed: Boolean = false
23
25
 
24
- override def add(page: Page): Unit = {
25
- reader.setPage(page)
26
- while (reader.nextRecord()) {
27
- writer.write(reader)
26
+ override def add(page: Page): Unit =
27
+ {
28
+ reader.setPage(page)
29
+ while (reader.nextRecord()) {
30
+ writer.write(reader)
31
+ }
28
32
  }
29
- }
30
33
 
31
- override def finish(): Unit = {
32
- }
34
+ override def finish(): Unit =
35
+ {
36
+ }
33
37
 
34
- override def close(): Unit = {
35
- synchronized {
36
- if (!isClosed) {
37
- writer.close()
38
- isClosed = true
39
- }
38
+ override def close(): Unit =
39
+ {
40
+ synchronized {
41
+ if (!isClosed) {
42
+ writer.close()
43
+ isClosed = true
44
+ }
45
+ }
40
46
  }
41
- }
42
47
 
43
- override def abort(): Unit = {
44
- close()
45
- cleanup()
46
- }
48
+ override def abort(): Unit =
49
+ {
50
+ close()
51
+ cleanup()
52
+ }
47
53
 
48
- override def commit(): TaskReport = {
49
- close()
50
- val result: UploadResult = aws.withTransferManager { xfer: TransferManager =>
51
- val upload: Upload = xfer.upload(destBucket, destKey, new File(outputLocalFile))
52
- upload.waitForUploadResult()
54
+ override def commit(): TaskReport =
55
+ {
56
+ close()
57
+ val result: UploadResult = aws.withTransferManager { xfer: TransferManager =>
58
+ val upload: Upload = xfer.upload(destBucket, destKey, new File(outputLocalFile))
59
+ upload.waitForUploadResult()
60
+ }
61
+ cleanup()
62
+ Exec.newTaskReport()
63
+ .set("bucket", result.getBucketName)
64
+ .set("key", result.getKey)
65
+ .set("etag", result.getETag)
66
+ .set("version_id", result.getVersionId)
53
67
  }
54
- cleanup()
55
- Exec.newTaskReport()
56
- .set("bucket", result.getBucketName)
57
- .set("key", result.getKey)
58
- .set("etag", result.getETag)
59
- .set("version_id", result.getVersionId)
60
- }
61
68
 
62
- private def cleanup(): Unit = {
63
- Files.delete(Paths.get(outputLocalFile))
64
- }
69
+ private def cleanup(): Unit =
70
+ {
71
+ Files.delete(Paths.get(outputLocalFile))
72
+ }
65
73
  }
@@ -2,44 +2,62 @@ package org.embulk.output.s3_parquet.aws
2
2
 
3
3
 
4
4
  import com.amazonaws.client.builder.AwsClientBuilder
5
+ import com.amazonaws.services.glue.{AWSGlue, AWSGlueClientBuilder}
5
6
  import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}
6
7
  import com.amazonaws.services.s3.transfer.{TransferManager, TransferManagerBuilder}
7
8
 
8
/** Companion of the `Aws` class: declares its task type and a factory. */
object Aws
{

    /** Task type mixing the credentials, endpoint, client and S3 configuration tasks. */
    trait Task
        extends AwsCredentials.Task
            with AwsEndpointConfiguration.Task
            with AwsClientConfiguration.Task
            with AwsS3Configuration.Task

    /** Builds an `Aws` backed by `task`. */
    def apply(task: Task): Aws = new Aws(task)

}
19
25
 
20
/**
  * Builds AWS clients from `task` and lends them to a callback, shutting each
  * client down once the callback returns or throws.
  */
class Aws(task: Aws.Task)
{

    /**
      * Runs `f` with an S3 client configured from the task.
      *
      * @param f callback that receives the client
      * @return whatever `f` returns
      */
    def withS3[A](f: AmazonS3 => A): A =
    {
        val s3Builder: AmazonS3ClientBuilder = AmazonS3ClientBuilder.standard()
        AwsS3Configuration(task).configureAmazonS3ClientBuilder(s3Builder)
        val s3 = createService(s3Builder)
        try {
            f(s3)
        }
        finally {
            s3.shutdown()
        }
    }

    /**
      * Runs `f` with a TransferManager built on a client obtained from `withS3`.
      *
      * @param f callback that receives the transfer manager
      * @return whatever `f` returns
      */
    def withTransferManager[A](f: TransferManager => A): A =
    {
        withS3 { s3 =>
            val managerBuilder = TransferManagerBuilder.standard().withS3Client(s3)
            val manager = managerBuilder.build()
            try {
                f(manager)
            }
            finally {
                // NOTE(review): `false` presumably leaves the S3 client open so
                // that withS3 shuts it down itself; confirm against the SDK docs.
                manager.shutdownNow(false)
            }
        }
    }

    /**
      * Runs `f` with a Glue client.
      *
      * @param f callback that receives the client
      * @return whatever `f` returns
      */
    def withGlue[A](f: AWSGlue => A): A =
    {
        val glueBuilder: AWSGlueClientBuilder = AWSGlueClientBuilder.standard()
        val glue = createService(glueBuilder)
        try {
            f(glue)
        }
        finally {
            glue.shutdown()
        }
    }

    /**
      * Applies the task's endpoint configuration, client configuration and
      * credentials to `builder`, in that order, then builds the client.
      *
      * @param builder client builder to configure
      * @return the built client
      */
    def createService[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): T =
    {
        val endpointConfiguration = AwsEndpointConfiguration(task)
        endpointConfiguration.configureAwsClientBuilder(builder)

        val clientConfiguration = AwsClientConfiguration(task)
        clientConfiguration.configureAwsClientBuilder(builder)

        val credentialsProvider = AwsCredentials(task).createAwsCredentialsProvider
        builder.setCredentials(credentialsProvider)

        builder.build()
    }
}
@@ -8,27 +8,35 @@ import com.amazonaws.client.builder.AwsClientBuilder
8
8
  import org.embulk.config.{Config, ConfigDefault}
9
9
  import org.embulk.output.s3_parquet.aws.AwsClientConfiguration.Task
10
10
 
11
/** Companion of the `AwsClientConfiguration` class: declares its task type and a factory. */
object AwsClientConfiguration
{

    /** Task options read by `AwsClientConfiguration`. */
    trait Task
    {

        /** Optional `http_proxy` setting; the configured default is `null`. */
        @Config("http_proxy")
        @ConfigDefault("null")
        def getHttpProxy: Optional[HttpProxy.Task]

    }

    /** Builds an `AwsClientConfiguration` backed by `task`. */
    def apply(task: Task): AwsClientConfiguration = new AwsClientConfiguration(task)
}
23
29
 
24
/** Applies the client-level options of `task` to AWS client builders. */
class AwsClientConfiguration(task: Task)
{

    /**
      * Sets a ClientConfiguration carrying the proxy settings on `builder`
      * when `http_proxy` is present; otherwise leaves `builder` untouched.
      *
      * @param builder client builder to configure
      */
    def configureAwsClientBuilder[S <: AwsClientBuilder[S, T], T](builder: AwsClientBuilder[S, T]): Unit =
    {
        task.getHttpProxy.ifPresent { proxyTask =>
            val clientConfiguration = new ClientConfiguration
            val proxy = HttpProxy(proxyTask)
            proxy.configureClientConfiguration(clientConfiguration)
            builder.setClientConfiguration(clientConfiguration)
        }
    }

}
@@ -9,120 +9,129 @@ import org.embulk.config.{Config, ConfigDefault, ConfigException}
9
9
  import org.embulk.output.s3_parquet.aws.AwsCredentials.Task
10
10
  import org.embulk.spi.unit.LocalFile
11
11
 
12
/** Companion of the `AwsCredentials` class: declares its task type and a factory. */
object AwsCredentials
{

    /** Task options that select and parameterize the credentials provider. */
    trait Task
    {

        /** Name of the authentication method; defaults to `default`. */
        @Config("auth_method")
        @ConfigDefault("\"default\"")
        def getAuthMethod: String

        /** Access key id; required by the `basic` and `session` methods. */
        @Config("access_key_id")
        @ConfigDefault("null")
        def getAccessKeyId: Optional[String]

        /** Secret access key; required by the `basic` and `session` methods. */
        @Config("secret_access_key")
        @ConfigDefault("null")
        def getSecretAccessKey: Optional[String]

        /** Session token; required by the `session` method. */
        @Config("session_token")
        @ConfigDefault("null")
        def getSessionToken: Optional[String]

        /** Optional profiles file used by the `profile` method. */
        @Config("profile_file")
        @ConfigDefault("null")
        def getProfileFile: Optional[LocalFile]

        /** Profile name used by the `profile` method; defaults to `default`. */
        @Config("profile_name")
        @ConfigDefault("\"default\"")
        def getProfileName: String

        /** Role ARN; required by the `assume_role` method. */
        @Config("role_arn")
        @ConfigDefault("null")
        def getRoleArn: Optional[String]

        /** Role session name; required by the `assume_role` method. */
        @Config("role_session_name")
        @ConfigDefault("null")
        def getRoleSessionName: Optional[String]

        /** Optional external id applied by the `assume_role` method. */
        @Config("role_external_id")
        @ConfigDefault("null")
        def getRoleExternalId: Optional[String]

        /** Optional session duration in seconds applied by the `assume_role` method. */
        @Config("role_session_duration_seconds")
        @ConfigDefault("null")
        def getRoleSessionDurationSeconds: Optional[Int]

        /** Optional scope-down policy applied by the `assume_role` method. */
        @Config("scope_down_policy")
        @ConfigDefault("null")
        def getScopeDownPolicy: Optional[String]

    }

    /** Builds an `AwsCredentials` backed by `task`. */
    def apply(task: Task): AwsCredentials =
    {
        new AwsCredentials(task)
    }
}

/** Creates the AWS credentials provider selected by the task's `auth_method`. */
class AwsCredentials(task: Task)
{

    /**
      * Creates the credentials provider for the configured `auth_method`.
      *
      * @return the provider matching `auth_method`
      * @throws ConfigException when `auth_method` is not supported, or when an
      *                         option required by the selected method is not set
      */
    def createAwsCredentialsProvider: AWSCredentialsProvider =
    {
        task.getAuthMethod match {
            case "basic" =>
                // The second argument must be the secret key; it was previously
                // read from `access_key_id`, so `secret_access_key` was ignored.
                new AWSStaticCredentialsProvider(new BasicAWSCredentials(
                    getRequiredOption(task.getAccessKeyId, "access_key_id"),
                    getRequiredOption(task.getSecretAccessKey, "secret_access_key")
                ))

            case "env" =>
                new EnvironmentVariableCredentialsProvider

            case "instance" =>
                // NOTE: combination of InstanceProfileCredentialsProvider and ContainerCredentialsProvider
                new EC2ContainerCredentialsProviderWrapper

            case "profile" =>
                if (task.getProfileFile.isPresent) {
                    val pf: ProfilesConfigFile = new ProfilesConfigFile(task.getProfileFile.get().getFile)
                    new ProfileCredentialsProvider(pf, task.getProfileName)
                }
                else new ProfileCredentialsProvider(task.getProfileName)

            case "properties" =>
                new SystemPropertiesCredentialsProvider

            case "anonymous" =>
                new AWSStaticCredentialsProvider(new AnonymousAWSCredentials)

            case "session" =>
                new AWSStaticCredentialsProvider(new BasicSessionCredentials(
                    getRequiredOption(task.getAccessKeyId, "access_key_id"),
                    getRequiredOption(task.getSecretAccessKey, "secret_access_key"),
                    getRequiredOption(task.getSessionToken, "session_token")
                ))

            case "assume_role" =>
                // NOTE: Are http_proxy, endpoint, region required when assuming role?
                val builder = new STSAssumeRoleSessionCredentialsProvider.Builder(
                    getRequiredOption(task.getRoleArn, "role_arn"),
                    getRequiredOption(task.getRoleSessionName, "role_session_name")
                )
                task.getRoleExternalId.ifPresent(v => builder.withExternalId(v))
                task.getRoleSessionDurationSeconds.ifPresent(v => builder.withRoleSessionDurationSeconds(v))
                task.getScopeDownPolicy.ifPresent(v => builder.withScopeDownPolicy(v))

                builder.build()

            case "default" =>
                new DefaultAWSCredentialsProviderChain

            case am =>
                throw new ConfigException(s"'$am' is unsupported: `auth_method` must be one of ['basic', 'env', 'instance', 'profile', 'properties', 'anonymous', 'session', 'assume_role', 'default'].")
        }
    }

    /**
      * Returns the value of `o`, or fails with a configuration error naming the
      * missing option and the current `auth_method`.
      *
      * @param o    optional value read from the task
      * @param name option name used in the error message
      * @throws ConfigException when `o` is empty
      */
    private def getRequiredOption[A](o: Optional[A],
                                     name: String): A =
    {
        o.orElseThrow(() => new ConfigException(s"`$name` must be set when `auth_method` is ${task.getAuthMethod}."))
    }


}