embulk-output-s3_parquet 0.0.3 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,9 +4,9 @@ package org.embulk.output.s3_parquet
4
4
  import java.io.File
5
5
  import java.nio.file.FileSystems
6
6
 
7
- import cloud.localstack.{DockerTestUtils, Localstack, TestUtils}
8
- import cloud.localstack.docker.LocalstackDocker
9
- import cloud.localstack.docker.annotation.LocalstackDockerConfiguration
7
+ import com.amazonaws.auth.{AWSStaticCredentialsProvider, BasicAWSCredentials}
8
+ import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration
9
+ import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}
10
10
  import com.amazonaws.services.s3.transfer.TransferManagerBuilder
11
11
  import com.google.common.io.Resources
12
12
  import org.apache.hadoop.fs.{Path => HadoopPath}
@@ -18,7 +18,7 @@ import org.embulk.test.{EmbulkTests, TestingEmbulk}
18
18
  import org.junit.Rule
19
19
  import org.junit.runner.RunWith
20
20
  import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, DiagrammedAssertions, FunSuite}
21
- import org.scalatest.junit.JUnitRunner
21
+ import org.scalatestplus.junit.JUnitRunner
22
22
 
23
23
  import scala.annotation.meta.getter
24
24
  import scala.jdk.CollectionConverters._
@@ -33,22 +33,11 @@ class TestS3ParquetOutputPlugin
33
33
  {
34
34
 
35
35
  val RESOURCE_NAME_PREFIX: String = "org/embulk/output/s3_parquet/"
36
- val BUCKET_NAME: String = "my-bucket"
37
-
38
- val LOCALSTACK_DOCKER: LocalstackDocker = LocalstackDocker.INSTANCE
39
-
40
- override protected def beforeAll(): Unit =
41
- {
42
- Localstack.teardownInfrastructure()
43
- LOCALSTACK_DOCKER.startup(LocalstackDockerConfiguration.DEFAULT)
44
- super.beforeAll()
45
- }
46
-
47
- override protected def afterAll(): Unit =
48
- {
49
- LOCALSTACK_DOCKER.stop()
50
- super.afterAll()
51
- }
36
+ val TEST_S3_ENDPOINT: String = "http://localhost:4572"
37
+ val TEST_S3_REGION: String = "us-east-1"
38
+ val TEST_S3_ACCESS_KEY_ID: String = "test"
39
+ val TEST_S3_SECRET_ACCESS_KEY: String = "test"
40
+ val TEST_BUCKET_NAME: String = "my-bucket"
52
41
 
53
42
  @(Rule@getter)
54
43
  val embulk: TestingEmbulk = TestingEmbulk.builder()
@@ -56,7 +45,11 @@ class TestS3ParquetOutputPlugin
56
45
  .build()
57
46
 
58
47
  before {
59
- DockerTestUtils.getClientS3.createBucket(BUCKET_NAME)
48
+ withLocalStackS3Client(_.createBucket(TEST_BUCKET_NAME))
49
+ }
50
+
51
+ after {
52
+ withLocalStackS3Client(_.deleteBucket(TEST_BUCKET_NAME))
60
53
  }
61
54
 
62
55
  def defaultOutConfig(): ConfigSource =
@@ -64,11 +57,11 @@ class TestS3ParquetOutputPlugin
64
57
  embulk.newConfig()
65
58
  .set("type", "s3_parquet")
66
59
  .set("endpoint", "http://localhost:4572") // See https://github.com/localstack/localstack#overview
67
- .set("bucket", BUCKET_NAME)
60
+ .set("bucket", TEST_BUCKET_NAME)
68
61
  .set("path_prefix", "path/to/p")
69
62
  .set("auth_method", "basic")
70
- .set("access_key_id", TestUtils.TEST_ACCESS_KEY)
71
- .set("secret_access_key", TestUtils.TEST_SECRET_KEY)
63
+ .set("access_key_id", TEST_S3_ACCESS_KEY_ID)
64
+ .set("secret_access_key", TEST_S3_SECRET_ACCESS_KEY)
72
65
  .set("path_style_access_enabled", true)
73
66
  .set("default_timezone", "Asia/Tokyo")
74
67
  }
@@ -111,12 +104,14 @@ class TestS3ParquetOutputPlugin
111
104
  def readParquetFile(bucket: String,
112
105
  key: String): Seq[Map[String, String]] =
113
106
  {
114
- val xfer = TransferManagerBuilder.standard()
115
- .withS3Client(DockerTestUtils.getClientS3)
116
- .build()
117
107
  val createdParquetFile = embulk.createTempFile("in")
118
- try xfer.download(bucket, key, createdParquetFile.toFile).waitForCompletion()
119
- finally xfer.shutdownNow()
108
+ withLocalStackS3Client {s3 =>
109
+ val xfer = TransferManagerBuilder.standard()
110
+ .withS3Client(s3)
111
+ .build()
112
+ try xfer.download(bucket, key, createdParquetFile.toFile).waitForCompletion()
113
+ finally xfer.shutdownNow()
114
+ }
120
115
 
121
116
  val reader: ParquetReader[SimpleRecord] = ParquetReader
122
117
  .builder(new SimpleReadSupport(), new HadoopPath(createdParquetFile.toString))
@@ -146,4 +141,14 @@ class TestS3ParquetOutputPlugin
146
141
  FileSystems.getDefault.getPath(new File(url.toURI).getAbsolutePath)
147
142
  }
148
143
 
144
+ private def withLocalStackS3Client[A](f: AmazonS3 => A): A = {
145
+ val client: AmazonS3 = AmazonS3ClientBuilder.standard
146
+ .withEndpointConfiguration(new EndpointConfiguration(TEST_S3_ENDPOINT, TEST_S3_REGION))
147
+ .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(TEST_S3_ACCESS_KEY_ID, TEST_S3_SECRET_ACCESS_KEY)))
148
+ .withPathStyleAccessEnabled(true)
149
+ .build()
150
+
151
+ try f(client)
152
+ finally client.shutdown()
153
+ }
149
154
  }
@@ -0,0 +1,78 @@
1
+ package org.embulk.output.s3_parquet.parquet
2
+
3
+
4
+ import org.embulk.spi.DataException
5
+ import org.embulk.spi.`type`.Types
6
+ import org.junit.runner.RunWith
7
+ import org.scalatest.FunSuite
8
+ import org.scalatestplus.junit.JUnitRunner
9
+
10
+ import scala.util.Try
11
+
12
+
13
+ @RunWith(classOf[JUnitRunner])
14
+ class TestLogicalTypeHandler
15
+ extends FunSuite
16
+ {
17
+
18
+ test("IntLogicalTypeHandler.isConvertible() returns true for long") {
19
+ val h = Int8LogicalTypeHandler
20
+
21
+ assert(h.isConvertible(Types.LONG))
22
+ assert(!h.isConvertible(Types.BOOLEAN))
23
+ }
24
+
25
+ test("IntLogicalTypeHandler.consume() raises DataException if given type is not long") {
26
+ val h = Int8LogicalTypeHandler
27
+ val actual = Try(h.consume("invalid", null))
28
+
29
+ assert(actual.isFailure)
30
+ assert(actual.failed.get.isInstanceOf[DataException])
31
+ }
32
+
33
+
34
+ test("TimestampMillisLogicalTypeHandler.isConvertible() returns true for timestamp") {
35
+ val h = TimestampMillisLogicalTypeHandler
36
+
37
+ assert(h.isConvertible(Types.TIMESTAMP))
38
+ assert(!h.isConvertible(Types.BOOLEAN))
39
+ }
40
+
41
+ test("TimestampMillisLogicalTypeHandler.consume() raises DataException if given type is not timestamp") {
42
+ val h = TimestampMillisLogicalTypeHandler
43
+ val actual = Try(h.consume("invalid", null))
44
+
45
+ assert(actual.isFailure)
46
+ assert(actual.failed.get.isInstanceOf[DataException])
47
+ }
48
+
49
+
50
+ test("TimestampMicrosLogicalTypeHandler.isConvertible() returns true for timestamp") {
51
+ val h = TimestampMicrosLogicalTypeHandler
52
+
53
+ assert(h.isConvertible(Types.TIMESTAMP))
54
+ assert(!h.isConvertible(Types.BOOLEAN))
55
+ }
56
+
57
+ test("TimestampMicrosLogicalTypeHandler.consume() raises DataException if given type is not timestamp") {
58
+ val h = TimestampMicrosLogicalTypeHandler
59
+ val actual = Try(h.consume("invalid", null))
60
+
61
+ assert(actual.isFailure)
62
+ assert(actual.failed.get.isInstanceOf[DataException])
63
+ }
64
+
65
+ test("JsonLogicalTypeHandler.isConvertible() returns true for json") {
66
+ val h = JsonLogicalTypeHandler
67
+
68
+ assert(h.isConvertible(Types.JSON))
69
+ assert(!h.isConvertible(Types.BOOLEAN))
70
+ }
71
+
72
+ test("JsonLogicalTypeHandler.consume() raises DataException if given type is not json") {
73
+ val h = JsonLogicalTypeHandler
74
+ val actual = Try(h.consume("invalid", null))
75
+ assert(actual.isFailure)
76
+ assert(actual.failed.get.isInstanceOf[DataException])
77
+ }
78
+ }
@@ -0,0 +1,162 @@
1
+ package org.embulk.output.s3_parquet.parquet
2
+
3
+
4
+ import java.util.Optional
5
+
6
+ import com.google.common.base.{Optional => GOptional}
7
+ import org.embulk.config.{ConfigException, TaskSource}
8
+ import org.embulk.output.s3_parquet.S3ParquetOutputPlugin.{ColumnOptionTask, TypeOptionTask}
9
+ import org.embulk.spi.`type`.{Types, Type => EType}
10
+ import org.junit.runner.RunWith
11
+ import org.scalatest.FunSuite
12
+ import org.scalatestplus.junit.JUnitRunner
13
+
14
+ import scala.jdk.CollectionConverters._
15
+ import scala.util.Try
16
+
17
+
18
+ @RunWith(classOf[JUnitRunner])
19
+ class TestLogicalTypeHandlerStore
20
+ extends FunSuite
21
+ {
22
+ test("empty() returns empty maps") {
23
+ val rv = LogicalTypeHandlerStore.empty
24
+
25
+ assert(rv.fromColumnName.isEmpty)
26
+ assert(rv.fromEmbulkType.isEmpty)
27
+ }
28
+
29
+ test("fromEmbulkOptions() returns handlers for valid option tasks") {
30
+ val typeOpts = Map[String, TypeOptionTask](
31
+ "timestamp" -> DummyTypeOptionTask(Optional.of[String]("timestamp-millis")),
32
+ ).asJava
33
+ val columnOpts = Map[String, ColumnOptionTask](
34
+ "col1" -> DummyColumnOptionTask(Optional.of[String]("timestamp-micros")),
35
+ ).asJava
36
+
37
+ val expected1 = Map[EType, LogicalTypeHandler](
38
+ Types.TIMESTAMP -> TimestampMillisLogicalTypeHandler,
39
+ )
40
+ val expected2 = Map[String, LogicalTypeHandler](
41
+ "col1" -> TimestampMicrosLogicalTypeHandler,
42
+ )
43
+
44
+ val rv = LogicalTypeHandlerStore.fromEmbulkOptions(typeOpts, columnOpts)
45
+
46
+ assert(rv.fromEmbulkType == expected1)
47
+ assert(rv.fromColumnName == expected2)
48
+ }
49
+
50
+ test("fromEmbulkOptions() raises ConfigException if invalid option tasks given") {
51
+ val emptyTypeOpts = Map.empty[String, TypeOptionTask].asJava
52
+ val emptyColumnOpts = Map.empty[String, ColumnOptionTask].asJava
53
+
54
+ val invalidTypeOpts = Map[String, TypeOptionTask](
55
+ "unknown-embulk-type-name" -> DummyTypeOptionTask(Optional.of[String]("timestamp-millis")),
56
+ "timestamp" -> DummyTypeOptionTask(Optional.of[String]("unknown-parquet-logical-type-name")),
57
+ ).asJava
58
+ val invalidColumnOpts = Map[String, ColumnOptionTask](
59
+ "col1" -> DummyColumnOptionTask(Optional.of[String]("unknown-parquet-logical-type-name")),
60
+ ).asJava
61
+
62
+ val try1 = Try(LogicalTypeHandlerStore.fromEmbulkOptions(invalidTypeOpts, emptyColumnOpts))
63
+ assert(try1.isFailure)
64
+ assert(try1.failed.get.isInstanceOf[ConfigException])
65
+
66
+ val try2 = Try(LogicalTypeHandlerStore.fromEmbulkOptions(emptyTypeOpts, invalidColumnOpts))
67
+ assert(try2.isFailure)
68
+ assert(try2.failed.get.isInstanceOf[ConfigException])
69
+
70
+ val try3 = Try(LogicalTypeHandlerStore.fromEmbulkOptions(invalidTypeOpts, invalidColumnOpts))
71
+ assert(try3.isFailure)
72
+ assert(try3.failed.get.isInstanceOf[ConfigException])
73
+ }
74
+
75
+ test("get() returns a handler matched with primary column name condition") {
76
+ val typeOpts = Map[String, TypeOptionTask](
77
+ "timestamp" -> DummyTypeOptionTask(Optional.of[String]("timestamp-millis")),
78
+ ).asJava
79
+ val columnOpts = Map[String, ColumnOptionTask](
80
+ "col1" -> DummyColumnOptionTask(Optional.of[String]("timestamp-micros")),
81
+ ).asJava
82
+
83
+ val handlers = LogicalTypeHandlerStore.fromEmbulkOptions(typeOpts, columnOpts)
84
+
85
+ // Both the column name and the embulk type match; the column name should take precedence
86
+ val expected = Some(TimestampMicrosLogicalTypeHandler)
87
+ val actual = handlers.get("col1", Types.TIMESTAMP)
88
+
89
+ assert(actual == expected)
90
+ }
91
+
92
+ test("get() returns a handler matched with type name condition") {
93
+ val typeOpts = Map[String, TypeOptionTask](
94
+ "timestamp" -> DummyTypeOptionTask(Optional.of[String]("timestamp-millis")),
95
+ ).asJava
96
+ val columnOpts = Map.empty[String, ColumnOptionTask].asJava
97
+
98
+ val handlers = LogicalTypeHandlerStore.fromEmbulkOptions(typeOpts, columnOpts)
99
+
100
+ // No column option is given, so it matches on embulk type only
101
+ val expected = Some(TimestampMillisLogicalTypeHandler)
102
+ val actual = handlers.get("col1", Types.TIMESTAMP)
103
+
104
+ assert(actual == expected)
105
+ }
106
+
107
+ test("get() returns None if not matched") {
108
+ val typeOpts = Map.empty[String, TypeOptionTask].asJava
109
+ val columnOpts = Map.empty[String, ColumnOptionTask].asJava
110
+
111
+ val handlers = LogicalTypeHandlerStore.fromEmbulkOptions(typeOpts, columnOpts)
112
+
113
+ // Both option maps are empty, so neither column name nor embulk type matches
114
+ val actual = handlers.get("col1", Types.TIMESTAMP)
115
+
116
+ assert(actual.isEmpty)
117
+ }
118
+
119
+ private case class DummyTypeOptionTask(lt: Optional[String])
120
+ extends TypeOptionTask
121
+ {
122
+ override def getLogicalType: Optional[String] =
123
+ {
124
+ lt
125
+ }
126
+
127
+ override def validate(): Unit =
128
+ {}
129
+
130
+ override def dump(): TaskSource =
131
+ {
132
+ null
133
+ }
134
+ }
135
+
136
+ private case class DummyColumnOptionTask(lt: Optional[String])
137
+ extends ColumnOptionTask
138
+ {
139
+ override def getTimeZoneId: GOptional[String] =
140
+ {
141
+ GOptional.absent[String]
142
+ }
143
+
144
+ override def getFormat: GOptional[String] =
145
+ {
146
+ GOptional.absent[String]
147
+ }
148
+
149
+ override def getLogicalType: Optional[String] =
150
+ {
151
+ lt
152
+ }
153
+
154
+ override def validate(): Unit =
155
+ {}
156
+
157
+ override def dump(): TaskSource =
158
+ {
159
+ null
160
+ }
161
+ }
162
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-s3_parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-07-17 00:00:00.000000000 Z
11
+ date: 2019-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -45,6 +45,9 @@ executables: []
45
45
  extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
+ - ".github/FUNDING.yml"
49
+ - ".github/workflows/release.yml"
50
+ - ".github/workflows/test.yml"
48
51
  - ".gitignore"
49
52
  - CHANGELOG.md
50
53
  - LICENSE.txt
@@ -59,14 +62,14 @@ files:
59
62
  - classpath/asm-3.1.jar
60
63
  - classpath/asm-5.0.4.jar
61
64
  - classpath/avro-1.7.7.jar
62
- - classpath/aws-java-sdk-core-1.11.592.jar
63
- - classpath/aws-java-sdk-glue-1.11.592.jar
64
- - classpath/aws-java-sdk-kms-1.11.592.jar
65
- - classpath/aws-java-sdk-s3-1.11.592.jar
66
- - classpath/aws-java-sdk-sts-1.11.592.jar
65
+ - classpath/aws-java-sdk-core-1.11.676.jar
66
+ - classpath/aws-java-sdk-glue-1.11.676.jar
67
+ - classpath/aws-java-sdk-kms-1.11.676.jar
68
+ - classpath/aws-java-sdk-s3-1.11.676.jar
69
+ - classpath/aws-java-sdk-sts-1.11.676.jar
67
70
  - classpath/commons-beanutils-1.7.0.jar
68
71
  - classpath/commons-cli-1.2.jar
69
- - classpath/commons-codec-1.10.jar
72
+ - classpath/commons-codec-1.11.jar
70
73
  - classpath/commons-collections-3.2.2.jar
71
74
  - classpath/commons-compress-1.4.1.jar
72
75
  - classpath/commons-configuration-1.6.jar
@@ -80,17 +83,17 @@ files:
80
83
  - classpath/curator-client-2.7.1.jar
81
84
  - classpath/curator-framework-2.7.1.jar
82
85
  - classpath/curator-recipes-2.7.1.jar
83
- - classpath/embulk-output-s3_parquet-0.0.3.jar
86
+ - classpath/embulk-output-s3_parquet-0.1.0.jar
84
87
  - classpath/gson-2.2.4.jar
85
88
  - classpath/hadoop-annotations-2.9.2.jar
86
89
  - classpath/hadoop-auth-2.9.2.jar
87
90
  - classpath/hadoop-common-2.9.2.jar
88
91
  - classpath/htrace-core4-4.1.0-incubating.jar
89
- - classpath/httpclient-4.5.5.jar
90
- - classpath/httpcore-4.4.9.jar
92
+ - classpath/httpclient-4.5.9.jar
93
+ - classpath/httpcore-4.4.11.jar
91
94
  - classpath/ion-java-1.0.2.jar
92
95
  - classpath/jackson-core-asl-1.9.13.jar
93
- - classpath/jackson-databind-2.6.7.2.jar
96
+ - classpath/jackson-databind-2.6.7.3.jar
94
97
  - classpath/jackson-dataformat-cbor-2.6.7.jar
95
98
  - classpath/jackson-jaxrs-1.8.3.jar
96
99
  - classpath/jackson-mapper-asl-1.9.13.jar
@@ -108,7 +111,7 @@ files:
108
111
  - classpath/jetty-sslengine-6.1.26.jar
109
112
  - classpath/jetty-util-6.1.26.jar
110
113
  - classpath/jline-0.9.94.jar
111
- - classpath/jmespath-java-1.11.592.jar
114
+ - classpath/jmespath-java-1.11.676.jar
112
115
  - classpath/jsch-0.1.54.jar
113
116
  - classpath/json-smart-2.3.jar
114
117
  - classpath/jsp-api-2.1.jar
@@ -124,7 +127,7 @@ files:
124
127
  - classpath/parquet-hadoop-1.10.1.jar
125
128
  - classpath/parquet-jackson-1.10.1.jar
126
129
  - classpath/protobuf-java-2.5.0.jar
127
- - classpath/scala-library-2.13.0.jar
130
+ - classpath/scala-library-2.13.1.jar
128
131
  - classpath/servlet-api-2.5-20081211.jar
129
132
  - classpath/servlet-api-2.5.jar
130
133
  - classpath/slf4j-api-1.7.25.jar
@@ -138,6 +141,8 @@ files:
138
141
  - classpath/zookeeper-3.4.6.jar
139
142
  - example/config.yml
140
143
  - example/data.tsv
144
+ - example/with_catalog.yml
145
+ - example/with_logicaltypes.yml
141
146
  - gradle/wrapper/gradle-wrapper.jar
142
147
  - gradle/wrapper/gradle-wrapper.properties
143
148
  - gradlew
@@ -154,11 +159,15 @@ files:
154
159
  - src/main/scala/org/embulk/output/s3_parquet/aws/AwsS3Configuration.scala
155
160
  - src/main/scala/org/embulk/output/s3_parquet/aws/HttpProxy.scala
156
161
  - src/main/scala/org/embulk/output/s3_parquet/parquet/EmbulkMessageType.scala
162
+ - src/main/scala/org/embulk/output/s3_parquet/parquet/LogicalTypeHandler.scala
163
+ - src/main/scala/org/embulk/output/s3_parquet/parquet/LogicalTypeHandlerStore.scala
157
164
  - src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriteSupport.scala
158
165
  - src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriter.scala
159
166
  - src/test/resources/org/embulk/output/s3_parquet/in1.csv
160
167
  - src/test/resources/org/embulk/output/s3_parquet/out1.tsv
161
168
  - src/test/scala/org/embulk/output/s3_parquet/TestS3ParquetOutputPlugin.scala
169
+ - src/test/scala/org/embulk/output/s3_parquet/parquet/TestLogicalTypeHandler.scala
170
+ - src/test/scala/org/embulk/output/s3_parquet/parquet/TestLogicalTypeHandlerStore.scala
162
171
  homepage: https://github.com/civitaspo/embulk-output-s3_parquet
163
172
  licenses:
164
173
  - MIT