embulk-output-s3_parquet 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,9 +4,9 @@ package org.embulk.output.s3_parquet
4
4
  import java.io.File
5
5
  import java.nio.file.FileSystems
6
6
 
7
- import cloud.localstack.{DockerTestUtils, Localstack, TestUtils}
8
- import cloud.localstack.docker.LocalstackDocker
9
- import cloud.localstack.docker.annotation.LocalstackDockerConfiguration
7
+ import com.amazonaws.auth.{AWSStaticCredentialsProvider, BasicAWSCredentials}
8
+ import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration
9
+ import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}
10
10
  import com.amazonaws.services.s3.transfer.TransferManagerBuilder
11
11
  import com.google.common.io.Resources
12
12
  import org.apache.hadoop.fs.{Path => HadoopPath}
@@ -18,7 +18,7 @@ import org.embulk.test.{EmbulkTests, TestingEmbulk}
18
18
  import org.junit.Rule
19
19
  import org.junit.runner.RunWith
20
20
  import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, DiagrammedAssertions, FunSuite}
21
- import org.scalatest.junit.JUnitRunner
21
+ import org.scalatestplus.junit.JUnitRunner
22
22
 
23
23
  import scala.annotation.meta.getter
24
24
  import scala.jdk.CollectionConverters._
@@ -33,22 +33,11 @@ class TestS3ParquetOutputPlugin
33
33
  {
34
34
 
35
35
  val RESOURCE_NAME_PREFIX: String = "org/embulk/output/s3_parquet/"
36
- val BUCKET_NAME: String = "my-bucket"
37
-
38
- val LOCALSTACK_DOCKER: LocalstackDocker = LocalstackDocker.INSTANCE
39
-
40
- override protected def beforeAll(): Unit =
41
- {
42
- Localstack.teardownInfrastructure()
43
- LOCALSTACK_DOCKER.startup(LocalstackDockerConfiguration.DEFAULT)
44
- super.beforeAll()
45
- }
46
-
47
- override protected def afterAll(): Unit =
48
- {
49
- LOCALSTACK_DOCKER.stop()
50
- super.afterAll()
51
- }
36
+ val TEST_S3_ENDPOINT: String = "http://localhost:4572"
37
+ val TEST_S3_REGION: String = "us-east-1"
38
+ val TEST_S3_ACCESS_KEY_ID: String = "test"
39
+ val TEST_S3_SECRET_ACCESS_KEY: String = "test"
40
+ val TEST_BUCKET_NAME: String = "my-bucket"
52
41
 
53
42
  @(Rule@getter)
54
43
  val embulk: TestingEmbulk = TestingEmbulk.builder()
@@ -56,7 +45,11 @@ class TestS3ParquetOutputPlugin
56
45
  .build()
57
46
 
58
47
  before {
59
- DockerTestUtils.getClientS3.createBucket(BUCKET_NAME)
48
+ withLocalStackS3Client(_.createBucket(TEST_BUCKET_NAME))
49
+ }
50
+
51
+ after {
52
+ withLocalStackS3Client(_.deleteBucket(TEST_BUCKET_NAME))
60
53
  }
61
54
 
62
55
  def defaultOutConfig(): ConfigSource =
@@ -64,11 +57,11 @@ class TestS3ParquetOutputPlugin
64
57
  embulk.newConfig()
65
58
  .set("type", "s3_parquet")
66
59
  .set("endpoint", "http://localhost:4572") // See https://github.com/localstack/localstack#overview
67
- .set("bucket", BUCKET_NAME)
60
+ .set("bucket", TEST_BUCKET_NAME)
68
61
  .set("path_prefix", "path/to/p")
69
62
  .set("auth_method", "basic")
70
- .set("access_key_id", TestUtils.TEST_ACCESS_KEY)
71
- .set("secret_access_key", TestUtils.TEST_SECRET_KEY)
63
+ .set("access_key_id", TEST_S3_ACCESS_KEY_ID)
64
+ .set("secret_access_key", TEST_S3_SECRET_ACCESS_KEY)
72
65
  .set("path_style_access_enabled", true)
73
66
  .set("default_timezone", "Asia/Tokyo")
74
67
  }
@@ -111,12 +104,14 @@ class TestS3ParquetOutputPlugin
111
104
  def readParquetFile(bucket: String,
112
105
  key: String): Seq[Map[String, String]] =
113
106
  {
114
- val xfer = TransferManagerBuilder.standard()
115
- .withS3Client(DockerTestUtils.getClientS3)
116
- .build()
117
107
  val createdParquetFile = embulk.createTempFile("in")
118
- try xfer.download(bucket, key, createdParquetFile.toFile).waitForCompletion()
119
- finally xfer.shutdownNow()
108
+ withLocalStackS3Client {s3 =>
109
+ val xfer = TransferManagerBuilder.standard()
110
+ .withS3Client(s3)
111
+ .build()
112
+ try xfer.download(bucket, key, createdParquetFile.toFile).waitForCompletion()
113
+ finally xfer.shutdownNow()
114
+ }
120
115
 
121
116
  val reader: ParquetReader[SimpleRecord] = ParquetReader
122
117
  .builder(new SimpleReadSupport(), new HadoopPath(createdParquetFile.toString))
@@ -146,4 +141,14 @@ class TestS3ParquetOutputPlugin
146
141
  FileSystems.getDefault.getPath(new File(url.toURI).getAbsolutePath)
147
142
  }
148
143
 
144
+ private def withLocalStackS3Client[A](f: AmazonS3 => A): A = {
145
+ val client: AmazonS3 = AmazonS3ClientBuilder.standard
146
+ .withEndpointConfiguration(new EndpointConfiguration(TEST_S3_ENDPOINT, TEST_S3_REGION))
147
+ .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(TEST_S3_ACCESS_KEY_ID, TEST_S3_SECRET_ACCESS_KEY)))
148
+ .withPathStyleAccessEnabled(true)
149
+ .build()
150
+
151
+ try f(client)
152
+ finally client.shutdown()
153
+ }
149
154
  }
@@ -0,0 +1,78 @@
1
+ package org.embulk.output.s3_parquet.parquet
2
+
3
+
4
+ import org.embulk.spi.DataException
5
+ import org.embulk.spi.`type`.Types
6
+ import org.junit.runner.RunWith
7
+ import org.scalatest.FunSuite
8
+ import org.scalatestplus.junit.JUnitRunner
9
+
10
+ import scala.util.Try
11
+
12
+
13
+ @RunWith(classOf[JUnitRunner])
14
+ class TestLogicalTypeHandler
15
+ extends FunSuite
16
+ {
17
+
18
+ test("IntLogicalTypeHandler.isConvertible() returns true for long") {
19
+ val h = Int8LogicalTypeHandler
20
+
21
+ assert(h.isConvertible(Types.LONG))
22
+ assert(!h.isConvertible(Types.BOOLEAN))
23
+ }
24
+
25
+ test("IntLogicalTypeHandler.consume() raises DataException if given type is not long") {
26
+ val h = Int8LogicalTypeHandler
27
+ val actual = Try(h.consume("invalid", null))
28
+
29
+ assert(actual.isFailure)
30
+ assert(actual.failed.get.isInstanceOf[DataException])
31
+ }
32
+
33
+
34
+ test("TimestampMillisLogicalTypeHandler.isConvertible() returns true for timestamp") {
35
+ val h = TimestampMillisLogicalTypeHandler
36
+
37
+ assert(h.isConvertible(Types.TIMESTAMP))
38
+ assert(!h.isConvertible(Types.BOOLEAN))
39
+ }
40
+
41
+ test("TimestampMillisLogicalTypeHandler.consume() raises DataException if given type is not timestamp") {
42
+ val h = TimestampMillisLogicalTypeHandler
43
+ val actual = Try(h.consume("invalid", null))
44
+
45
+ assert(actual.isFailure)
46
+ assert(actual.failed.get.isInstanceOf[DataException])
47
+ }
48
+
49
+
50
+ test("TimestampMicrosLogicalTypeHandler.isConvertible() returns true for timestamp") {
51
+ val h = TimestampMicrosLogicalTypeHandler
52
+
53
+ assert(h.isConvertible(Types.TIMESTAMP))
54
+ assert(!h.isConvertible(Types.BOOLEAN))
55
+ }
56
+
57
+ test("TimestampMicrosLogicalTypeHandler.consume() raises DataException if given type is not timestamp") {
58
+ val h = TimestampMicrosLogicalTypeHandler
59
+ val actual = Try(h.consume("invalid", null))
60
+
61
+ assert(actual.isFailure)
62
+ assert(actual.failed.get.isInstanceOf[DataException])
63
+ }
64
+
65
+ test("JsonLogicalTypeHandler.isConvertible() returns true for json") {
66
+ val h = JsonLogicalTypeHandler
67
+
68
+ assert(h.isConvertible(Types.JSON))
69
+ assert(!h.isConvertible(Types.BOOLEAN))
70
+ }
71
+
72
+ test("JsonLogicalTypeHandler.consume() raises DataException if given type is not json") {
73
+ val h = JsonLogicalTypeHandler
74
+ val actual = Try(h.consume("invalid", null))
75
+ assert(actual.isFailure)
76
+ assert(actual.failed.get.isInstanceOf[DataException])
77
+ }
78
+ }
@@ -0,0 +1,162 @@
1
+ package org.embulk.output.s3_parquet.parquet
2
+
3
+
4
+ import java.util.Optional
5
+
6
+ import com.google.common.base.{Optional => GOptional}
7
+ import org.embulk.config.{ConfigException, TaskSource}
8
+ import org.embulk.output.s3_parquet.S3ParquetOutputPlugin.{ColumnOptionTask, TypeOptionTask}
9
+ import org.embulk.spi.`type`.{Types, Type => EType}
10
+ import org.junit.runner.RunWith
11
+ import org.scalatest.FunSuite
12
+ import org.scalatestplus.junit.JUnitRunner
13
+
14
+ import scala.jdk.CollectionConverters._
15
+ import scala.util.Try
16
+
17
+
18
+ @RunWith(classOf[JUnitRunner])
19
+ class TestLogicalTypeHandlerStore
20
+ extends FunSuite
21
+ {
22
+ test("empty() returns empty maps") {
23
+ val rv = LogicalTypeHandlerStore.empty
24
+
25
+ assert(rv.fromColumnName.isEmpty)
26
+ assert(rv.fromEmbulkType.isEmpty)
27
+ }
28
+
29
+ test("fromEmbulkOptions() returns handlers for valid option tasks") {
30
+ val typeOpts = Map[String, TypeOptionTask](
31
+ "timestamp" -> DummyTypeOptionTask(Optional.of[String]("timestamp-millis")),
32
+ ).asJava
33
+ val columnOpts = Map[String, ColumnOptionTask](
34
+ "col1" -> DummyColumnOptionTask(Optional.of[String]("timestamp-micros")),
35
+ ).asJava
36
+
37
+ val expected1 = Map[EType, LogicalTypeHandler](
38
+ Types.TIMESTAMP -> TimestampMillisLogicalTypeHandler,
39
+ )
40
+ val expected2 = Map[String, LogicalTypeHandler](
41
+ "col1" -> TimestampMicrosLogicalTypeHandler,
42
+ )
43
+
44
+ val rv = LogicalTypeHandlerStore.fromEmbulkOptions(typeOpts, columnOpts)
45
+
46
+ assert(rv.fromEmbulkType == expected1)
47
+ assert(rv.fromColumnName == expected2)
48
+ }
49
+
50
+ test("fromEmbulkOptions() raises ConfigException if invalid option tasks given") {
51
+ val emptyTypeOpts = Map.empty[String, TypeOptionTask].asJava
52
+ val emptyColumnOpts = Map.empty[String, ColumnOptionTask].asJava
53
+
54
+ val invalidTypeOpts = Map[String, TypeOptionTask](
55
+ "unknown-embulk-type-name" -> DummyTypeOptionTask(Optional.of[String]("timestamp-millis")),
56
+ "timestamp" -> DummyTypeOptionTask(Optional.of[String]("unknown-parquet-logical-type-name")),
57
+ ).asJava
58
+ val invalidColumnOpts = Map[String, ColumnOptionTask](
59
+ "col1" -> DummyColumnOptionTask(Optional.of[String]("unknown-parquet-logical-type-name")),
60
+ ).asJava
61
+
62
+ val try1 = Try(LogicalTypeHandlerStore.fromEmbulkOptions(invalidTypeOpts, emptyColumnOpts))
63
+ assert(try1.isFailure)
64
+ assert(try1.failed.get.isInstanceOf[ConfigException])
65
+
66
+ val try2 = Try(LogicalTypeHandlerStore.fromEmbulkOptions(emptyTypeOpts, invalidColumnOpts))
67
+ assert(try2.isFailure)
68
+ assert(try2.failed.get.isInstanceOf[ConfigException])
69
+
70
+ val try3 = Try(LogicalTypeHandlerStore.fromEmbulkOptions(invalidTypeOpts, invalidColumnOpts))
71
+ assert(try3.isFailure)
72
+ assert(try3.failed.get.isInstanceOf[ConfigException])
73
+ }
74
+
75
+ test("get() returns a handler matched with primary column name condition") {
76
+ val typeOpts = Map[String, TypeOptionTask](
77
+ "timestamp" -> DummyTypeOptionTask(Optional.of[String]("timestamp-millis")),
78
+ ).asJava
79
+ val columnOpts = Map[String, ColumnOptionTask](
80
+ "col1" -> DummyColumnOptionTask(Optional.of[String]("timestamp-micros")),
81
+ ).asJava
82
+
83
+ val handlers = LogicalTypeHandlerStore.fromEmbulkOptions(typeOpts, columnOpts)
84
+
85
+ // It matches both the column name and the embulk type; the column name should take priority
86
+ val expected = Some(TimestampMicrosLogicalTypeHandler)
87
+ val actual = handlers.get("col1", Types.TIMESTAMP)
88
+
89
+ assert(actual == expected)
90
+ }
91
+
92
+ test("get() returns a handler matched with type name condition") {
93
+ val typeOpts = Map[String, TypeOptionTask](
94
+ "timestamp" -> DummyTypeOptionTask(Optional.of[String]("timestamp-millis")),
95
+ ).asJava
96
+ val columnOpts = Map.empty[String, ColumnOptionTask].asJava
97
+
98
+ val handlers = LogicalTypeHandlerStore.fromEmbulkOptions(typeOpts, columnOpts)
99
+
100
+ // It matches the embulk type only, because no column option is given
101
+ val expected = Some(TimestampMillisLogicalTypeHandler)
102
+ val actual = handlers.get("col1", Types.TIMESTAMP)
103
+
104
+ assert(actual == expected)
105
+ }
106
+
107
+ test("get() returns None if not matched") {
108
+ val typeOpts = Map.empty[String, TypeOptionTask].asJava
109
+ val columnOpts = Map.empty[String, ColumnOptionTask].asJava
110
+
111
+ val handlers = LogicalTypeHandlerStore.fromEmbulkOptions(typeOpts, columnOpts)
112
+
113
+ // It matches neither the column name nor the embulk type
114
+ val actual = handlers.get("col1", Types.TIMESTAMP)
115
+
116
+ assert(actual.isEmpty)
117
+ }
118
+
119
+ private case class DummyTypeOptionTask(lt: Optional[String])
120
+ extends TypeOptionTask
121
+ {
122
+ override def getLogicalType: Optional[String] =
123
+ {
124
+ lt
125
+ }
126
+
127
+ override def validate(): Unit =
128
+ {}
129
+
130
+ override def dump(): TaskSource =
131
+ {
132
+ null
133
+ }
134
+ }
135
+
136
+ private case class DummyColumnOptionTask(lt: Optional[String])
137
+ extends ColumnOptionTask
138
+ {
139
+ override def getTimeZoneId: GOptional[String] =
140
+ {
141
+ GOptional.absent[String]
142
+ }
143
+
144
+ override def getFormat: GOptional[String] =
145
+ {
146
+ GOptional.absent[String]
147
+ }
148
+
149
+ override def getLogicalType: Optional[String] =
150
+ {
151
+ lt
152
+ }
153
+
154
+ override def validate(): Unit =
155
+ {}
156
+
157
+ override def dump(): TaskSource =
158
+ {
159
+ null
160
+ }
161
+ }
162
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-s3_parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-07-17 00:00:00.000000000 Z
11
+ date: 2019-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -45,6 +45,9 @@ executables: []
45
45
  extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
+ - ".github/FUNDING.yml"
49
+ - ".github/workflows/release.yml"
50
+ - ".github/workflows/test.yml"
48
51
  - ".gitignore"
49
52
  - CHANGELOG.md
50
53
  - LICENSE.txt
@@ -59,14 +62,14 @@ files:
59
62
  - classpath/asm-3.1.jar
60
63
  - classpath/asm-5.0.4.jar
61
64
  - classpath/avro-1.7.7.jar
62
- - classpath/aws-java-sdk-core-1.11.592.jar
63
- - classpath/aws-java-sdk-glue-1.11.592.jar
64
- - classpath/aws-java-sdk-kms-1.11.592.jar
65
- - classpath/aws-java-sdk-s3-1.11.592.jar
66
- - classpath/aws-java-sdk-sts-1.11.592.jar
65
+ - classpath/aws-java-sdk-core-1.11.676.jar
66
+ - classpath/aws-java-sdk-glue-1.11.676.jar
67
+ - classpath/aws-java-sdk-kms-1.11.676.jar
68
+ - classpath/aws-java-sdk-s3-1.11.676.jar
69
+ - classpath/aws-java-sdk-sts-1.11.676.jar
67
70
  - classpath/commons-beanutils-1.7.0.jar
68
71
  - classpath/commons-cli-1.2.jar
69
- - classpath/commons-codec-1.10.jar
72
+ - classpath/commons-codec-1.11.jar
70
73
  - classpath/commons-collections-3.2.2.jar
71
74
  - classpath/commons-compress-1.4.1.jar
72
75
  - classpath/commons-configuration-1.6.jar
@@ -80,17 +83,17 @@ files:
80
83
  - classpath/curator-client-2.7.1.jar
81
84
  - classpath/curator-framework-2.7.1.jar
82
85
  - classpath/curator-recipes-2.7.1.jar
83
- - classpath/embulk-output-s3_parquet-0.0.3.jar
86
+ - classpath/embulk-output-s3_parquet-0.1.0.jar
84
87
  - classpath/gson-2.2.4.jar
85
88
  - classpath/hadoop-annotations-2.9.2.jar
86
89
  - classpath/hadoop-auth-2.9.2.jar
87
90
  - classpath/hadoop-common-2.9.2.jar
88
91
  - classpath/htrace-core4-4.1.0-incubating.jar
89
- - classpath/httpclient-4.5.5.jar
90
- - classpath/httpcore-4.4.9.jar
92
+ - classpath/httpclient-4.5.9.jar
93
+ - classpath/httpcore-4.4.11.jar
91
94
  - classpath/ion-java-1.0.2.jar
92
95
  - classpath/jackson-core-asl-1.9.13.jar
93
- - classpath/jackson-databind-2.6.7.2.jar
96
+ - classpath/jackson-databind-2.6.7.3.jar
94
97
  - classpath/jackson-dataformat-cbor-2.6.7.jar
95
98
  - classpath/jackson-jaxrs-1.8.3.jar
96
99
  - classpath/jackson-mapper-asl-1.9.13.jar
@@ -108,7 +111,7 @@ files:
108
111
  - classpath/jetty-sslengine-6.1.26.jar
109
112
  - classpath/jetty-util-6.1.26.jar
110
113
  - classpath/jline-0.9.94.jar
111
- - classpath/jmespath-java-1.11.592.jar
114
+ - classpath/jmespath-java-1.11.676.jar
112
115
  - classpath/jsch-0.1.54.jar
113
116
  - classpath/json-smart-2.3.jar
114
117
  - classpath/jsp-api-2.1.jar
@@ -124,7 +127,7 @@ files:
124
127
  - classpath/parquet-hadoop-1.10.1.jar
125
128
  - classpath/parquet-jackson-1.10.1.jar
126
129
  - classpath/protobuf-java-2.5.0.jar
127
- - classpath/scala-library-2.13.0.jar
130
+ - classpath/scala-library-2.13.1.jar
128
131
  - classpath/servlet-api-2.5-20081211.jar
129
132
  - classpath/servlet-api-2.5.jar
130
133
  - classpath/slf4j-api-1.7.25.jar
@@ -138,6 +141,8 @@ files:
138
141
  - classpath/zookeeper-3.4.6.jar
139
142
  - example/config.yml
140
143
  - example/data.tsv
144
+ - example/with_catalog.yml
145
+ - example/with_logicaltypes.yml
141
146
  - gradle/wrapper/gradle-wrapper.jar
142
147
  - gradle/wrapper/gradle-wrapper.properties
143
148
  - gradlew
@@ -154,11 +159,15 @@ files:
154
159
  - src/main/scala/org/embulk/output/s3_parquet/aws/AwsS3Configuration.scala
155
160
  - src/main/scala/org/embulk/output/s3_parquet/aws/HttpProxy.scala
156
161
  - src/main/scala/org/embulk/output/s3_parquet/parquet/EmbulkMessageType.scala
162
+ - src/main/scala/org/embulk/output/s3_parquet/parquet/LogicalTypeHandler.scala
163
+ - src/main/scala/org/embulk/output/s3_parquet/parquet/LogicalTypeHandlerStore.scala
157
164
  - src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriteSupport.scala
158
165
  - src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriter.scala
159
166
  - src/test/resources/org/embulk/output/s3_parquet/in1.csv
160
167
  - src/test/resources/org/embulk/output/s3_parquet/out1.tsv
161
168
  - src/test/scala/org/embulk/output/s3_parquet/TestS3ParquetOutputPlugin.scala
169
+ - src/test/scala/org/embulk/output/s3_parquet/parquet/TestLogicalTypeHandler.scala
170
+ - src/test/scala/org/embulk/output/s3_parquet/parquet/TestLogicalTypeHandlerStore.scala
162
171
  homepage: https://github.com/civitaspo/embulk-output-s3_parquet
163
172
  licenses:
164
173
  - MIT