embulk-output-s3_parquet 0.1.0 → 0.2.0

Files changed (35)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/release.yml +3 -0
  3. data/.github/workflows/test.yml +2 -0
  4. data/.scalafmt.conf +5 -0
  5. data/CHANGELOG.md +15 -0
  6. data/README.md +3 -2
  7. data/build.gradle +19 -9
  8. data/example/config.yml +3 -1
  9. data/example/prepare_s3_bucket.sh +6 -0
  10. data/example/with_catalog.yml +3 -1
  11. data/example/with_logicaltypes.yml +3 -1
  12. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  13. data/gradle/wrapper/gradle-wrapper.properties +1 -1
  14. data/gradlew +31 -20
  15. data/gradlew.bat +17 -1
  16. data/run_s3_local.sh +7 -0
  17. data/src/main/scala/org/embulk/output/s3_parquet/CatalogRegistrator.scala +226 -178
  18. data/src/main/scala/org/embulk/output/s3_parquet/ContextClassLoaderSwapper.scala +18 -0
  19. data/src/main/scala/org/embulk/output/s3_parquet/S3ParquetOutputPlugin.scala +293 -204
  20. data/src/main/scala/org/embulk/output/s3_parquet/S3ParquetPageOutput.scala +46 -49
  21. data/src/main/scala/org/embulk/output/s3_parquet/aws/Aws.scala +46 -50
  22. data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsClientConfiguration.scala +18 -23
  23. data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsCredentials.scala +146 -119
  24. data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsEndpointConfiguration.scala +32 -35
  25. data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsS3Configuration.scala +45 -41
  26. data/src/main/scala/org/embulk/output/s3_parquet/aws/HttpProxy.scala +40 -43
  27. data/src/main/scala/org/embulk/output/s3_parquet/parquet/EmbulkMessageType.scala +138 -92
  28. data/src/main/scala/org/embulk/output/s3_parquet/parquet/LogicalTypeHandler.scala +117 -102
  29. data/src/main/scala/org/embulk/output/s3_parquet/parquet/LogicalTypeHandlerStore.scala +91 -84
  30. data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriteSupport.scala +30 -29
  31. data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriter.scala +143 -152
  32. data/src/test/scala/org/embulk/output/s3_parquet/TestS3ParquetOutputPlugin.scala +144 -117
  33. data/src/test/scala/org/embulk/output/s3_parquet/parquet/TestLogicalTypeHandler.scala +72 -66
  34. data/src/test/scala/org/embulk/output/s3_parquet/parquet/TestLogicalTypeHandlerStore.scala +149 -132
  35. metadata +22 -15
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 9a5fcc051188467ff067c7542e3c12d32b9ce57e
- data.tar.gz: c93d01c345e6e3a8b43f335f0467ee47532cc32d
+ metadata.gz: 69eeaa8791df4a9dce1d4746d881805e7f8c2ea4
+ data.tar.gz: 322f28022072631766fb7f862b4465f04f8f0745
  SHA512:
- metadata.gz: 510bf2837f6c57e225b53084790dc0e79feef60247b73d8aee7cd268725676e0783e9ecee0cb2db4a3235969634802b2b8005208f36a4f1a86f1d15777ea9bb9
- data.tar.gz: a1c23cbf8e5bc1c5414e4e906b0060cb5a7da9085767f319df57763724cfc07cda3925ba085015951dd3a5e40a9dba6a211d777474f00383b9d3f08d9f1d706a
+ metadata.gz: 6cfbe96838e1960f5097ee9c33f78a2d02f111c9b06014954f18b7cebf97b89d265b22affd755bd1318b4a6a9e9953599aeaa013cde8bc3a7e5d91264abeed71
+ data.tar.gz: e5eac48dd2822412acff3d0612cff714d77fa9a15d8fa33a27b8d3c668f226eb1fbbdbfd4dbf6649ebc6667fcc01273d4da7a896987537d0e2fa0ca654dbbaed
data/.github/workflows/release.yml CHANGED
@@ -25,6 +25,9 @@ jobs:
  uses: actions/setup-java@v1
  with:
  java-version: 1.8
+ - name: scalafmt
+ if: github.event.pull_request.merged == true
+ run: ./gradlew spotlessCheck
  - name: Test with Gradle
  if: github.event.pull_request.merged == true
  run: ./gradlew test
data/.github/workflows/test.yml CHANGED
@@ -21,6 +21,8 @@ jobs:
  uses: actions/setup-java@v1
  with:
  java-version: 1.8
+ - name: scalafmt
+ run: ./gradlew spotlessCheck
  - name: Test with Gradle
  run: ./gradlew test

data/.scalafmt.conf CHANGED
@@ -0,0 +1,5 @@
+ # https://scalameta.org/scalafmt/#Configuration
+
+ version = "2.3.2"
+ newlines.alwaysBeforeElseAfterCurlyIf = true
+ newlines.alwaysBeforeTopLevelStatements = true
data/CHANGELOG.md CHANGED
@@ -1,3 +1,18 @@
+ 0.2.0 (2020-03-10)
+ ==================
+
+ * [Enhancement] [#23](https://github.com/civitaspo/embulk-output-s3_parquet/pull/23) Limit the usage of swapping ContextClassLoader
+ * [BugFix] [#24](https://github.com/civitaspo/embulk-output-s3_parquet/pull/24) Use basic credentials correctly
+ * [Enhancement] [#20](https://github.com/civitaspo/embulk-output-s3_parquet/pull/20) Update gradle 4.1 -> 6.1
+ * [Enhancement] [#20](https://github.com/civitaspo/embulk-output-s3_parquet/pull/20) Update parquet-{column,common,encoding,hadoop,jackson,tools} 1.10.1 -> 1.11.0 with the latest parquet-format 2.4.0 -> 2.7.0
+ * [parquet-format CHANGELOG](https://github.com/apache/parquet-format/blob/master/CHANGES.md)
+ * [parquet-mr CHANGELOG](https://github.com/apache/parquet-mr/blob/apache-parquet-1.11.0/CHANGES.md#version-1110)
+ * [Enhancement] [#20](https://github.com/civitaspo/embulk-output-s3_parquet/pull/20) Update aws-java-sdk 1.11.676 -> 1.11.739
+ * [Enhancement] [#20](https://github.com/civitaspo/embulk-output-s3_parquet/pull/20) Update embulk 0.9.20 -> 0.9.23 with embulk-deps-{config,buffer}
+ * [Enhancement] [#19](https://github.com/civitaspo/embulk-output-s3_parquet/pull/19) Use scalafmt instead of the Intellij formatter.
+ * [Enhancement] [#19](https://github.com/civitaspo/embulk-output-s3_parquet/pull/19) Use scalafmt in CI.
+ * [Enhancement] [#19](https://github.com/civitaspo/embulk-output-s3_parquet/pull/19) Enable to run examples locally with some prepared scripts.
+
  0.1.0 (2019-11-17)
  ==================

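The `[BugFix] #24 Use basic credentials correctly` entry above concerns the static-key authentication path handled by `AwsCredentials.scala` (also touched in this release). As a minimal, hedged sketch only, assuming the plugin exposes `auth_method`, `access_key_id`, and `secret_access_key` options as documented in its README (those option names are not part of this diff), a configuration exercising that path might look like:

```yaml
out:
  type: s3_parquet
  bucket: example
  path_prefix: path/to/my-obj.
  file_ext: snappy.parquet
  compression_codec: snappy
  # Assumed option names; the #24 fix is about this "basic" credentials path.
  auth_method: basic
  access_key_id: YOUR_ACCESS_KEY_ID
  secret_access_key: YOUR_SECRET_ACCESS_KEY
```

According to the changelog entry, 0.1.0 did not use basic credentials correctly; 0.2.0 fixes that.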
data/README.md CHANGED
@@ -131,6 +131,8 @@ out:
  ### Run example:

  ```shell
+ $ ./run_s3_local.sh
+ $ ./example/prepare_s3_bucket.sh
  $ ./gradlew classpath
  $ embulk run example/config.yml -Ilib
  ```
@@ -138,8 +140,7 @@ $ embulk run example/config.yml -Ilib
  ### Run test:

  ```shell
- ## Run fake S3 with localstack
- $ docker run -it --rm -p 4572:4572 -e SERVICES=s3 localstack/localstack
+ $ ./run_s3_local.sh
  $ ./gradlew test
  ```

data/build.gradle CHANGED
@@ -3,6 +3,7 @@ plugins {
  id "com.jfrog.bintray" version "1.1"
  id "com.github.jruby-gradle.base" version "1.5.0"
  id "com.adarshr.test-logger" version "1.6.0" // For Pretty test logging
+ id "com.diffplug.gradle.spotless" version "3.27.1"
  }
  import com.github.jrubygradle.JRubyExec
  repositories {
@@ -13,29 +14,32 @@ configurations {
  provided
  }

- version = "0.1.0"
+ version = "0.2.0"

  sourceCompatibility = 1.8
  targetCompatibility = 1.8

  dependencies {
- compile "org.embulk:embulk-core:0.9.20"
- provided "org.embulk:embulk-core:0.9.20"
+ compile "org.embulk:embulk-core:0.9.23"
+ provided "org.embulk:embulk-core:0.9.23"

  compile 'org.scala-lang:scala-library:2.13.1'
  ['glue', 's3', 'sts'].each { v ->
- compile "com.amazonaws:aws-java-sdk-${v}:1.11.676"
+ compile "com.amazonaws:aws-java-sdk-${v}:1.11.739"
  }
- ['column', 'common', 'encoding', 'format', 'hadoop', 'jackson'].each { v ->
- compile "org.apache.parquet:parquet-${v}:1.10.1"
+ ['column', 'common', 'encoding', 'hadoop', 'jackson'].each { v ->
+ compile "org.apache.parquet:parquet-${v}:1.11.0"
  }
+ // ref. https://github.com/apache/parquet-mr/blob/apache-parquet-1.11.0/pom.xml#L85
+ compile 'org.apache.parquet:parquet-format:2.7.0'
  compile 'org.apache.hadoop:hadoop-common:2.9.2'
  compile 'org.xerial.snappy:snappy-java:1.1.7.3'

+ ['test', 'standards', 'deps-buffer', 'deps-config'].each { v ->
+ testCompile "org.embulk:embulk-${v}:0.9.23"
+ }
  testCompile 'org.scalatest:scalatest_2.13:3.0.8'
- testCompile 'org.embulk:embulk-test:0.9.20'
- testCompile 'org.embulk:embulk-standards:0.9.20'
- testCompile 'org.apache.parquet:parquet-tools:1.10.1'
+ testCompile 'org.apache.parquet:parquet-tools:1.11.0'
  testCompile 'org.apache.hadoop:hadoop-client:2.9.2'
  }

@@ -43,6 +47,12 @@ testlogger {
  theme "mocha"
  }

+ spotless {
+ scala {
+ scalafmt('2.3.2').configFile('.scalafmt.conf')
+ }
+ }
+
  task classpath(type: Copy, dependsOn: ["jar"]) {
  doFirst { file("classpath").deleteDir() }
  from (configurations.runtime - configurations.provided + files(jar.archivePath))
data/example/config.yml CHANGED
@@ -17,7 +17,9 @@ in:

  out:
  type: s3_parquet
- bucket: my-bucket
+ bucket: example
+ region: us-east-1
+ endpoint: http://127.0.0.1:4572
  path_prefix: path/to/my-obj.
  file_ext: snappy.parquet
  compression_codec: snappy
data/example/prepare_s3_bucket.sh CHANGED
@@ -0,0 +1,6 @@
+ #!/usr/bin/env bash
+
+ aws s3 mb s3://example \
+ --endpoint-url http://localhost:4572 \
+ --region us-east-1
+
data/example/with_catalog.yml CHANGED
@@ -17,7 +17,9 @@ in:

  out:
  type: s3_parquet
- bucket: dev-baikal-workspace
+ bucket: example
+ region: us-east-1
+ endpoint: http://127.0.0.1:4572
  path_prefix: path/to/my-obj-2.
  file_ext: snappy.parquet
  compression_codec: snappy
data/example/with_logicaltypes.yml CHANGED
@@ -17,7 +17,9 @@ in:

  out:
  type: s3_parquet
- bucket: my-bucket
+ bucket: example
+ region: us-east-1
+ endpoint: http://127.0.0.1:4572
  path_prefix: path/to/my-obj-2.
  file_ext: snappy.parquet
  compression_codec: snappy
data/gradle/wrapper/gradle-wrapper.properties CHANGED
@@ -1,5 +1,5 @@
  distributionBase=GRADLE_USER_HOME
  distributionPath=wrapper/dists
+ distributionUrl=https\://services.gradle.org/distributions/gradle-6.1-bin.zip
  zipStoreBase=GRADLE_USER_HOME
  zipStorePath=wrapper/dists
- distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-bin.zip
data/gradlew CHANGED
@@ -1,5 +1,21 @@
  #!/usr/bin/env sh

+ #
+ # Copyright 2015 the original author or authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
  ##############################################################################
  ##
  ## Gradle start up script for UN*X
@@ -28,7 +44,7 @@ APP_NAME="Gradle"
  APP_BASE_NAME=`basename "$0"`

  # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
- DEFAULT_JVM_OPTS=""
+ DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

  # Use the maximum available, or set MAX_FD != -1 to use that value.
  MAX_FD="maximum"
@@ -109,8 +125,8 @@ if $darwin; then
  GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
  fi

- # For Cygwin, switch paths to Windows format before running java
- if $cygwin ; then
+ # For Cygwin or MSYS, switch paths to Windows format before running java
+ if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
  APP_HOME=`cygpath --path --mixed "$APP_HOME"`
  CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
  JAVACMD=`cygpath --unix "$JAVACMD"`
@@ -138,19 +154,19 @@ if $cygwin ; then
  else
  eval `echo args$i`="\"$arg\""
  fi
- i=$((i+1))
+ i=`expr $i + 1`
  done
  case $i in
- (0) set -- ;;
- (1) set -- "$args0" ;;
- (2) set -- "$args0" "$args1" ;;
- (3) set -- "$args0" "$args1" "$args2" ;;
- (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
- (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
- (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
- (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
- (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
- (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ 0) set -- ;;
+ 1) set -- "$args0" ;;
+ 2) set -- "$args0" "$args1" ;;
+ 3) set -- "$args0" "$args1" "$args2" ;;
+ 4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
  esac
  fi

@@ -159,14 +175,9 @@ save () {
  for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
  echo " "
  }
- APP_ARGS=$(save "$@")
+ APP_ARGS=`save "$@"`

  # Collect all arguments for the java command, following the shell quoting and substitution rules
  eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

- # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
- if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
- cd "$(dirname "$0")"
- fi
-
  exec "$JAVACMD" "$@"
data/gradlew.bat CHANGED
@@ -1,3 +1,19 @@
+ @rem
+ @rem Copyright 2015 the original author or authors.
+ @rem
+ @rem Licensed under the Apache License, Version 2.0 (the "License");
+ @rem you may not use this file except in compliance with the License.
+ @rem You may obtain a copy of the License at
+ @rem
+ @rem https://www.apache.org/licenses/LICENSE-2.0
+ @rem
+ @rem Unless required by applicable law or agreed to in writing, software
+ @rem distributed under the License is distributed on an "AS IS" BASIS,
+ @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ @rem See the License for the specific language governing permissions and
+ @rem limitations under the License.
+ @rem
+
  @if "%DEBUG%" == "" @echo off
  @rem ##########################################################################
  @rem
@@ -14,7 +30,7 @@ set APP_BASE_NAME=%~n0
  set APP_HOME=%DIRNAME%

  @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
- set DEFAULT_JVM_OPTS=
+ set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

  @rem Find java.exe
  if defined JAVA_HOME goto findJavaFromJavaHome
data/run_s3_local.sh CHANGED
@@ -0,0 +1,7 @@
+ #!/bin/sh
+
+ docker run -it -d --rm \
+ -p 4572:4572 \
+ -e SERVICES=s3 \
+ localstack/localstack
+
data/src/main/scala/org/embulk/output/s3_parquet/CatalogRegistrator.scala CHANGED
@@ -1,202 +1,250 @@
  package org.embulk.output.s3_parquet

-
  import java.util.{Optional, Map => JMap}

- import com.amazonaws.services.glue.model.{Column, CreateTableRequest, DeleteTableRequest, GetTableRequest, SerDeInfo, StorageDescriptor, TableInput}
+ import com.amazonaws.services.glue.model.{
+ Column,
+ CreateTableRequest,
+ DeleteTableRequest,
+ GetTableRequest,
+ SerDeInfo,
+ StorageDescriptor,
+ TableInput
+ }
  import org.apache.parquet.hadoop.metadata.CompressionCodecName
  import org.embulk.config.{Config, ConfigDefault, ConfigException}
  import org.embulk.output.s3_parquet.aws.Aws
  import org.embulk.output.s3_parquet.CatalogRegistrator.ColumnOptions
  import org.embulk.spi.Schema
- import org.embulk.spi.`type`.{BooleanType, DoubleType, JsonType, LongType, StringType, TimestampType, Type}
+ import org.embulk.spi.`type`.{
+ BooleanType,
+ DoubleType,
+ JsonType,
+ LongType,
+ StringType,
+ TimestampType,
+ Type
+ }
  import org.slf4j.{Logger, LoggerFactory}

  import scala.jdk.CollectionConverters._
  import scala.util.Try

-
- object CatalogRegistrator
- {
- trait Task
- extends org.embulk.config.Task
- {
- @Config("catalog_id")
- @ConfigDefault("null")
- def getCatalogId: Optional[String]
-
- @Config("database")
- def getDatabase: String
-
- @Config("table")
- def getTable: String
-
- @Config("column_options")
- @ConfigDefault("{}")
- def getColumnOptions: JMap[String, ColumnOptions]
-
- @Config("operation_if_exists")
- @ConfigDefault("\"delete\"")
- def getOperationIfExists: String
- }
-
- trait ColumnOptions
- {
- @Config("type")
- def getType: String
- }
-
- def apply(aws: Aws,
- task: Task,
- schema: Schema,
- location: String,
- compressionCodec: CompressionCodecName,
- loggerOption: Option[Logger] = None,
- parquetColumnLogicalTypes: Map[String, String] = Map.empty): CatalogRegistrator =
- {
- new CatalogRegistrator(aws, task, schema, location, compressionCodec, loggerOption, parquetColumnLogicalTypes)
- }
+ object CatalogRegistrator {
+
+ trait Task extends org.embulk.config.Task {
+
+ @Config("catalog_id")
+ @ConfigDefault("null")
+ def getCatalogId: Optional[String]
+
+ @Config("database")
+ def getDatabase: String
+
+ @Config("table")
+ def getTable: String
+
+ @Config("column_options")
+ @ConfigDefault("{}")
+ def getColumnOptions: JMap[String, ColumnOptions]
+
+ @Config("operation_if_exists")
+ @ConfigDefault("\"delete\"")
+ def getOperationIfExists: String
+ }
+
+ trait ColumnOptions {
+
+ @Config("type")
+ def getType: String
+ }
+
+ def apply(
+ aws: Aws,
+ task: Task,
+ schema: Schema,
+ location: String,
+ compressionCodec: CompressionCodecName,
+ loggerOption: Option[Logger] = None,
+ parquetColumnLogicalTypes: Map[String, String] = Map.empty
+ ): CatalogRegistrator = {
+ new CatalogRegistrator(
+ aws,
+ task,
+ schema,
+ location,
+ compressionCodec,
+ loggerOption,
+ parquetColumnLogicalTypes
+ )
+ }
  }

- class CatalogRegistrator(aws: Aws,
- task: CatalogRegistrator.Task,
- schema: Schema,
- location: String,
- compressionCodec: CompressionCodecName,
- loggerOption: Option[Logger] = None,
- parquetColumnLogicalTypes: Map[String, String] = Map.empty)
- {
- val logger: Logger = loggerOption.getOrElse(LoggerFactory.getLogger(classOf[CatalogRegistrator]))
-
- def run(): Unit =
- {
- if (doesTableExists()) {
- task.getOperationIfExists match {
- case "skip" =>
- logger.info(s"Skip to register the table: ${task.getDatabase}.${task.getTable}")
- return
-
- case "delete" =>
- logger.info(s"Delete the table: ${task.getDatabase}.${task.getTable}")
- deleteTable()
-
- case unknown =>
- throw new ConfigException(s"Unsupported operation: $unknown")
- }
- }
- registerNewParquetTable()
- showNewTableInfo()
+ class CatalogRegistrator(
+ aws: Aws,
+ task: CatalogRegistrator.Task,
+ schema: Schema,
+ location: String,
+ compressionCodec: CompressionCodecName,
+ loggerOption: Option[Logger] = None,
+ parquetColumnLogicalTypes: Map[String, String] = Map.empty
+ ) {
+
+ val logger: Logger =
+ loggerOption.getOrElse(LoggerFactory.getLogger(classOf[CatalogRegistrator]))
+
+ def run(): Unit = {
+ if (doesTableExists()) {
+ task.getOperationIfExists match {
+ case "skip" =>
+ logger.info(
+ s"Skip to register the table: ${task.getDatabase}.${task.getTable}"
+ )
+ return
+
+ case "delete" =>
+ logger.info(s"Delete the table: ${task.getDatabase}.${task.getTable}")
+ deleteTable()
+
+ case unknown =>
+ throw new ConfigException(s"Unsupported operation: $unknown")
+ }
  }
-
- def showNewTableInfo(): Unit =
- {
- val req = new GetTableRequest()
- task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
- req.setDatabaseName(task.getDatabase)
- req.setName(task.getTable)
-
- val t = aws.withGlue(_.getTable(req)).getTable
- logger.info(s"Created a table: ${t.toString}")
- }
-
- def doesTableExists(): Boolean =
- {
- val req = new GetTableRequest()
- task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
- req.setDatabaseName(task.getDatabase)
- req.setName(task.getTable)
-
- Try(aws.withGlue(_.getTable(req))).isSuccess
+ registerNewParquetTable()
+ showNewTableInfo()
+ }
+
+ def showNewTableInfo(): Unit = {
+ val req = new GetTableRequest()
+ task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
+ req.setDatabaseName(task.getDatabase)
+ req.setName(task.getTable)
+
+ val t = aws.withGlue(_.getTable(req)).getTable
+ logger.info(s"Created a table: ${t.toString}")
+ }
+
+ def doesTableExists(): Boolean = {
+ val req = new GetTableRequest()
+ task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
+ req.setDatabaseName(task.getDatabase)
+ req.setName(task.getTable)
+
+ Try(aws.withGlue(_.getTable(req))).isSuccess
+ }
+
+ def deleteTable(): Unit = {
+ val req = new DeleteTableRequest()
+ task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
+ req.setDatabaseName(task.getDatabase)
+ req.setName(task.getTable)
+ aws.withGlue(_.deleteTable(req))
+ }
+
+ def registerNewParquetTable(): Unit = {
+ logger.info(s"Create a new table: ${task.getDatabase}.${task.getTable}")
+ val req = new CreateTableRequest()
+ task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
+ req.setDatabaseName(task.getDatabase)
+ req.setTableInput(
+ new TableInput()
+ .withName(task.getTable)
+ .withDescription("Created by embulk-output-s3_parquet")
+ .withTableType("EXTERNAL_TABLE")
+ .withParameters(
+ Map(
+ "EXTERNAL" -> "TRUE",
+ "classification" -> "parquet",
+ "parquet.compression" -> compressionCodec.name()
+ ).asJava
+ )
+ .withStorageDescriptor(
+ new StorageDescriptor()
+ .withColumns(getGlueSchema: _*)
+ .withLocation(location)
+ .withCompressed(isCompressed)
+ .withInputFormat(
+ "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"
+ )
+ .withOutputFormat(
+ "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"
+ )
+ .withSerdeInfo(
+ new SerDeInfo()
+ .withSerializationLibrary(
+ "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"
+ )
+ .withParameters(Map("serialization.format" -> "1").asJava)
+ )
+ )
+ )
+ aws.withGlue(_.createTable(req))
+ }
+
+ private def getGlueSchema: Seq[Column] = {
+ val columnOptions: Map[String, ColumnOptions] =
+ task.getColumnOptions.asScala.toMap
+ schema.getColumns.asScala.toSeq.map { c =>
+ val cType: String =
+ if (columnOptions.contains(c.getName)) columnOptions(c.getName).getType
+ else if (parquetColumnLogicalTypes.contains(c.getName))
+ convertParquetLogicalTypeToGlueType(
+ parquetColumnLogicalTypes(c.getName)
+ )
+ else convertEmbulkTypeToGlueType(c.getType)
+ new Column()
+ .withName(c.getName)
+ .withType(cType)
  }
-
- def deleteTable(): Unit =
- {
- val req = new DeleteTableRequest()
- task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
- req.setDatabaseName(task.getDatabase)
- req.setName(task.getTable)
- aws.withGlue(_.deleteTable(req))
- }
-
- def registerNewParquetTable(): Unit =
- {
- logger.info(s"Create a new table: ${task.getDatabase}.${task.getTable}")
- val req = new CreateTableRequest()
- task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
- req.setDatabaseName(task.getDatabase)
- req.setTableInput(new TableInput()
- .withName(task.getTable)
- .withDescription("Created by embulk-output-s3_parquet")
- .withTableType("EXTERNAL_TABLE")
- .withParameters(Map("EXTERNAL" -> "TRUE",
- "classification" -> "parquet",
- "parquet.compression" -> compressionCodec.name()).asJava)
- .withStorageDescriptor(new StorageDescriptor()
- .withColumns(getGlueSchema: _*)
- .withLocation(location)
- .withCompressed(isCompressed)
- .withInputFormat("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat")
- .withOutputFormat("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat")
- .withSerdeInfo(new SerDeInfo()
- .withSerializationLibrary("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")
- .withParameters(Map("serialization.format" -> "1").asJava)
- )
- )
- )
- aws.withGlue(_.createTable(req))
+ }
+
+ private def convertParquetLogicalTypeToGlueType(t: String): String = {
+ t match {
+ case "timestamp-millis" => "timestamp"
+ case "timestamp-micros" =>
+ "bigint" // Glue cannot recognize timestamp-micros.
+ case "int8" => "tinyint"
+ case "int16" => "smallint"
+ case "int32" => "int"
+ case "int64" => "bigint"
+ case "uint8" =>
+ "smallint" // Glue tinyint is a minimum value of -2^7 and a maximum value of 2^7-1
+ case "uint16" =>
+ "int" // Glue smallint is a minimum value of -2^15 and a maximum value of 2^15-1.
+ case "uint32" =>
+ "bigint" // Glue int is a minimum value of-2^31 and a maximum value of 2^31-1.
+ case "uint64" =>
+ throw new ConfigException(
+ "Cannot convert uint64 to Glue data types automatically" +
+ " because the Glue bigint supports a 64-bit signed integer." +
+ " Please use `catalog.column_options` to define the type."
+ )
+ case "json" => "string"
+ case _ =>
+ throw new ConfigException(
+ s"Unsupported a parquet logical type: $t. Please use `catalog.column_options` to define the type."
+ )
  }

- private def getGlueSchema: Seq[Column] =
- {
- val columnOptions: Map[String, ColumnOptions] = task.getColumnOptions.asScala.toMap
- schema.getColumns.asScala.toSeq.map { c =>
- val cType: String =
- if (columnOptions.contains(c.getName)) columnOptions(c.getName).getType
- else if (parquetColumnLogicalTypes.contains(c.getName)) convertParquetLogicalTypeToGlueType(parquetColumnLogicalTypes(c.getName))
- else convertEmbulkTypeToGlueType(c.getType)
- new Column()
- .withName(c.getName)
- .withType(cType)
- }
+ }
+
+ private def convertEmbulkTypeToGlueType(t: Type): String = {
+ t match {
+ case _: BooleanType => "boolean"
+ case _: LongType => "bigint"
+ case _: DoubleType => "double"
+ case _: StringType => "string"
+ case _: TimestampType => "string"
+ case _: JsonType => "string"
+ case unknown =>
+ throw new ConfigException(
+ s"Unsupported embulk type: ${unknown.getName}"
+ )
  }
+ }

- private def convertParquetLogicalTypeToGlueType(t: String): String =
- {
- t match {
- case "timestamp-millis" => "timestamp"
- case "timestamp-micros" => "bigint" // Glue cannot recognize timestamp-micros.
- case "int8" => "tinyint"
- case "int16" => "smallint"
- case "int32" => "int"
- case "int64" => "bigint"
- case "uint8" => "smallint" // Glue tinyint is a minimum value of -2^7 and a maximum value of 2^7-1
- case "uint16" => "int" // Glue smallint is a minimum value of -2^15 and a maximum value of 2^15-1.
- case "uint32" => "bigint" // Glue int is a minimum value of-2^31 and a maximum value of 2^31-1.
- case "uint64" => throw new ConfigException("Cannot convert uint64 to Glue data types automatically" +
- " because the Glue bigint supports a 64-bit signed integer." +
- " Please use `catalog.column_options` to define the type.")
- case "json" => "string"
- case _ => throw new ConfigException(s"Unsupported a parquet logical type: $t. Please use `catalog.column_options` to define the type.")
- }
-
- }
-
- private def convertEmbulkTypeToGlueType(t: Type): String =
- {
- t match {
- case _: BooleanType => "boolean"
- case _: LongType => "bigint"
- case _: DoubleType => "double"
- case _: StringType => "string"
- case _: TimestampType => "string"
- case _: JsonType => "string"
- case unknown => throw new ConfigException(s"Unsupported embulk type: ${unknown.getName}")
- }
- }
-
- private def isCompressed: Boolean =
- {
- !compressionCodec.equals(CompressionCodecName.UNCOMPRESSED)
- }
+ private def isCompressed: Boolean = {
+ !compressionCodec.equals(CompressionCodecName.UNCOMPRESSED)
+ }

  }
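The error messages above direct users to `catalog.column_options` when a Glue type cannot be derived automatically (for example for `uint64`). As a hedged sketch only, assuming the output config nests the `CatalogRegistrator.Task` settings shown above (`database`, `table`, `column_options`, `operation_if_exists`) under a `catalog` block as described in the plugin README, an explicit type override might look like:

```yaml
out:
  type: s3_parquet
  # ... bucket, path_prefix, and other output settings ...
  catalog:
    database: my_database
    table: my_table
    # "my_uint64_column" is a hypothetical column name; the type string is
    # passed to Glue as-is instead of the automatic conversion above.
    column_options:
      my_uint64_column: {type: "decimal(20,0)"}
```

Per the `run()` logic above, `operation_if_exists` (default `"delete"`) decides whether an already existing table is skipped or deleted before the new Parquet table is registered.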