embulk-output-s3_parquet 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/release.yml +3 -0
  3. data/.github/workflows/test.yml +2 -0
  4. data/.scalafmt.conf +5 -0
  5. data/CHANGELOG.md +15 -0
  6. data/README.md +3 -2
  7. data/build.gradle +19 -9
  8. data/example/config.yml +3 -1
  9. data/example/prepare_s3_bucket.sh +6 -0
  10. data/example/with_catalog.yml +3 -1
  11. data/example/with_logicaltypes.yml +3 -1
  12. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  13. data/gradle/wrapper/gradle-wrapper.properties +1 -1
  14. data/gradlew +31 -20
  15. data/gradlew.bat +17 -1
  16. data/run_s3_local.sh +7 -0
  17. data/src/main/scala/org/embulk/output/s3_parquet/CatalogRegistrator.scala +226 -178
  18. data/src/main/scala/org/embulk/output/s3_parquet/ContextClassLoaderSwapper.scala +18 -0
  19. data/src/main/scala/org/embulk/output/s3_parquet/S3ParquetOutputPlugin.scala +293 -204
  20. data/src/main/scala/org/embulk/output/s3_parquet/S3ParquetPageOutput.scala +46 -49
  21. data/src/main/scala/org/embulk/output/s3_parquet/aws/Aws.scala +46 -50
  22. data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsClientConfiguration.scala +18 -23
  23. data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsCredentials.scala +146 -119
  24. data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsEndpointConfiguration.scala +32 -35
  25. data/src/main/scala/org/embulk/output/s3_parquet/aws/AwsS3Configuration.scala +45 -41
  26. data/src/main/scala/org/embulk/output/s3_parquet/aws/HttpProxy.scala +40 -43
  27. data/src/main/scala/org/embulk/output/s3_parquet/parquet/EmbulkMessageType.scala +138 -92
  28. data/src/main/scala/org/embulk/output/s3_parquet/parquet/LogicalTypeHandler.scala +117 -102
  29. data/src/main/scala/org/embulk/output/s3_parquet/parquet/LogicalTypeHandlerStore.scala +91 -84
  30. data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriteSupport.scala +30 -29
  31. data/src/main/scala/org/embulk/output/s3_parquet/parquet/ParquetFileWriter.scala +143 -152
  32. data/src/test/scala/org/embulk/output/s3_parquet/TestS3ParquetOutputPlugin.scala +144 -117
  33. data/src/test/scala/org/embulk/output/s3_parquet/parquet/TestLogicalTypeHandler.scala +72 -66
  34. data/src/test/scala/org/embulk/output/s3_parquet/parquet/TestLogicalTypeHandlerStore.scala +149 -132
  35. metadata +22 -15
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 9a5fcc051188467ff067c7542e3c12d32b9ce57e
-  data.tar.gz: c93d01c345e6e3a8b43f335f0467ee47532cc32d
+  metadata.gz: 69eeaa8791df4a9dce1d4746d881805e7f8c2ea4
+  data.tar.gz: 322f28022072631766fb7f862b4465f04f8f0745
 SHA512:
-  metadata.gz: 510bf2837f6c57e225b53084790dc0e79feef60247b73d8aee7cd268725676e0783e9ecee0cb2db4a3235969634802b2b8005208f36a4f1a86f1d15777ea9bb9
-  data.tar.gz: a1c23cbf8e5bc1c5414e4e906b0060cb5a7da9085767f319df57763724cfc07cda3925ba085015951dd3a5e40a9dba6a211d777474f00383b9d3f08d9f1d706a
+  metadata.gz: 6cfbe96838e1960f5097ee9c33f78a2d02f111c9b06014954f18b7cebf97b89d265b22affd755bd1318b4a6a9e9953599aeaa013cde8bc3a7e5d91264abeed71
+  data.tar.gz: e5eac48dd2822412acff3d0612cff714d77fa9a15d8fa33a27b8d3c668f226eb1fbbdbfd4dbf6649ebc6667fcc01273d4da7a896987537d0e2fa0ca654dbbaed
data/.github/workflows/release.yml CHANGED
@@ -25,6 +25,9 @@ jobs:
       uses: actions/setup-java@v1
       with:
         java-version: 1.8
+    - name: scalafmt
+      if: github.event.pull_request.merged == true
+      run: ./gradlew spotlessCheck
     - name: Test with Gradle
       if: github.event.pull_request.merged == true
      run: ./gradlew test
data/.github/workflows/test.yml CHANGED
@@ -21,6 +21,8 @@ jobs:
       uses: actions/setup-java@v1
       with:
         java-version: 1.8
+    - name: scalafmt
+      run: ./gradlew spotlessCheck
     - name: Test with Gradle
       run: ./gradlew test
 
data/.scalafmt.conf ADDED
@@ -0,0 +1,5 @@
+# https://scalameta.org/scalafmt/#Configuration
+
+version = "2.3.2"
+newlines.alwaysBeforeElseAfterCurlyIf = true
+newlines.alwaysBeforeTopLevelStatements = true
data/CHANGELOG.md CHANGED
@@ -1,3 +1,18 @@
+0.2.0 (2020-03-10)
+==================
+
+* [Enhancement] [#23](https://github.com/civitaspo/embulk-output-s3_parquet/pull/23) Limit the usage of swapping ContextClassLoader
+* [BugFix] [#24](https://github.com/civitaspo/embulk-output-s3_parquet/pull/24) Use basic credentials correctly
+* [Enhancement] [#20](https://github.com/civitaspo/embulk-output-s3_parquet/pull/20) Update gradle 4.1 -> 6.1
+* [Enhancement] [#20](https://github.com/civitaspo/embulk-output-s3_parquet/pull/20) Update parquet-{column,common,encoding,hadoop,jackson,tools} 1.10.1 -> 1.11.0 with the latest parquet-format 2.4.0 -> 2.7.0
+  * [parquet-format CHANGELOG](https://github.com/apache/parquet-format/blob/master/CHANGES.md)
+  * [parquet-mr CHANGELOG](https://github.com/apache/parquet-mr/blob/apache-parquet-1.11.0/CHANGES.md#version-1110)
+* [Enhancement] [#20](https://github.com/civitaspo/embulk-output-s3_parquet/pull/20) Update aws-java-sdk 1.11.676 -> 1.11.739
+* [Enhancement] [#20](https://github.com/civitaspo/embulk-output-s3_parquet/pull/20) Update embulk 0.9.20 -> 0.9.23 with embulk-deps-{config,buffer}
+* [Enhancement] [#19](https://github.com/civitaspo/embulk-output-s3_parquet/pull/19) Use scalafmt instead of the Intellij formatter.
+* [Enhancement] [#19](https://github.com/civitaspo/embulk-output-s3_parquet/pull/19) Use scalafmt in CI.
+* [Enhancement] [#19](https://github.com/civitaspo/embulk-output-s3_parquet/pull/19) Enable to run examples locally with some prepared scripts.
+
 0.1.0 (2019-11-17)
 ==================
 
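The entry for [#23] refers to the new `data/src/main/scala/org/embulk/output/s3_parquet/ContextClassLoaderSwapper.scala` listed above (+18 lines), whose body is not included in this excerpt. As a rough, hypothetical sketch of the pattern such a helper usually implements — swap the thread's context class loader only around the calls that need it, then always restore the previous one — it might look like the following; the object and method names here are illustrative, not the plugin's actual API:

```scala
// Hypothetical sketch only: the real ContextClassLoaderSwapper.scala is not
// shown in this diff. The common pattern is to run a block with the class
// loader that loaded a given class, restoring the previous loader afterwards.
object ContextClassLoaderSwapperSketch {

  def usingClassLoaderOf[A](clazz: Class[_])(f: => A): A = {
    val thread = Thread.currentThread()
    val original = thread.getContextClassLoader
    thread.setContextClassLoader(clazz.getClassLoader)
    try f
    finally thread.setContextClassLoader(original)
  }
}
```

Scoping the swap to a block like this, rather than leaving a different class loader installed for the whole run, appears to be what "limit the usage of swapping ContextClassLoader" means here.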
data/README.md CHANGED
@@ -131,6 +131,8 @@ out:
 ### Run example:
 
 ```shell
+$ ./run_s3_local.sh
+$ ./example/prepare_s3_bucket.sh
 $ ./gradlew classpath
 $ embulk run example/config.yml -Ilib
 ```
@@ -138,8 +140,7 @@ $ embulk run example/config.yml -Ilib
 ### Run test:
 
 ```shell
-## Run fake S3 with localstack
-$ docker run -it --rm -p 4572:4572 -e SERVICES=s3 localstack/localstack
+$ ./run_s3_local.sh
 $ ./gradlew test
 ```
 
data/build.gradle CHANGED
@@ -3,6 +3,7 @@ plugins {
     id "com.jfrog.bintray" version "1.1"
     id "com.github.jruby-gradle.base" version "1.5.0"
     id "com.adarshr.test-logger" version "1.6.0" // For Pretty test logging
+    id "com.diffplug.gradle.spotless" version "3.27.1"
 }
 import com.github.jrubygradle.JRubyExec
 repositories {
@@ -13,29 +14,32 @@ configurations {
     provided
 }
 
-version = "0.1.0"
+version = "0.2.0"
 
 sourceCompatibility = 1.8
 targetCompatibility = 1.8
 
 dependencies {
-    compile "org.embulk:embulk-core:0.9.20"
-    provided "org.embulk:embulk-core:0.9.20"
+    compile "org.embulk:embulk-core:0.9.23"
+    provided "org.embulk:embulk-core:0.9.23"
 
     compile 'org.scala-lang:scala-library:2.13.1'
     ['glue', 's3', 'sts'].each { v ->
-        compile "com.amazonaws:aws-java-sdk-${v}:1.11.676"
+        compile "com.amazonaws:aws-java-sdk-${v}:1.11.739"
     }
-    ['column', 'common', 'encoding', 'format', 'hadoop', 'jackson'].each { v ->
-        compile "org.apache.parquet:parquet-${v}:1.10.1"
+    ['column', 'common', 'encoding', 'hadoop', 'jackson'].each { v ->
+        compile "org.apache.parquet:parquet-${v}:1.11.0"
     }
+    // ref. https://github.com/apache/parquet-mr/blob/apache-parquet-1.11.0/pom.xml#L85
+    compile 'org.apache.parquet:parquet-format:2.7.0'
     compile 'org.apache.hadoop:hadoop-common:2.9.2'
     compile 'org.xerial.snappy:snappy-java:1.1.7.3'
 
+    ['test', 'standards', 'deps-buffer', 'deps-config'].each { v ->
+        testCompile "org.embulk:embulk-${v}:0.9.23"
+    }
     testCompile 'org.scalatest:scalatest_2.13:3.0.8'
-    testCompile 'org.embulk:embulk-test:0.9.20'
-    testCompile 'org.embulk:embulk-standards:0.9.20'
-    testCompile 'org.apache.parquet:parquet-tools:1.10.1'
+    testCompile 'org.apache.parquet:parquet-tools:1.11.0'
     testCompile 'org.apache.hadoop:hadoop-client:2.9.2'
 }
 
@@ -43,6 +47,12 @@ testlogger {
     theme "mocha"
 }
 
+spotless {
+    scala {
+        scalafmt('2.3.2').configFile('.scalafmt.conf')
+    }
+}
+
 task classpath(type: Copy, dependsOn: ["jar"]) {
     doFirst { file("classpath").deleteDir() }
     from (configurations.runtime - configurations.provided + files(jar.archivePath))
data/example/config.yml CHANGED
@@ -17,7 +17,9 @@ in:
 
 out:
   type: s3_parquet
-  bucket: my-bucket
+  bucket: example
+  region: us-east-1
+  endpoint: http://127.0.0.1:4572
   path_prefix: path/to/my-obj.
   file_ext: snappy.parquet
   compression_codec: snappy
data/example/prepare_s3_bucket.sh ADDED
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+aws s3 mb s3://example \
+    --endpoint-url http://localhost:4572 \
+    --region us-east-1
+
data/example/with_catalog.yml CHANGED
@@ -17,7 +17,9 @@ in:
 
 out:
   type: s3_parquet
-  bucket: dev-baikal-workspace
+  bucket: example
+  region: us-east-1
+  endpoint: http://127.0.0.1:4572
   path_prefix: path/to/my-obj-2.
   file_ext: snappy.parquet
   compression_codec: snappy
data/example/with_logicaltypes.yml CHANGED
@@ -17,7 +17,9 @@ in:
 
 out:
   type: s3_parquet
-  bucket: my-bucket
+  bucket: example
+  region: us-east-1
+  endpoint: http://127.0.0.1:4572
   path_prefix: path/to/my-obj-2.
   file_ext: snappy.parquet
   compression_codec: snappy
data/gradle/wrapper/gradle-wrapper.properties CHANGED
@@ -1,5 +1,5 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-6.1-bin.zip
 zipStoreBase=GRADLE_USER_HOME
 zipStorePath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-bin.zip
data/gradlew CHANGED
@@ -1,5 +1,21 @@
 #!/usr/bin/env sh
 
+#
+# Copyright 2015 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 ##############################################################################
 ##
 ## Gradle start up script for UN*X
@@ -28,7 +44,7 @@ APP_NAME="Gradle"
 APP_BASE_NAME=`basename "$0"`
 
 # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
-DEFAULT_JVM_OPTS=""
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
 
 # Use the maximum available, or set MAX_FD != -1 to use that value.
 MAX_FD="maximum"
@@ -109,8 +125,8 @@ if $darwin; then
     GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
 fi
 
-# For Cygwin, switch paths to Windows format before running java
-if $cygwin ; then
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
     APP_HOME=`cygpath --path --mixed "$APP_HOME"`
     CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
     JAVACMD=`cygpath --unix "$JAVACMD"`
@@ -138,19 +154,19 @@ if $cygwin ; then
         else
             eval `echo args$i`="\"$arg\""
         fi
-        i=$((i+1))
+        i=`expr $i + 1`
     done
     case $i in
-        (0) set -- ;;
-        (1) set -- "$args0" ;;
-        (2) set -- "$args0" "$args1" ;;
-        (3) set -- "$args0" "$args1" "$args2" ;;
-        (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
-        (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
-        (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
-        (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
-        (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
-        (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+        0) set -- ;;
+        1) set -- "$args0" ;;
+        2) set -- "$args0" "$args1" ;;
+        3) set -- "$args0" "$args1" "$args2" ;;
+        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
     esac
 fi
 
@@ -159,14 +175,9 @@ save () {
     for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
     echo " "
 }
-APP_ARGS=$(save "$@")
+APP_ARGS=`save "$@"`
 
 # Collect all arguments for the java command, following the shell quoting and substitution rules
 eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
 
-# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
-if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
-  cd "$(dirname "$0")"
-fi
-
 exec "$JAVACMD" "$@"
data/gradlew.bat CHANGED
@@ -1,3 +1,19 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
 @if "%DEBUG%" == "" @echo off
 @rem ##########################################################################
 @rem
@@ -14,7 +30,7 @@ set APP_BASE_NAME=%~n0
 set APP_HOME=%DIRNAME%
 
 @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
-set DEFAULT_JVM_OPTS=
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
 
 @rem Find java.exe
 if defined JAVA_HOME goto findJavaFromJavaHome
data/run_s3_local.sh ADDED
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+docker run -it -d --rm \
+    -p 4572:4572 \
+    -e SERVICES=s3 \
+    localstack/localstack
+
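The example configs in this diff point the output at this local endpoint (`http://127.0.0.1:4572`, region `us-east-1`, bucket `example`). As an optional sanity check that the localstack container started by `run_s3_local.sh` is reachable — this snippet is illustrative and not part of the repository — a minimal client built on the `aws-java-sdk-s3` dependency from `build.gradle` could look like:

```scala
// Illustrative only: verify the localstack S3 endpoint used by the examples.
// localstack accepts arbitrary credentials, so dummy values are fine here.
import com.amazonaws.auth.{AWSStaticCredentialsProvider, BasicAWSCredentials}
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration
import com.amazonaws.services.s3.AmazonS3ClientBuilder

object CheckLocalS3 extends App {
  val s3 = AmazonS3ClientBuilder
    .standard()
    .withEndpointConfiguration(
      new EndpointConfiguration("http://127.0.0.1:4572", "us-east-1")
    )
    .withCredentials(
      new AWSStaticCredentialsProvider(new BasicAWSCredentials("dummy", "dummy"))
    )
    .enablePathStyleAccess()
    .build()

  // After ./example/prepare_s3_bucket.sh, the "example" bucket should be listed.
  s3.listBuckets().forEach(b => println(b.getName))
}
```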
data/src/main/scala/org/embulk/output/s3_parquet/CatalogRegistrator.scala CHANGED
@@ -1,202 +1,250 @@
 package org.embulk.output.s3_parquet
 
-
 import java.util.{Optional, Map => JMap}
 
-import com.amazonaws.services.glue.model.{Column, CreateTableRequest, DeleteTableRequest, GetTableRequest, SerDeInfo, StorageDescriptor, TableInput}
+import com.amazonaws.services.glue.model.{
+  Column,
+  CreateTableRequest,
+  DeleteTableRequest,
+  GetTableRequest,
+  SerDeInfo,
+  StorageDescriptor,
+  TableInput
+}
 import org.apache.parquet.hadoop.metadata.CompressionCodecName
 import org.embulk.config.{Config, ConfigDefault, ConfigException}
 import org.embulk.output.s3_parquet.aws.Aws
 import org.embulk.output.s3_parquet.CatalogRegistrator.ColumnOptions
 import org.embulk.spi.Schema
-import org.embulk.spi.`type`.{BooleanType, DoubleType, JsonType, LongType, StringType, TimestampType, Type}
+import org.embulk.spi.`type`.{
+  BooleanType,
+  DoubleType,
+  JsonType,
+  LongType,
+  StringType,
+  TimestampType,
+  Type
+}
 import org.slf4j.{Logger, LoggerFactory}
 
 import scala.jdk.CollectionConverters._
 import scala.util.Try
 
-
-object CatalogRegistrator
-{
-    trait Task
-        extends org.embulk.config.Task
-    {
-        @Config("catalog_id")
-        @ConfigDefault("null")
-        def getCatalogId: Optional[String]
-
-        @Config("database")
-        def getDatabase: String
-
-        @Config("table")
-        def getTable: String
-
-        @Config("column_options")
-        @ConfigDefault("{}")
-        def getColumnOptions: JMap[String, ColumnOptions]
-
-        @Config("operation_if_exists")
-        @ConfigDefault("\"delete\"")
-        def getOperationIfExists: String
-    }
-
-    trait ColumnOptions
-    {
-        @Config("type")
-        def getType: String
-    }
-
-    def apply(aws: Aws,
-              task: Task,
-              schema: Schema,
-              location: String,
-              compressionCodec: CompressionCodecName,
-              loggerOption: Option[Logger] = None,
-              parquetColumnLogicalTypes: Map[String, String] = Map.empty): CatalogRegistrator =
-    {
-        new CatalogRegistrator(aws, task, schema, location, compressionCodec, loggerOption, parquetColumnLogicalTypes)
-    }
+object CatalogRegistrator {
+
+  trait Task extends org.embulk.config.Task {
+
+    @Config("catalog_id")
+    @ConfigDefault("null")
+    def getCatalogId: Optional[String]
+
+    @Config("database")
+    def getDatabase: String
+
+    @Config("table")
+    def getTable: String
+
+    @Config("column_options")
+    @ConfigDefault("{}")
+    def getColumnOptions: JMap[String, ColumnOptions]
+
+    @Config("operation_if_exists")
+    @ConfigDefault("\"delete\"")
+    def getOperationIfExists: String
+  }
+
+  trait ColumnOptions {
+
+    @Config("type")
+    def getType: String
+  }
+
+  def apply(
+      aws: Aws,
+      task: Task,
+      schema: Schema,
+      location: String,
+      compressionCodec: CompressionCodecName,
+      loggerOption: Option[Logger] = None,
+      parquetColumnLogicalTypes: Map[String, String] = Map.empty
+  ): CatalogRegistrator = {
+    new CatalogRegistrator(
+      aws,
+      task,
+      schema,
+      location,
+      compressionCodec,
+      loggerOption,
+      parquetColumnLogicalTypes
+    )
+  }
 }
 
-class CatalogRegistrator(aws: Aws,
-                         task: CatalogRegistrator.Task,
-                         schema: Schema,
-                         location: String,
-                         compressionCodec: CompressionCodecName,
-                         loggerOption: Option[Logger] = None,
-                         parquetColumnLogicalTypes: Map[String, String] = Map.empty)
-{
-    val logger: Logger = loggerOption.getOrElse(LoggerFactory.getLogger(classOf[CatalogRegistrator]))
-
-    def run(): Unit =
-    {
-        if (doesTableExists()) {
-            task.getOperationIfExists match {
-                case "skip" =>
-                    logger.info(s"Skip to register the table: ${task.getDatabase}.${task.getTable}")
-                    return
-
-                case "delete" =>
-                    logger.info(s"Delete the table: ${task.getDatabase}.${task.getTable}")
-                    deleteTable()
-
-                case unknown =>
-                    throw new ConfigException(s"Unsupported operation: $unknown")
-            }
-        }
-        registerNewParquetTable()
-        showNewTableInfo()
+class CatalogRegistrator(
+    aws: Aws,
+    task: CatalogRegistrator.Task,
+    schema: Schema,
+    location: String,
+    compressionCodec: CompressionCodecName,
+    loggerOption: Option[Logger] = None,
+    parquetColumnLogicalTypes: Map[String, String] = Map.empty
+) {
+
+  val logger: Logger =
+    loggerOption.getOrElse(LoggerFactory.getLogger(classOf[CatalogRegistrator]))
+
+  def run(): Unit = {
+    if (doesTableExists()) {
+      task.getOperationIfExists match {
+        case "skip" =>
+          logger.info(
+            s"Skip to register the table: ${task.getDatabase}.${task.getTable}"
+          )
+          return
+
+        case "delete" =>
+          logger.info(s"Delete the table: ${task.getDatabase}.${task.getTable}")
+          deleteTable()
+
+        case unknown =>
+          throw new ConfigException(s"Unsupported operation: $unknown")
+      }
     }
-
-    def showNewTableInfo(): Unit =
-    {
-        val req = new GetTableRequest()
-        task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
-        req.setDatabaseName(task.getDatabase)
-        req.setName(task.getTable)
-
-        val t = aws.withGlue(_.getTable(req)).getTable
-        logger.info(s"Created a table: ${t.toString}")
-    }
-
-    def doesTableExists(): Boolean =
-    {
-        val req = new GetTableRequest()
-        task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
-        req.setDatabaseName(task.getDatabase)
-        req.setName(task.getTable)
-
-        Try(aws.withGlue(_.getTable(req))).isSuccess
+    registerNewParquetTable()
+    showNewTableInfo()
+  }
+
+  def showNewTableInfo(): Unit = {
+    val req = new GetTableRequest()
+    task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
+    req.setDatabaseName(task.getDatabase)
+    req.setName(task.getTable)
+
+    val t = aws.withGlue(_.getTable(req)).getTable
+    logger.info(s"Created a table: ${t.toString}")
+  }
+
+  def doesTableExists(): Boolean = {
+    val req = new GetTableRequest()
+    task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
+    req.setDatabaseName(task.getDatabase)
+    req.setName(task.getTable)
+
+    Try(aws.withGlue(_.getTable(req))).isSuccess
+  }
+
+  def deleteTable(): Unit = {
+    val req = new DeleteTableRequest()
+    task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
+    req.setDatabaseName(task.getDatabase)
+    req.setName(task.getTable)
+    aws.withGlue(_.deleteTable(req))
+  }
+
+  def registerNewParquetTable(): Unit = {
+    logger.info(s"Create a new table: ${task.getDatabase}.${task.getTable}")
+    val req = new CreateTableRequest()
+    task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
+    req.setDatabaseName(task.getDatabase)
+    req.setTableInput(
+      new TableInput()
+        .withName(task.getTable)
+        .withDescription("Created by embulk-output-s3_parquet")
+        .withTableType("EXTERNAL_TABLE")
+        .withParameters(
+          Map(
+            "EXTERNAL" -> "TRUE",
+            "classification" -> "parquet",
+            "parquet.compression" -> compressionCodec.name()
+          ).asJava
+        )
+        .withStorageDescriptor(
+          new StorageDescriptor()
+            .withColumns(getGlueSchema: _*)
+            .withLocation(location)
+            .withCompressed(isCompressed)
+            .withInputFormat(
+              "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"
+            )
+            .withOutputFormat(
+              "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"
+            )
+            .withSerdeInfo(
+              new SerDeInfo()
+                .withSerializationLibrary(
+                  "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"
+                )
+                .withParameters(Map("serialization.format" -> "1").asJava)
+            )
+        )
+    )
+    aws.withGlue(_.createTable(req))
+  }
+
+  private def getGlueSchema: Seq[Column] = {
+    val columnOptions: Map[String, ColumnOptions] =
+      task.getColumnOptions.asScala.toMap
+    schema.getColumns.asScala.toSeq.map { c =>
+      val cType: String =
+        if (columnOptions.contains(c.getName)) columnOptions(c.getName).getType
+        else if (parquetColumnLogicalTypes.contains(c.getName))
+          convertParquetLogicalTypeToGlueType(
+            parquetColumnLogicalTypes(c.getName)
+          )
+        else convertEmbulkTypeToGlueType(c.getType)
+      new Column()
+        .withName(c.getName)
+        .withType(cType)
     }
-
-    def deleteTable(): Unit =
-    {
-        val req = new DeleteTableRequest()
-        task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
-        req.setDatabaseName(task.getDatabase)
-        req.setName(task.getTable)
-        aws.withGlue(_.deleteTable(req))
-    }
-
-    def registerNewParquetTable(): Unit =
-    {
-        logger.info(s"Create a new table: ${task.getDatabase}.${task.getTable}")
-        val req = new CreateTableRequest()
-        task.getCatalogId.ifPresent(cid => req.setCatalogId(cid))
-        req.setDatabaseName(task.getDatabase)
-        req.setTableInput(new TableInput()
-            .withName(task.getTable)
-            .withDescription("Created by embulk-output-s3_parquet")
-            .withTableType("EXTERNAL_TABLE")
-            .withParameters(Map("EXTERNAL" -> "TRUE",
-                "classification" -> "parquet",
-                "parquet.compression" -> compressionCodec.name()).asJava)
-            .withStorageDescriptor(new StorageDescriptor()
-                .withColumns(getGlueSchema: _*)
-                .withLocation(location)
-                .withCompressed(isCompressed)
-                .withInputFormat("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat")
-                .withOutputFormat("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat")
-                .withSerdeInfo(new SerDeInfo()
-                    .withSerializationLibrary("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")
-                    .withParameters(Map("serialization.format" -> "1").asJava)
-                )
-            )
-        )
-        aws.withGlue(_.createTable(req))
+  }
+
+  private def convertParquetLogicalTypeToGlueType(t: String): String = {
+    t match {
+      case "timestamp-millis" => "timestamp"
+      case "timestamp-micros" =>
+        "bigint" // Glue cannot recognize timestamp-micros.
+      case "int8" => "tinyint"
+      case "int16" => "smallint"
+      case "int32" => "int"
+      case "int64" => "bigint"
+      case "uint8" =>
+        "smallint" // Glue tinyint is a minimum value of -2^7 and a maximum value of 2^7-1
+      case "uint16" =>
+        "int" // Glue smallint is a minimum value of -2^15 and a maximum value of 2^15-1.
+      case "uint32" =>
+        "bigint" // Glue int is a minimum value of-2^31 and a maximum value of 2^31-1.
+      case "uint64" =>
+        throw new ConfigException(
+          "Cannot convert uint64 to Glue data types automatically" +
+            " because the Glue bigint supports a 64-bit signed integer." +
+            " Please use `catalog.column_options` to define the type."
+        )
+      case "json" => "string"
+      case _ =>
+        throw new ConfigException(
+          s"Unsupported a parquet logical type: $t. Please use `catalog.column_options` to define the type."
+        )
     }
 
-    private def getGlueSchema: Seq[Column] =
-    {
-        val columnOptions: Map[String, ColumnOptions] = task.getColumnOptions.asScala.toMap
-        schema.getColumns.asScala.toSeq.map { c =>
-            val cType: String =
-                if (columnOptions.contains(c.getName)) columnOptions(c.getName).getType
-                else if (parquetColumnLogicalTypes.contains(c.getName)) convertParquetLogicalTypeToGlueType(parquetColumnLogicalTypes(c.getName))
-                else convertEmbulkTypeToGlueType(c.getType)
-            new Column()
-                .withName(c.getName)
-                .withType(cType)
-        }
+  }
+
+  private def convertEmbulkTypeToGlueType(t: Type): String = {
+    t match {
+      case _: BooleanType => "boolean"
+      case _: LongType => "bigint"
+      case _: DoubleType => "double"
+      case _: StringType => "string"
+      case _: TimestampType => "string"
+      case _: JsonType => "string"
+      case unknown =>
+        throw new ConfigException(
+          s"Unsupported embulk type: ${unknown.getName}"
+        )
     }
+  }
 
-    private def convertParquetLogicalTypeToGlueType(t: String): String =
-    {
-        t match {
-            case "timestamp-millis" => "timestamp"
-            case "timestamp-micros" => "bigint" // Glue cannot recognize timestamp-micros.
-            case "int8" => "tinyint"
-            case "int16" => "smallint"
-            case "int32" => "int"
-            case "int64" => "bigint"
-            case "uint8" => "smallint" // Glue tinyint is a minimum value of -2^7 and a maximum value of 2^7-1
-            case "uint16" => "int" // Glue smallint is a minimum value of -2^15 and a maximum value of 2^15-1.
-            case "uint32" => "bigint" // Glue int is a minimum value of-2^31 and a maximum value of 2^31-1.
-            case "uint64" => throw new ConfigException("Cannot convert uint64 to Glue data types automatically" +
-                " because the Glue bigint supports a 64-bit signed integer." +
-                " Please use `catalog.column_options` to define the type.")
-            case "json" => "string"
-            case _ => throw new ConfigException(s"Unsupported a parquet logical type: $t. Please use `catalog.column_options` to define the type.")
-        }
-
-    }
-
-    private def convertEmbulkTypeToGlueType(t: Type): String =
-    {
-        t match {
-            case _: BooleanType => "boolean"
-            case _: LongType => "bigint"
-            case _: DoubleType => "double"
-            case _: StringType => "string"
-            case _: TimestampType => "string"
-            case _: JsonType => "string"
-            case unknown => throw new ConfigException(s"Unsupported embulk type: ${unknown.getName}")
-        }
-    }
-
-    private def isCompressed: Boolean =
-    {
-        !compressionCodec.equals(CompressionCodecName.UNCOMPRESSED)
-    }
+  private def isCompressed: Boolean = {
+    !compressionCodec.equals(CompressionCodecName.UNCOMPRESSED)
+  }
 
 }
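For reference, the reformatted `apply` above keeps the same call shape as before: the registrator is built from the plugin's `Aws` wrapper, the catalog task, the Embulk `Schema`, the table location, and the compression codec, and then `run()` is invoked. A hedged usage sketch based only on that signature — the `aws`, `task`, and `schema` values are placeholders the plugin would normally supply (presumably from `S3ParquetOutputPlugin`), and the location is assumed from the example bucket/prefix:

```scala
// Usage sketch based on the apply() signature shown in this diff; not a
// verbatim excerpt from the plugin. aws, task, and schema are placeholders.
import org.apache.parquet.hadoop.metadata.CompressionCodecName
import org.embulk.output.s3_parquet.CatalogRegistrator
import org.embulk.output.s3_parquet.aws.Aws
import org.embulk.spi.Schema

object CatalogRegistrationSketch {

  def registerGlueTable(aws: Aws, task: CatalogRegistrator.Task, schema: Schema): Unit =
    CatalogRegistrator(
      aws,
      task,
      schema,
      location = "s3://example/path/to/", // assumed from the example configs
      compressionCodec = CompressionCodecName.SNAPPY
    ).run()
}
```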