embulk-input-parquet_hadoop 0.1.0

Files changed (98)
  1. checksums.yaml +7 -0
  2. data/build.gradle +53 -0
  3. data/classpath/activation-1.1.jar +0 -0
  4. data/classpath/apacheds-i18n-2.0.0-M15.jar +0 -0
  5. data/classpath/apacheds-kerberos-codec-2.0.0-M15.jar +0 -0
  6. data/classpath/api-asn1-api-1.0.0-M20.jar +0 -0
  7. data/classpath/api-util-1.0.0-M20.jar +0 -0
  8. data/classpath/asm-3.1.jar +0 -0
  9. data/classpath/avro-1.7.4.jar +0 -0
  10. data/classpath/commons-beanutils-1.7.0.jar +0 -0
  11. data/classpath/commons-cli-1.2.jar +0 -0
  12. data/classpath/commons-codec-1.6.jar +0 -0
  13. data/classpath/commons-collections-3.2.2.jar +0 -0
  14. data/classpath/commons-compress-1.4.1.jar +0 -0
  15. data/classpath/commons-configuration-1.6.jar +0 -0
  16. data/classpath/commons-digester-1.8.jar +0 -0
  17. data/classpath/commons-httpclient-3.1.jar +0 -0
  18. data/classpath/commons-io-2.4.jar +0 -0
  19. data/classpath/commons-lang-2.6.jar +0 -0
  20. data/classpath/commons-logging-1.1.3.jar +0 -0
  21. data/classpath/commons-math3-3.1.1.jar +0 -0
  22. data/classpath/commons-net-3.1.jar +0 -0
  23. data/classpath/curator-client-2.7.1.jar +0 -0
  24. data/classpath/curator-framework-2.7.1.jar +0 -0
  25. data/classpath/curator-recipes-2.7.1.jar +0 -0
  26. data/classpath/embulk-input-parquet_hadoop-0.1.0.jar +0 -0
  27. data/classpath/gson-2.2.4.jar +0 -0
  28. data/classpath/hadoop-annotations-2.7.3.jar +0 -0
  29. data/classpath/hadoop-auth-2.7.3.jar +0 -0
  30. data/classpath/hadoop-client-2.7.3.jar +0 -0
  31. data/classpath/hadoop-common-2.7.3.jar +0 -0
  32. data/classpath/hadoop-hdfs-2.7.3.jar +0 -0
  33. data/classpath/hadoop-mapreduce-client-app-2.7.3.jar +0 -0
  34. data/classpath/hadoop-mapreduce-client-common-2.7.3.jar +0 -0
  35. data/classpath/hadoop-mapreduce-client-core-2.7.3.jar +0 -0
  36. data/classpath/hadoop-mapreduce-client-jobclient-2.7.3.jar +0 -0
  37. data/classpath/hadoop-mapreduce-client-shuffle-2.7.3.jar +0 -0
  38. data/classpath/hadoop-yarn-api-2.7.3.jar +0 -0
  39. data/classpath/hadoop-yarn-client-2.7.3.jar +0 -0
  40. data/classpath/hadoop-yarn-common-2.7.3.jar +0 -0
  41. data/classpath/hadoop-yarn-server-common-2.7.3.jar +0 -0
  42. data/classpath/hadoop-yarn-server-nodemanager-2.7.3.jar +0 -0
  43. data/classpath/htrace-core-3.1.0-incubating.jar +0 -0
  44. data/classpath/httpclient-4.2.5.jar +0 -0
  45. data/classpath/httpcore-4.2.4.jar +0 -0
  46. data/classpath/jackson-core-asl-1.9.13.jar +0 -0
  47. data/classpath/jackson-jaxrs-1.9.13.jar +0 -0
  48. data/classpath/jackson-mapper-asl-1.9.13.jar +0 -0
  49. data/classpath/jackson-xc-1.9.13.jar +0 -0
  50. data/classpath/jaxb-api-2.2.2.jar +0 -0
  51. data/classpath/jaxb-impl-2.2.3-1.jar +0 -0
  52. data/classpath/jersey-client-1.9.jar +0 -0
  53. data/classpath/jersey-core-1.9.jar +0 -0
  54. data/classpath/jersey-guice-1.9.jar +0 -0
  55. data/classpath/jersey-json-1.9.jar +0 -0
  56. data/classpath/jersey-server-1.9.jar +0 -0
  57. data/classpath/jettison-1.1.jar +0 -0
  58. data/classpath/jetty-util-6.1.26.jar +0 -0
  59. data/classpath/jline-0.9.94.jar +0 -0
  60. data/classpath/jsp-api-2.1.jar +0 -0
  61. data/classpath/jsr305-3.0.0.jar +0 -0
  62. data/classpath/jul-to-slf4j-1.7.24.jar +0 -0
  63. data/classpath/leveldbjni-all-1.8.jar +0 -0
  64. data/classpath/log4j-over-slf4j-1.7.24.jar +0 -0
  65. data/classpath/netty-3.7.0.Final.jar +0 -0
  66. data/classpath/netty-all-4.0.23.Final.jar +0 -0
  67. data/classpath/paranamer-2.3.jar +0 -0
  68. data/classpath/parquet-column-1.8.1.jar +0 -0
  69. data/classpath/parquet-common-1.8.1.jar +0 -0
  70. data/classpath/parquet-encoding-1.8.1.jar +0 -0
  71. data/classpath/parquet-format-2.3.0-incubating.jar +0 -0
  72. data/classpath/parquet-hadoop-1.8.1.jar +0 -0
  73. data/classpath/parquet-jackson-1.8.1.jar +0 -0
  74. data/classpath/parquet-msgpack-0.1.0.jar +0 -0
  75. data/classpath/protobuf-java-2.5.0.jar +0 -0
  76. data/classpath/servlet-api-2.5.jar +0 -0
  77. data/classpath/slf4j-api-1.7.24.jar +0 -0
  78. data/classpath/snappy-java-1.1.1.6.jar +0 -0
  79. data/classpath/stax-api-1.0-2.jar +0 -0
  80. data/classpath/xercesImpl-2.9.1.jar +0 -0
  81. data/classpath/xml-apis-1.3.04.jar +0 -0
  82. data/classpath/xmlenc-0.52.jar +0 -0
  83. data/classpath/xz-1.0.jar +0 -0
  84. data/classpath/zookeeper-3.4.6.jar +0 -0
  85. data/lib/embulk/input/parquet_hadoop.rb +18 -0
  86. data/src/main/java/org/embulk/input/parquet_hadoop/ConfigurationFactory.java +84 -0
  87. data/src/main/java/org/embulk/input/parquet_hadoop/ParquetHadoopInputPlugin.java +257 -0
  88. data/src/main/java/org/embulk/input/parquet_hadoop/ParquetRowReader.java +182 -0
  89. data/src/main/java/org/embulk/input/parquet_hadoop/PluginClassLoaderScope.java +44 -0
  90. data/src/test/java/org/embulk/input/parquet_hadoop/TestParquetHadoopInputPlugin.java +74 -0
  91. data/src/test/resources/test-data/incompatible-schema/data/1.parquet +0 -0
  92. data/src/test/resources/test-data/incompatible-schema/data/2.parquet +0 -0
  93. data/src/test/resources/test-data/incompatible-schema/expected.csv +4 -0
  94. data/src/test/resources/test-data/incompatible-schema/in.yml +2 -0
  95. data/src/test/resources/test-data/simple/data.parquet +0 -0
  96. data/src/test/resources/test-data/simple/expected.csv +3 -0
  97. data/src/test/resources/test-data/simple/in.yml +2 -0
  98. metadata +168 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 11d3bfc5cf66805e9ce41966e90759d5acfbed8f
+   data.tar.gz: 234ecd00864d9c122f01a95ab224c18bdff3ccea
+ SHA512:
+   metadata.gz: 05e661e93e1e5c99edec29e2c83cd68d79f45e8c828afb0aeba822e44003057cf5deb1c69e14cf8eebd32755c19a06766c095e9dd0812bc3feee3f3ae4574c0a
+   data.tar.gz: 9b1119067ba7eaeb18ee4ddaac2322881b6177fb3eab92995784745b72d90e6e0c9e60d0bc552afd652f6556392b008628e9065cbd8762ab48a2275cb2a62944
data/build.gradle ADDED
@@ -0,0 +1,53 @@
+ import com.github.jrubygradle.JRubyExec
+
+ dependencies {
+     compile "org.embulk:embulk-core:0.8.16"
+     provided "org.embulk:embulk-core:0.8.16"
+
+     compile project(':parquet-msgpack')
+     // for hadoop
+     compile 'org.slf4j:log4j-over-slf4j:1.7.24'
+     // for parquet
+     compile 'org.slf4j:jul-to-slf4j:1.7.24'
+
+     testCompile "junit:junit:4.+"
+     testCompile 'org.embulk:embulk-standards:0.8.16'
+     testCompile "org.embulk:embulk-test:0.8.16"
+     testCompile 'org.assertj:assertj-core:2.6.+'
+ }
+
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
+     jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
+     scriptArgs "${project.projectDir.absolutePath}/build/gemspec"
+     doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "${project.projectDir}/pkg") }
+ }
+
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
+     jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
+     scriptArgs "pkg/${project.name}-${project.version}.gem"
+ }
+
+ task gemspec {
+     doLast {
+         file('build').mkdirs()
+         file('build/gemspec').write($/
+ Gem::Specification.new do |spec|
+   spec.name = "${project.name}"
+   spec.version = "${project.version}"
+   spec.authors = ["Koji AGAWA"]
+   spec.summary = %[Parquet input plugin for Embulk]
+   spec.description = %[Loads records from Parquet files via Hadoop FileSystem.]
+   spec.email = ["agawa_koji@cyberagent.co.jp"]
+   spec.licenses = ["Apache 2.0"]
+   spec.homepage = "https://github.com/CyberAgent/embulk-input-parquet_hadoop"
+
+   spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
+   spec.test_files = spec.files.grep(%r"^(test|spec)/")
+   spec.require_paths = ["lib"]
+
+   spec.add_development_dependency 'bundler', ['~> 1.0']
+   spec.add_development_dependency 'rake', ['>= 10.0']
+ end
+ /$)
+     }
+ }
(Binary files: the bundled classpath JARs have no textual diff.)
data/lib/embulk/input/parquet_hadoop.rb ADDED
@@ -0,0 +1,18 @@
+ #
+ # Copyright 2017 CyberAgent, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ Embulk::JavaPlugin.register_input(
+   "parquet_hadoop", "org.embulk.input.parquet_hadoop.ParquetHadoopInputPlugin",
+   File.expand_path('../../../../classpath', __FILE__))
data/src/main/java/org/embulk/input/parquet_hadoop/ConfigurationFactory.java ADDED
@@ -0,0 +1,84 @@
+ /*
+ * This class includes code from embulk-input-hdfs.
+ * (https://github.com/civitaspo/embulk-input-hdfs)
+ *
+ * The MIT License
+ * Copyright (c) 2015 Civitaspo
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+ package org.embulk.input.parquet_hadoop;
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.embulk.config.Config;
+ import org.embulk.config.ConfigDefault;
+ import org.embulk.config.ConfigException;
+ import org.embulk.spi.Exec;
+ import org.slf4j.Logger;
+
+ import java.io.File;
+ import java.net.MalformedURLException;
+ import java.util.List;
+ import java.util.Map;
+
+ public class ConfigurationFactory
+ {
+     private static final Logger logger = Exec.getLogger(ConfigurationFactory.class);
+
+     interface Task
+     {
+         @Config("config_files")
+         @ConfigDefault("[]")
+         List<String> getConfigFiles();
+
+         @Config("config")
+         @ConfigDefault("{}")
+         Map<String, String> getConfig();
+     }
+
+     private ConfigurationFactory()
+     {
+     }
+
+     public static Configuration create(Task task)
+     {
+         Configuration c = new Configuration();
+         for (String f : task.getConfigFiles()) {
+             try {
+                 logger.trace("embulk-input-parquet_hadoop: load a config file: {}", f);
+                 c.addResource(new File(f).toURI().toURL());
+             } catch (MalformedURLException e) {
+                 throw new ConfigException(e);
+             }
+         }
+
+         for (Map.Entry<String, String> entry : task.getConfig().entrySet()) {
+             logger.trace("embulk-input-parquet_hadoop: load a config: {}:{}", entry.getKey(), entry.getValue());
+             c.set(entry.getKey(), entry.getValue());
+         }
+
+         // For logging
+         for (Map.Entry<String, String> entry : c) {
+             logger.trace("embulk-input-parquet_hadoop: loaded: {}: {}", entry.getKey(), entry.getValue());
+         }
+         logger.trace("embulk-input-parquet_hadoop: loaded files: {}", c);
+
+         return c;
+     }
+ }
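For orientation, here is a minimal standalone sketch (not part of the gem) of what the two task options boil down to against Hadoop's Configuration API: config_files entries are added as XML resources and config entries are set on top of them. The class name, file path, and property values are hypothetical.

import org.apache.hadoop.conf.Configuration;

import java.io.File;

public class ConfigurationSketch
{
    public static void main(String[] args) throws Exception
    {
        Configuration conf = new Configuration();

        // config_files: each listed path is loaded as a Hadoop XML resource.
        conf.addResource(new File("/etc/hadoop/conf/core-site.xml").toURI().toURL());

        // config: each key/value pair is applied after (and overrides) the resource files.
        conf.set("fs.defaultFS", "hdfs://namenode:8020");

        System.out.println(conf.get("fs.defaultFS"));
    }
}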
data/src/main/java/org/embulk/input/parquet_hadoop/ParquetHadoopInputPlugin.java ADDED
@@ -0,0 +1,257 @@
+ /*
+ * Copyright 2017 CyberAgent, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ package org.embulk.input.parquet_hadoop;
+
+ import com.google.common.base.Function;
+ import com.google.common.base.Throwables;
+ import com.google.common.collect.Lists;
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileStatus;
+ import org.apache.hadoop.fs.FileSystem;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.fs.PathNotFoundException;
+ import org.apache.parquet.ParquetRuntimeException;
+ import org.apache.parquet.hadoop.util.HiddenFileFilter;
+ import org.embulk.config.Config;
+ import org.embulk.config.ConfigDefault;
+ import org.embulk.config.ConfigDiff;
+ import org.embulk.config.ConfigSource;
+ import org.embulk.config.Task;
+ import org.embulk.config.TaskReport;
+ import org.embulk.config.TaskSource;
+ import org.embulk.spi.Column;
+ import org.embulk.spi.DataException;
+ import org.embulk.spi.Exec;
+ import org.embulk.spi.InputPlugin;
+ import org.embulk.spi.PageBuilder;
+ import org.embulk.spi.PageOutput;
+ import org.embulk.spi.Schema;
+ import org.embulk.spi.type.Types;
+ import org.msgpack.value.Value;
+ import org.slf4j.Logger;
+ import org.slf4j.bridge.SLF4JBridgeHandler;
+ import studio.adtech.parquet.msgpack.read.MessagePackReadSupport;
+
+ import javax.annotation.Nullable;
+ import java.io.IOException;
+ import java.util.List;
+ import java.util.logging.Level;
+
+ public class ParquetHadoopInputPlugin
+         implements InputPlugin
+ {
+     private static final Logger logger = Exec.getLogger(ParquetHadoopInputPlugin.class);
+
+     public interface PluginTask
+             extends Task, ConfigurationFactory.Task
+     {
+         @Config("path")
+         String getPath();
+
+         @Config("parquet_log_level")
+         @ConfigDefault("\"INFO\"")
+         String getParquetLogLevel();
+
+         List<String> getFiles();
+         void setFiles(List<String> files);
+     }
+
+     Schema newSchema()
+     {
+         return Schema.builder().add("record", Types.JSON).build();
+     }
+
+     @Override
+     public ConfigDiff transaction(ConfigSource config,
+             InputPlugin.Control control)
+     {
+         PluginTask task = config.loadConfig(PluginTask.class);
+         configureParquetLogger(task);
+
+         Path rootPath = new Path(task.getPath());
+
+         try (PluginClassLoaderScope ignored = new PluginClassLoaderScope()) {
+             Configuration conf = ConfigurationFactory.create(task);
+
+             FileSystem fs = FileSystem.get(rootPath.toUri(), conf);
+             List<FileStatus> statusList = listFileStatuses(fs, rootPath);
+             if (statusList.isEmpty()) {
+                 throw new PathNotFoundException(rootPath.toString());
+             }
+
+             for (FileStatus status : statusList) {
+                 logger.debug("embulk-input-parquet_hadoop: Loading paths: {}, length: {}",
+                         status.getPath(), status.getLen());
+             }
+
+             List<String> files = Lists.transform(statusList, new Function<FileStatus, String>() {
+                 @Nullable
+                 @Override
+                 public String apply(@Nullable FileStatus input) {
+                     return input.getPath().toString();
+                 }
+             });
+             task.setFiles(files);
+         } catch (IOException e) {
+             throw Throwables.propagate(e);
+         }
+
+         Schema schema = newSchema();
+         int taskCount = task.getFiles().size();
+
+         return resume(task.dump(), schema, taskCount, control);
+     }
+
+     @Override
+     public ConfigDiff resume(TaskSource taskSource,
+             Schema schema, int taskCount,
+             InputPlugin.Control control)
+     {
+         control.run(taskSource, schema, taskCount);
+         return Exec.newConfigDiff();
+     }
+
+     @Override
+     public void cleanup(TaskSource taskSource,
+             Schema schema, int taskCount,
+             List<TaskReport> successTaskReports)
+     {
+     }
+
+     @Override
+     public TaskReport run(TaskSource taskSource,
+             Schema schema, int taskIndex,
+             PageOutput output)
+     {
+         PluginTask task = taskSource.loadTask(PluginTask.class);
+         configureParquetLogger(task);
+
+         final Column jsonColumn = schema.getColumn(0);
+
+         Configuration conf;
+         Path filePath;
+         try (PluginClassLoaderScope ignored = new PluginClassLoaderScope()) {
+             conf = ConfigurationFactory.create(task);
+             filePath = new Path(task.getFiles().get(taskIndex));
+         }
+
+         try (PageBuilder pageBuilder = newPageBuilder(schema, output)) {
+             ParquetRowReader<Value> reader;
+             try (PluginClassLoaderScope ignored = new PluginClassLoaderScope()) {
+                 reader = new ParquetRowReader<>(conf, filePath, new MessagePackReadSupport());
+             } catch (ParquetRuntimeException | IOException e) {
+                 throw new DataException(e);
+             }
+
+             Value value;
+             while (true) {
+                 try (PluginClassLoaderScope ignored = new PluginClassLoaderScope()) {
+                     value = reader.read();
+                 } catch (ParquetRuntimeException | IOException e) {
+                     throw new DataException(e);
+                 }
+                 if (value == null) {
+                     break;
+                 }
+
+                 pageBuilder.setJson(jsonColumn, value);
+                 pageBuilder.addRecord();
+             }
+
+             pageBuilder.finish();
+
+             try (PluginClassLoaderScope ignored = new PluginClassLoaderScope()) {
+                 reader.close();
+             } catch (ParquetRuntimeException | IOException e) {
+                 throw new DataException(e);
+             }
+         }
+
+         TaskReport report = Exec.newTaskReport();
+         return report;
+     }
+
+     @Override
+     public ConfigDiff guess(ConfigSource config)
+     {
+         return Exec.newConfigDiff();
+     }
+
+     private PageBuilder newPageBuilder(Schema schema, PageOutput output)
+     {
+         return new PageBuilder(Exec.getBufferAllocator(), schema, output);
+     }
+
+     private List<FileStatus> listFileStatuses(FileSystem fs, Path rootPath) throws IOException {
+         List<FileStatus> fileStatuses = Lists.newArrayList();
+
+         FileStatus[] entries = fs.globStatus(rootPath, HiddenFileFilter.INSTANCE);
+         if (entries == null) {
+             return fileStatuses;
+         }
+
+         for (FileStatus entry : entries) {
+             if (entry.isDirectory()) {
+                 List<FileStatus> subEntries = listRecursive(fs, entry);
+                 fileStatuses.addAll(subEntries);
+             } else {
+                 fileStatuses.add(entry);
+             }
+         }
+
+         return fileStatuses;
+     }
+
+     private List<FileStatus> listRecursive(FileSystem fs, FileStatus status) throws IOException
+     {
+         List<FileStatus> statusList = Lists.newArrayList();
+         if (status.isDirectory()) {
+             FileStatus[] entries = fs.listStatus(status.getPath(), HiddenFileFilter.INSTANCE);
+             for (FileStatus entry : entries) {
+                 statusList.addAll(listRecursive(fs, entry));
+             }
+         } else {
+             statusList.add(status);
+         }
+         return statusList;
+     }
+
+     private static void configureParquetLogger(PluginTask task)
+     {
+         // delegate java.util.logging to slf4j.
+         java.util.logging.Logger parquetLogger = java.util.logging.Logger.getLogger("org.apache.parquet");
+         if (parquetLogger.getHandlers().length == 0) {
+             parquetLogger.addHandler(new SLF4JBridgeHandler());
+             parquetLogger.setUseParentHandlers(false);
+         }
+
+         Level level;
+         try {
+             level = Level.parse(task.getParquetLogLevel());
+         } catch (IllegalArgumentException e) {
+             logger.warn("embulk-input-parquet_hadoop: Invalid parquet_log_level", e);
+             level = Level.WARNING;
+         }
+         // invoke static initializer that overrides log level.
+         try {
+             Class.forName("org.apache.parquet.Log");
+         } catch (ClassNotFoundException e) {
+             logger.warn("", e);
+         }
+
+         parquetLogger.setLevel(level);
+     }
+ }
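The configureParquetLogger method above routes Parquet's java.util.logging output through SLF4J. Stripped of the Embulk specifics, the same bridging pattern looks roughly like this sketch; the logger namespace and level come from the plugin, while the class name is illustrative.

import org.slf4j.bridge.SLF4JBridgeHandler;

import java.util.logging.Level;
import java.util.logging.Logger;

public class JulBridgeSketch
{
    public static void main(String[] args)
    {
        // Install the SLF4J bridge on the parquet logger namespace once and
        // detach it from the default JUL handlers.
        Logger parquetLogger = Logger.getLogger("org.apache.parquet");
        if (parquetLogger.getHandlers().length == 0) {
            parquetLogger.addHandler(new SLF4JBridgeHandler());
            parquetLogger.setUseParentHandlers(false);
        }

        // Records at or above this level are forwarded to SLF4J.
        parquetLogger.setLevel(Level.WARNING);
    }
}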
data/src/main/java/org/embulk/input/parquet_hadoop/ParquetRowReader.java ADDED
@@ -0,0 +1,182 @@
+ /*
+ * This class includes code from Apache Parquet MR.
+ * (org.apache.parquet.hadoop.InternalParquetRecordReader)
+ *
+ * Copyright 2017 CyberAgent, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ package org.embulk.input.parquet_hadoop;
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.parquet.column.ColumnDescriptor;
+ import org.apache.parquet.column.page.PageReadStore;
+ import org.apache.parquet.filter2.compat.FilterCompat;
+ import org.apache.parquet.format.converter.ParquetMetadataConverter;
+ import org.apache.parquet.hadoop.ParquetFileReader;
+ import org.apache.parquet.hadoop.UnmaterializableRecordCounter;
+ import org.apache.parquet.hadoop.api.InitContext;
+ import org.apache.parquet.hadoop.api.ReadSupport;
+ import org.apache.parquet.hadoop.metadata.BlockMetaData;
+ import org.apache.parquet.hadoop.metadata.FileMetaData;
+ import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+ import org.apache.parquet.io.ColumnIOFactory;
+ import org.apache.parquet.io.MessageColumnIO;
+ import org.apache.parquet.io.ParquetDecodingException;
+ import org.apache.parquet.io.RecordReader;
+ import org.apache.parquet.io.api.RecordMaterializer;
+ import org.apache.parquet.schema.MessageType;
+ import org.embulk.spi.Exec;
+ import org.slf4j.Logger;
+
+ import java.io.IOException;
+ import java.util.Collections;
+ import java.util.HashMap;
+ import java.util.HashSet;
+ import java.util.List;
+ import java.util.Map;
+ import java.util.Set;
+
+ public class ParquetRowReader<T> {
+     private static final Logger logger = Exec.getLogger(ParquetRowReader.class);
+
+     private final Path filePath;
+     private final ParquetFileReader reader;
+     private final long total;
+     private final ColumnIOFactory columnIOFactory;
+     private final RecordMaterializer<T> recordConverter;
+     private final MessageType requestedSchema;
+     private final MessageType fileSchema;
+     private final UnmaterializableRecordCounter unmaterializableRecordCounter;
+
+     private long current = 0;
+     private long totalCountLoadedSoFar = 0;
+     private int currentBlock = -1;
+     private RecordReader<T> recordReader;
+
+     // TODO: make configurable ?
+     private static final boolean strictTypeChecking = true;
+     private static final FilterCompat.Filter filter = FilterCompat.NOOP;
+
+     public ParquetRowReader(Configuration configuration, Path filePath, ReadSupport<T> readSupport) throws IOException {
+         this.filePath = filePath;
+
+         ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(configuration, filePath, ParquetMetadataConverter.NO_FILTER);
+         List<BlockMetaData> blocks = parquetMetadata.getBlocks();
+
+         FileMetaData fileMetadata = parquetMetadata.getFileMetaData();
+         this.fileSchema = fileMetadata.getSchema();
+         Map<String, String> keyValueMetadata = fileMetadata.getKeyValueMetaData();
+         ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
+                 configuration, toSetMultiMap(keyValueMetadata), fileSchema));
+         this.columnIOFactory = new ColumnIOFactory(fileMetadata.getCreatedBy());
+
+         this.requestedSchema = readContext.getRequestedSchema();
+         this.recordConverter = readSupport.prepareForRead(
+                 configuration, fileMetadata.getKeyValueMetaData(), fileSchema, readContext);
+
+         List<ColumnDescriptor> columns = requestedSchema.getColumns();
+
+         reader = new ParquetFileReader(configuration, fileMetadata, filePath, blocks, columns);
+
+         long total = 0;
+         for (BlockMetaData block : blocks) {
+             total += block.getRowCount();
+         }
+         this.total = total;
+
+         this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total);
+         logger.info("ParquetRowReader initialized will read a total of " + total + " records.");
+     }
+
+     private void checkRead() throws IOException {
+         if (current == totalCountLoadedSoFar) {
+             PageReadStore pages = reader.readNextRowGroup();
+             if (pages == null) {
+                 throw new IOException("expecting more rows but reached last block. Read " + current + " out of " + total);
+             }
+
+             MessageColumnIO columnIO = columnIOFactory.getColumnIO(requestedSchema, fileSchema, strictTypeChecking);
+             recordReader = columnIO.getRecordReader(pages, recordConverter, filter);
+             totalCountLoadedSoFar += pages.getRowCount();
+             ++ currentBlock;
+         }
+     }
+
+     /**
+      * @return the next record or null if finished
+      * @throws IOException
+      * @throws ParquetDecodingException
+      */
+     public T read() throws IOException {
+         T currentValue = null;
+         boolean recordFound = false;
+         while (!recordFound) {
+             // no more records left
+             if (current >= total) {
+                 return null;
+             }
+
+             try {
+                 checkRead();
+                 current++;
+
+                 try {
+                     currentValue = recordReader.read();
+                 } catch (RecordMaterializer.RecordMaterializationException e) {
+                     // this might throw, but it's fatal if it does.
+                     unmaterializableRecordCounter.incErrors(e);
+                     logger.debug("skipping a corrupt record");
+                     continue;
+                 }
+
+                 if (recordReader.shouldSkipCurrentRecord()) {
+                     // this record is being filtered via the filter2 package
+                     logger.debug("skipping record");
+                     continue;
+                 }
+
+                 if (currentValue == null) {
+                     // only happens with FilteredRecordReader at end of block
+                     current = totalCountLoadedSoFar;
+                     logger.debug("filtered record reader reached end of block");
+                     continue;
+                 }
+
+                 recordFound = true;
+
+                 logger.debug("read value: {}", currentValue);
+             } catch (RuntimeException e) {
+                 throw new ParquetDecodingException(
+                         String.format("Can not read value at %d in block %d in file %s", current, currentBlock, filePath), e);
+             }
+         }
+
+         return currentValue;
+     }
+
+     public void close() throws IOException {
+         reader.close();
+     }
+
+     private static <K, V> Map<K, Set<V>> toSetMultiMap(Map<K, V> map) {
+         Map<K, Set<V>> setMultiMap = new HashMap<>();
+         for (Map.Entry<K, V> entry : map.entrySet()) {
+             Set<V> set = new HashSet<>();
+             set.add(entry.getValue());
+             setMultiMap.put(entry.getKey(), Collections.unmodifiableSet(set));
+         }
+         return Collections.unmodifiableMap(setMultiMap);
+     }
+ }
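A minimal sketch of driving ParquetRowReader directly, following the same pattern the plugin's run() method uses: construct it with a Hadoop Configuration, a Path, and the MessagePackReadSupport from parquet-msgpack, then call read() until it returns null. Because the reader obtains its logger through Exec.getLogger, this only works inside a running Embulk session; the helper below is hypothetical and assumed to live in the plugin's package.

package org.embulk.input.parquet_hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.msgpack.value.Value;
import studio.adtech.parquet.msgpack.read.MessagePackReadSupport;

import java.io.IOException;

class ReaderSketch
{
    // Reads every row of one Parquet file and returns the row count.
    static long countRows(Configuration conf, Path path) throws IOException
    {
        ParquetRowReader<Value> reader = new ParquetRowReader<>(conf, path, new MessagePackReadSupport());
        long rows = 0;
        try {
            // read() returns one row materialized as a msgpack Value, or null once the file is exhausted.
            while (reader.read() != null) {
                rows++;
            }
        }
        finally {
            reader.close();
        }
        return rows;
    }
}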
data/src/main/java/org/embulk/input/parquet_hadoop/PluginClassLoaderScope.java ADDED
@@ -0,0 +1,44 @@
+ /*
+ * Copyright 2017 CyberAgent, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ package org.embulk.input.parquet_hadoop;
+
+ /**
+ * Sets the thread's context class loader to the plugin's class loader.
+ *
+ * {@link org.apache.hadoop.fs.FileSystem#loadFileSystems()} loads FileSystem implementations
+ * via {@link java.util.ServiceLoader}, which falls back to the system class loader when the
+ * context class loader is null. The system class loader cannot find the FileSystem
+ * implementations because the Hadoop jars are not on its classpath.
+ * The context class loader therefore has to be switched to the plugin's class loader
+ * around Hadoop calls.
+ */
+ class PluginClassLoaderScope implements AutoCloseable {
+     private static final ClassLoader PLUGIN_CLASS_LOADER =
+             ParquetHadoopInputPlugin.class.getClassLoader();
+
+     private final ClassLoader original;
+
+     public PluginClassLoaderScope() {
+         Thread current = Thread.currentThread();
+         this.original = current.getContextClassLoader();
+         Thread.currentThread().setContextClassLoader(PLUGIN_CLASS_LOADER);
+     }
+
+     @Override
+     public void close() {
+         Thread.currentThread().setContextClassLoader(original);
+     }
+ }
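The scope is used as a try-with-resources guard around every Hadoop call in the plugin. A minimal usage sketch follows; the class and method names are hypothetical, and it is placed in the same package because PluginClassLoaderScope is package-private.

package org.embulk.input.parquet_hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

import java.io.IOException;

class ScopeUsageSketch
{
    static void printDefaultFileSystem() throws IOException
    {
        // Hadoop's ServiceLoader lookups run against the plugin class loader
        // while the scope is open.
        try (PluginClassLoaderScope ignored = new PluginClassLoaderScope()) {
            FileSystem fs = FileSystem.get(new Configuration());
            System.out.println(fs.getUri());
        }
        // The previous context class loader has been restored at this point.
    }
}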
data/src/test/java/org/embulk/input/parquet_hadoop/TestParquetHadoopInputPlugin.java ADDED
@@ -0,0 +1,74 @@
+ /*
+ * Copyright 2017 CyberAgent, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ package org.embulk.input.parquet_hadoop;
+
+ import com.google.common.io.Resources;
+ import org.embulk.config.ConfigSource;
+ import org.embulk.spi.InputPlugin;
+ import org.embulk.test.TestingEmbulk;
+ import org.junit.Rule;
+ import org.junit.Test;
+
+ import java.nio.file.Path;
+
+ import static org.embulk.test.EmbulkTests.readFile;
+ import static org.embulk.test.EmbulkTests.readResource;
+ import static org.hamcrest.CoreMatchers.is;
+ import static org.junit.Assert.assertThat;
+
+ public class TestParquetHadoopInputPlugin
+ {
+     private static final String RESOURCE_NAME_PREFIX = "test-data/";
+
+     @Rule
+     public TestingEmbulk embulk = TestingEmbulk.builder()
+             .registerPlugin(InputPlugin.class, "parquet_hadoop", ParquetHadoopInputPlugin.class)
+             .build();
+
+     @Test
+     public void testSimple() throws Exception
+     {
+         assertRecordsByResource(embulk, "simple/in.yml", "simple/data.parquet",
+                 "simple/expected.csv");
+     }
+
+     @Test
+     public void testIncompatibleSchema() throws Exception
+     {
+         assertRecordsByResource(embulk, "incompatible-schema/in.yml", "incompatible-schema/data",
+                 "incompatible-schema/expected.csv");
+     }
+
+     static void assertRecordsByResource(TestingEmbulk embulk,
+             String inConfigYamlResourceName,
+             String sourceResourceName, String resultCsvResourceName)
+             throws Exception
+     {
+         Path outputPath = embulk.createTempFile("csv");
+
+         // in: config
+         String inputPath = Resources.getResource(RESOURCE_NAME_PREFIX + sourceResourceName).toURI().toString();
+         ConfigSource inConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + inConfigYamlResourceName)
+                 .set("path", inputPath);
+
+         TestingEmbulk.RunResult result = embulk.inputBuilder()
+                 .in(inConfig)
+                 .outputPath(outputPath)
+                 .run();
+
+         assertThat(readFile(outputPath), is(readResource(RESOURCE_NAME_PREFIX + resultCsvResourceName)));
+     }
+ }
data/src/test/resources/test-data/incompatible-schema/expected.csv ADDED
@@ -0,0 +1,4 @@
+ "{""c_int"":1}"
+ "{""c_int"":2}"
+ "{""c_str"":""hoge"",""c_bool"":true}"
+ "{""c_str"":""fuga"",""c_bool"":false}"
data/src/test/resources/test-data/incompatible-schema/in.yml ADDED
@@ -0,0 +1,2 @@
+ type: parquet_hadoop
+ parquet_log_level: WARNING
data/src/test/resources/test-data/simple/expected.csv ADDED
@@ -0,0 +1,3 @@
+ "{""c_str"":""foo"",""c_int"":1,""c_double"":1.5,""c_bool"":true,""c_json"":""{\""foo\"":1}""}"
+ "{""c_str"":""bar"",""c_int"":2,""c_double"":2.5,""c_bool"":false,""c_json"":""{\""bar\"":2}""}"
+ "{""c_str"":""baz"",""c_int"":3,""c_double"":3.5,""c_bool"":true,""c_json"":""{\""baz\"":3}""}"
data/src/test/resources/test-data/simple/in.yml ADDED
@@ -0,0 +1,2 @@
+ type: parquet_hadoop
+ parquet_log_level: WARNING
metadata ADDED
@@ -0,0 +1,168 @@
+ --- !ruby/object:Gem::Specification
+ name: embulk-input-parquet_hadoop
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Koji AGAWA
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2017-03-08 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.0'
+   name: bundler
+   prerelease: false
+   type: :development
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.0'
+ - !ruby/object:Gem::Dependency
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '10.0'
+   name: rake
+   prerelease: false
+   type: :development
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '10.0'
+ description: Loads records from Parquet files via Hadoop FileSystem.
+ email:
+ - agawa_koji@cyberagent.co.jp
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - build.gradle
+ - classpath/activation-1.1.jar
+ - classpath/apacheds-i18n-2.0.0-M15.jar
+ - classpath/apacheds-kerberos-codec-2.0.0-M15.jar
+ - classpath/api-asn1-api-1.0.0-M20.jar
+ - classpath/api-util-1.0.0-M20.jar
+ - classpath/asm-3.1.jar
+ - classpath/avro-1.7.4.jar
+ - classpath/commons-beanutils-1.7.0.jar
+ - classpath/commons-cli-1.2.jar
+ - classpath/commons-codec-1.6.jar
+ - classpath/commons-collections-3.2.2.jar
+ - classpath/commons-compress-1.4.1.jar
+ - classpath/commons-configuration-1.6.jar
+ - classpath/commons-digester-1.8.jar
+ - classpath/commons-httpclient-3.1.jar
+ - classpath/commons-io-2.4.jar
+ - classpath/commons-lang-2.6.jar
+ - classpath/commons-logging-1.1.3.jar
+ - classpath/commons-math3-3.1.1.jar
+ - classpath/commons-net-3.1.jar
+ - classpath/curator-client-2.7.1.jar
+ - classpath/curator-framework-2.7.1.jar
+ - classpath/curator-recipes-2.7.1.jar
+ - classpath/embulk-input-parquet_hadoop-0.1.0.jar
+ - classpath/gson-2.2.4.jar
+ - classpath/hadoop-annotations-2.7.3.jar
+ - classpath/hadoop-auth-2.7.3.jar
+ - classpath/hadoop-client-2.7.3.jar
+ - classpath/hadoop-common-2.7.3.jar
+ - classpath/hadoop-hdfs-2.7.3.jar
+ - classpath/hadoop-mapreduce-client-app-2.7.3.jar
+ - classpath/hadoop-mapreduce-client-common-2.7.3.jar
+ - classpath/hadoop-mapreduce-client-core-2.7.3.jar
+ - classpath/hadoop-mapreduce-client-jobclient-2.7.3.jar
+ - classpath/hadoop-mapreduce-client-shuffle-2.7.3.jar
+ - classpath/hadoop-yarn-api-2.7.3.jar
+ - classpath/hadoop-yarn-client-2.7.3.jar
+ - classpath/hadoop-yarn-common-2.7.3.jar
+ - classpath/hadoop-yarn-server-common-2.7.3.jar
+ - classpath/hadoop-yarn-server-nodemanager-2.7.3.jar
+ - classpath/htrace-core-3.1.0-incubating.jar
+ - classpath/httpclient-4.2.5.jar
+ - classpath/httpcore-4.2.4.jar
+ - classpath/jackson-core-asl-1.9.13.jar
+ - classpath/jackson-jaxrs-1.9.13.jar
+ - classpath/jackson-mapper-asl-1.9.13.jar
+ - classpath/jackson-xc-1.9.13.jar
+ - classpath/jaxb-api-2.2.2.jar
+ - classpath/jaxb-impl-2.2.3-1.jar
+ - classpath/jersey-client-1.9.jar
+ - classpath/jersey-core-1.9.jar
+ - classpath/jersey-guice-1.9.jar
+ - classpath/jersey-json-1.9.jar
+ - classpath/jersey-server-1.9.jar
+ - classpath/jettison-1.1.jar
+ - classpath/jetty-util-6.1.26.jar
+ - classpath/jline-0.9.94.jar
+ - classpath/jsp-api-2.1.jar
+ - classpath/jsr305-3.0.0.jar
+ - classpath/jul-to-slf4j-1.7.24.jar
+ - classpath/leveldbjni-all-1.8.jar
+ - classpath/log4j-over-slf4j-1.7.24.jar
+ - classpath/netty-3.7.0.Final.jar
+ - classpath/netty-all-4.0.23.Final.jar
+ - classpath/paranamer-2.3.jar
+ - classpath/parquet-column-1.8.1.jar
+ - classpath/parquet-common-1.8.1.jar
+ - classpath/parquet-encoding-1.8.1.jar
+ - classpath/parquet-format-2.3.0-incubating.jar
+ - classpath/parquet-hadoop-1.8.1.jar
+ - classpath/parquet-jackson-1.8.1.jar
+ - classpath/parquet-msgpack-0.1.0.jar
+ - classpath/protobuf-java-2.5.0.jar
+ - classpath/servlet-api-2.5.jar
+ - classpath/slf4j-api-1.7.24.jar
+ - classpath/snappy-java-1.1.1.6.jar
+ - classpath/stax-api-1.0-2.jar
+ - classpath/xercesImpl-2.9.1.jar
+ - classpath/xml-apis-1.3.04.jar
+ - classpath/xmlenc-0.52.jar
+ - classpath/xz-1.0.jar
+ - classpath/zookeeper-3.4.6.jar
+ - lib/embulk/input/parquet_hadoop.rb
+ - src/main/java/org/embulk/input/parquet_hadoop/ConfigurationFactory.java
+ - src/main/java/org/embulk/input/parquet_hadoop/ParquetHadoopInputPlugin.java
+ - src/main/java/org/embulk/input/parquet_hadoop/ParquetRowReader.java
+ - src/main/java/org/embulk/input/parquet_hadoop/PluginClassLoaderScope.java
+ - src/test/java/org/embulk/input/parquet_hadoop/TestParquetHadoopInputPlugin.java
+ - src/test/resources/test-data/incompatible-schema/data/1.parquet
+ - src/test/resources/test-data/incompatible-schema/data/2.parquet
+ - src/test/resources/test-data/incompatible-schema/expected.csv
+ - src/test/resources/test-data/incompatible-schema/in.yml
+ - src/test/resources/test-data/simple/data.parquet
+ - src/test/resources/test-data/simple/expected.csv
+ - src/test/resources/test-data/simple/in.yml
+ homepage: https://github.com/CyberAgent/embulk-input-parquet_hadoop
+ licenses:
+ - Apache 2.0
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.4.8
+ signing_key:
+ specification_version: 4
+ summary: Parquet input plugin for Embulk
+ test_files: []