embulk-input-hdfs 0.1.1 → 0.1.4
checksums.yaml CHANGED

```diff
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: aa9425d56cb955c999bdfc8f307004f260065797
+  data.tar.gz: d11249a30d11ad70595d5961383f39d626290f4e
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 46e6e225a7cc1acf6a1396ecdf72b8fcce1b0679196bfe1bfd18baec6015d602dbedbc22e3bda74f2ce916ddbce8043152ed5d93302850ca1a79cc54f07a1fb8
+  data.tar.gz: e49523b895c4d11e10e25295f4298d5f8c2113c1d7eefc48e3a8862fadcea7eaa51c1e677b26e5d24459198da66b58668184d6c4b14f5b86878cba6dbc1384fa
```
data/README.md CHANGED

```diff
@@ -12,7 +12,7 @@ Read files on Hdfs.
 
 - **config_files** list of paths to Hadoop's configuration files (array of strings, default: `[]`)
 - **config** overwrites configuration parameters (hash, default: `{}`)
-- **
+- **path** file path on Hdfs. you can use glob and Date format like `%Y%m%d/%s`.
 - **rewind_seconds** When you use Date format in input_path property, the format is executed by using the time which is Now minus this property.
 - **partition** when this is true, partition input files and increase task count. (default: `true`)
 - **num_partitions** number of partitions. (default: `Runtime.getRuntime().availableProcessors()`)
```

```diff
@@ -30,7 +30,7 @@ in:
     dfs.replication: 1
     fs.hdfs.impl: 'org.apache.hadoop.hdfs.DistributedFileSystem'
     fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'
-
+  path: /user/embulk/test/%Y-%m-%d/*
   rewind_seconds: 86400
   partition: true
   num_partitions: 30
```
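The `rewind_seconds` option shifts the reference time used when expanding the Date format in `path`: with `rewind_seconds: 86400`, `%Y-%m-%d` resolves to yesterday's date rather than today's. Below is a minimal sketch of that expansion using embedded JRuby, which the plugin already depends on; the class and helper names are illustrative, not the plugin's actual code:

```java
import org.jruby.embed.ScriptingContainer;

public class StrftimeSketch
{
    // Illustrative sketch: expand strftime placeholders in a path against
    // (Time.now - rewindSeconds) using embedded JRuby's Time#strftime.
    static String expandPath(String format, long rewindSeconds)
    {
        ScriptingContainer jruby = new ScriptingContainer();
        String script = String.format("(Time.now - %d).strftime('%s')", rewindSeconds, format);
        return jruby.runScriptlet(script).toString();
    }

    public static void main(String[] args)
    {
        // With rewind_seconds: 86400, %Y-%m-%d expands to yesterday's date.
        System.out.println(expandPath("/user/embulk/test/%Y-%m-%d/*", 86400));
    }
}
```

Glob characters such as `*` are passed through strftime unchanged and are resolved later against HDFS.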
data/build.gradle CHANGED

```diff
@@ -1,34 +1,34 @@
 package org.embulk.input.hdfs;
 
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-
 import com.google.common.base.Function;
-import com.google.common.base.Optional;
-import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.
+import org.apache.hadoop.fs.PathNotFoundException;
 import org.embulk.config.Config;
 import org.embulk.config.ConfigDefault;
-import org.embulk.config.ConfigInject;
 import org.embulk.config.ConfigDiff;
+import org.embulk.config.ConfigInject;
 import org.embulk.config.ConfigSource;
 import org.embulk.config.Task;
+import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
-import org.embulk.spi
-import org.embulk.spi.
+import org.embulk.spi.BufferAllocator;
+import org.embulk.spi.Exec;
+import org.embulk.spi.FileInputPlugin;
+import org.embulk.spi.TransactionalFileInput;
 import org.embulk.spi.util.InputStreamTransactionalFileInput;
 import org.jruby.embed.ScriptingContainer;
 import org.slf4j.Logger;
 
 import javax.annotation.Nullable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
 
 public class HdfsFileInputPlugin implements FileInputPlugin
 {
```
```diff
@@ -44,8 +44,8 @@ public class HdfsFileInputPlugin implements FileInputPlugin
         @ConfigDefault("{}")
         public Map<String, String> getConfig();
 
-        @Config("
-        public String
+        @Config("path")
+        public String getPath();
 
         @Config("rewind_seconds")
         @ConfigDefault("0")
```
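For readers unfamiliar with Embulk's config binding: options such as `path` and `rewind_seconds` are declared as annotated getters on a `Task` interface, and Embulk materializes an implementation of that interface from the YAML config. A minimal sketch of such an interface follows; the interface name is illustrative, and the option names simply mirror the README above:

```java
import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.config.Task;

import java.util.List;
import java.util.Map;

// Illustrative sketch of an Embulk task interface: each getter is bound to one
// YAML config key; getters without @ConfigDefault are required options.
public interface ExampleHdfsTask extends Task
{
    @Config("config_files")
    @ConfigDefault("[]")
    List<String> getConfigFiles();

    @Config("config")
    @ConfigDefault("{}")
    Map<String, String> getConfig();

    @Config("path")
    String getPath();

    @Config("rewind_seconds")
    @ConfigDefault("0")
    int getRewindSeconds();

    @Config("partition")
    @ConfigDefault("true")
    boolean getPartition();
}
```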
```diff
@@ -72,9 +72,14 @@ public class HdfsFileInputPlugin implements FileInputPlugin
         PluginTask task = config.loadConfig(PluginTask.class);
 
         // listing Files
-        String pathString = strftime(task.
+        String pathString = strftime(task.getPath(), task.getRewindSeconds());
         try {
             List<String> originalFileList = buildFileList(getFs(task), pathString);
+
+            if (originalFileList.isEmpty()) {
+                throw new PathNotFoundException(pathString);
+            }
+
             task.setFiles(allocateHdfsFilesToTasks(task, getFs(task), originalFileList));
             logger.info("Loading target files: {}", originalFileList);
         }
```
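The added `isEmpty()` check makes a non-matching path fail the transaction up front instead of proceeding with zero input tasks. A standalone sketch of the same fail-fast pattern against the Hadoop FileSystem API is shown below; the setup and method names are illustrative, and the plugin's own listing additionally recurses into directories:

```java
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathNotFoundException;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class EmptyGlobGuardSketch
{
    // Illustrative only: glob the expanded path and fail fast when nothing
    // matches, in the spirit of the 0.1.4 transaction code above.
    static List<String> listOrFail(FileSystem fs, String pathString) throws IOException
    {
        FileStatus[] entries = fs.globStatus(new Path(pathString));
        List<String> files = new ArrayList<>();
        if (entries != null) {
            for (FileStatus entry : entries) {
                files.add(entry.getPath().toString());
            }
        }
        if (files.isEmpty()) {
            throw new PathNotFoundException(pathString);
        }
        return files;
    }
}
```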
```diff
@@ -190,13 +195,17 @@ public class HdfsFileInputPlugin implements FileInputPlugin
             throws IOException
     {
         List<String> fileList = new ArrayList<>();
-
+        Path rootPath = new Path(pathString);
+
+        for (FileStatus entry : fs.globStatus(rootPath)) {
             if (entry.isDirectory()) {
                 fileList.addAll(lsr(fs, entry));
-            }
+            }
+            else {
                 fileList.add(entry.getPath().toString());
             }
         }
+
         return fileList;
     }
 
```
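The `lsr(fs, entry)` call above recursively collects the files beneath a directory entry. A self-contained sketch of that depth-first walk with `FileSystem.listStatus` (class and method names are illustrative, not the plugin's exact code):

```java
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class RecursiveListingSketch
{
    // Illustrative only: depth-first listing of every file under a directory,
    // mirroring the role of the plugin's lsr(fs, entry) helper.
    static List<String> listRecursively(FileSystem fs, FileStatus dir) throws IOException
    {
        List<String> files = new ArrayList<>();
        for (FileStatus entry : fs.listStatus(dir.getPath())) {
            if (entry.isDirectory()) {
                files.addAll(listRecursively(fs, entry));
            }
            else {
                files.add(entry.getPath().toString());
            }
        }
        return files;
    }
}
```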
```diff
@@ -117,11 +117,12 @@ public class HdfsPartialFileInputStream extends InputStream
     {
         if (current >= start) {
             return;
-
         }
+
         if (start == 0) {
             current = 0;
-        }
+        }
+        else {
             current = original.skip(--start);
             if (current != start) {
                 throw new IOException("Cannot skip.");
```
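The hunk above is a brace-style change, but the surrounding logic is worth spelling out: a partial-file stream must first position itself at its start offset, and `InputStream.skip` is allowed to skip fewer bytes than requested, which is why the plugin checks the returned count. A defensive sketch of skipping to an offset (standalone, not the plugin's code):

```java
import java.io.IOException;
import java.io.InputStream;

public class SkipToOffsetSketch
{
    // Illustrative only: advance a stream by `start` bytes, looping because
    // InputStream.skip may skip fewer bytes than asked for.
    static void skipFully(InputStream in, long start) throws IOException
    {
        long remaining = start;
        while (remaining > 0) {
            long skipped = in.skip(remaining);
            if (skipped <= 0) {
                // skip() made no progress; fall back to reading one byte.
                if (in.read() == -1) {
                    throw new IOException("Unexpected end of stream while skipping.");
                }
                skipped = 1;
            }
            remaining -= skipped;
        }
    }
}
```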
metadata CHANGED

```diff
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-input-hdfs
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.4
 platform: ruby
 authors:
 - takahiro.nakayama
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-
+date: 2015-10-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
```

```diff
@@ -82,7 +82,7 @@ files:
 - classpath/curator-client-2.6.0.jar
 - classpath/curator-framework-2.6.0.jar
 - classpath/curator-recipes-2.6.0.jar
-- classpath/embulk-input-hdfs-0.1.1.jar
+- classpath/embulk-input-hdfs-0.1.4.jar
 - classpath/gson-2.2.4.jar
 - classpath/hadoop-annotations-2.6.0.jar
 - classpath/hadoop-auth-2.6.0.jar
```