embulk-input-hdfs 0.1.9 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHENGELOG.md +7 -0
- data/README.md +18 -15
- data/build.gradle +1 -1
- data/example/config.yml +4 -1
- data/example/data2.csv.gz +0 -0
- data/src/main/java/org/embulk/input/hdfs/ConfigurationBuilder.java +82 -0
- data/src/main/java/org/embulk/input/hdfs/HdfsFileInputPlugin.java +248 -212
- data/src/main/java/org/embulk/input/hdfs/PartialFile.java +48 -0
- data/src/main/java/org/embulk/input/hdfs/{HdfsPartialFileInputStream.java → PartialFileInputStream.java} +9 -4
- data/src/main/java/org/embulk/input/hdfs/PartialFileInputStreamBuilder.java +125 -0
- data/src/main/java/org/embulk/input/hdfs/PartialFileList.java +360 -0
- data/src/test/java/org/embulk/input/hdfs/TestHdfsFileInputPlugin.java +38 -14
- data/src/test/resources/sample_03.csv.gz +0 -0
- metadata +26 -21
- data/src/main/java/org/embulk/input/hdfs/HdfsFilePartitioner.java +0 -40
- data/src/main/java/org/embulk/input/hdfs/HdfsPartialFile.java +0 -39
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: e666bbbcb18941dce84889c2ee7fb85d65edbaf4
         | 
| 4 | 
            +
              data.tar.gz: 7422b508396787d70e6cea3fc534739c2c20c825
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: c305947dbd3f6bded0a23fbc06efd4d44e6d48cdb4b97c8b0e3861cd4b2a9800f6d8c93cf5280ccb235ca88346e727bb5fb549ae3c7bb2e12a13205e20765085
         | 
| 7 | 
            +
              data.tar.gz: 8f33bb06731a3c5a25dd723bef83616992ce5fc8b8d5e1a60d8a1da56421a42b49ae3397feb24134a093bf291af87ddbd208fa866c86fdd997d824a6077434a4
         | 
    
        data/CHENGELOG.md
    ADDED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -14,11 +14,12 @@ Read files on Hdfs. | |
| 14 14 |  | 
| 15 15 | 
             
            - **config_files** list of paths to Hadoop's configuration files (array of strings, default: `[]`)
         | 
| 16 16 | 
             
            - **config** overwrites configuration parameters (hash, default: `{}`)
         | 
| 17 | 
            -
            - **path** file path on Hdfs. you can use glob and Date format like `%Y%m%d/%s | 
| 18 | 
            -
            - **rewind_seconds** When you use Date format in input_path property, the format is executed by using the time which is Now minus this property.
         | 
| 19 | 
            -
            - **partition** when this is true, partition input files and increase task count. (default: `true`)
         | 
| 20 | 
            -
            - **num_partitions** number of partitions. (default: `Runtime.getRuntime().availableProcessors()`)
         | 
| 21 | 
            -
            - **skip_header_lines** Skip this number of lines first. Set 1 if the file has header line. (default: `0`)
         | 
| 17 | 
            +
            - **path** file path on HDFS. You can use globs and date formats such as `%Y%m%d/%s` (string, required).
         | 
| 18 | 
            +
            - **rewind_seconds** When the `path` property contains a date format, the format is evaluated against the current time minus this number of seconds. (long, default: `0`)
         | 
| 19 | 
            +
            - **partition** when this is true, partition input files and increase task count. (boolean, default: `true`)
         | 
| 20 | 
            +
            - **num_partitions** number of partitions. (long, default: `Runtime.getRuntime().availableProcessors()`)
         | 
| 21 | 
            +
            - **skip_header_lines** Skip this number of lines first. Set 1 if the file has header line. (long, default: `0`)
         | 
| 22 | 
            +
            - **decompression** Decompress compressed files using the Hadoop compression codec API. (boolean, default: `false`)
         | 
| 22 23 |  | 
| 23 24 | 
             
            ## Example
         | 
| 24 25 |  | 
| @@ -77,18 +78,20 @@ int partitionSizeByOneTask = totalFileLength / approximateNumPartitions; | |
| 77 78 | 
             
            ...
         | 
| 78 79 | 
             
            */
         | 
| 79 80 |  | 
| 80 | 
            -
                 | 
| 81 | 
            -
                if ( | 
| 82 | 
            -
                     | 
| 83 | 
            -
             | 
| 81 | 
            +
                long numPartitions;
         | 
| 82 | 
            +
                if (task.getPartition()) {
         | 
| 83 | 
            +
                    if (file.canDecompress()) {
         | 
| 84 | 
            +
                        numPartitions = ((fileLength - 1) / partitionSizeByOneTask) + 1;
         | 
| 85 | 
            +
                    }
         | 
| 86 | 
            +
                    else if (file.getCodec() != null) { // if not null, the file is compressed.
         | 
| 87 | 
            +
                        numPartitions = 1;
         | 
| 88 | 
            +
                    }
         | 
| 89 | 
            +
                    else {
         | 
| 90 | 
            +
                        numPartitions = ((fileLength - 1) / partitionSizeByOneTask) + 1;
         | 
| 91 | 
            +
                    }
         | 
| 84 92 | 
             
                }
         | 
| 85 | 
            -
                else if (!task.getPartition()) {
         | 
| 86 | 
            -
                    // if no partition mode, skip partitioning.
         | 
| 87 | 
            -
                    numPartitions = 1;
         | 
| 88 | 
            -
                } 
         | 
| 89 93 | 
             
                else {
         | 
| 90 | 
            -
                     | 
| 91 | 
            -
                    numPartitions = ((fileLength - 1) / partitionSizeByOneTask) + 1;
         | 
| 94 | 
            +
                    numPartitions = 1;
         | 
| 92 95 | 
             
                }
         | 
| 93 96 |  | 
| 94 97 | 
             
            /*
         | 
    
        data/build.gradle
    CHANGED
    
    
    
        data/example/config.yml
    CHANGED
    
    | @@ -12,11 +12,14 @@ local_fs_example: &local_fs_example | |
| 12 12 | 
             
                fs.defaultFS: 'file:///'
         | 
| 13 13 | 
             
                fs.hdfs.impl: 'org.apache.hadoop.fs.LocalFileSystem'
         | 
| 14 14 | 
             
                fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'
         | 
| 15 | 
            +
                io.compression.codecs: 'org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec'
         | 
| 15 16 |  | 
| 16 17 | 
             
            in:
         | 
| 17 18 | 
             
              type: hdfs
         | 
| 18 19 | 
             
              <<: *local_fs_example
         | 
| 19 | 
            -
              path: example/data | 
| 20 | 
            +
              path: example/data*
         | 
| 21 | 
            +
              skip_header_lines: 1
         | 
| 22 | 
            +
              decompression: true
         | 
| 20 23 | 
             
              parser:
         | 
| 21 24 | 
             
                charset: UTF-8
         | 
| 22 25 | 
             
                newline: CRLF
         | 
| Binary file | 
| @@ -0,0 +1,82 @@ | |
| 1 | 
            +
            package org.embulk.input.hdfs;
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            import com.google.common.collect.ImmutableList;
         | 
| 4 | 
            +
            import com.google.common.collect.ImmutableMap;
         | 
| 5 | 
            +
            import com.google.common.collect.Lists;
         | 
| 6 | 
            +
            import com.google.common.collect.Maps;
         | 
| 7 | 
            +
            import org.apache.hadoop.conf.Configuration;
         | 
| 8 | 
            +
            import org.embulk.config.ConfigException;
         | 
| 9 | 
            +
            import org.embulk.spi.Exec;
         | 
| 10 | 
            +
            import org.slf4j.Logger;
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            import java.io.File;
         | 
| 13 | 
            +
            import java.net.MalformedURLException;
         | 
| 14 | 
            +
            import java.util.List;
         | 
| 15 | 
            +
            import java.util.Map;
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            /**
         | 
| 18 | 
            +
             * Created by takahiro.nakayama on 2/22/16.
         | 
| 19 | 
            +
             */
         | 
| 20 | 
            +
            public class ConfigurationBuilder
         | 
| 21 | 
            +
            {
         | 
| 22 | 
            +
                private static final Logger logger = Exec.getLogger(ConfigurationBuilder.class);
         | 
| 23 | 
            +
                private final ImmutableList.Builder<String> configFilesBuilder;
         | 
| 24 | 
            +
                private final ImmutableMap.Builder<String, String> configMapBuilder;
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                public ConfigurationBuilder()
         | 
| 27 | 
            +
                {
         | 
| 28 | 
            +
                    this.configFilesBuilder = ImmutableList.builder();
         | 
| 29 | 
            +
                    this.configMapBuilder = ImmutableMap.builder();
         | 
| 30 | 
            +
                }
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                public ConfigurationBuilder addConfigFiles(List<String> configFiles)
         | 
| 33 | 
            +
                {
         | 
| 34 | 
            +
                    for (String configFile : configFiles) {
         | 
| 35 | 
            +
                        addConfigFile(configFile);
         | 
| 36 | 
            +
                    }
         | 
| 37 | 
            +
                    return this;
         | 
| 38 | 
            +
                }
         | 
| 39 | 
            +
             | 
| 40 | 
            +
                public ConfigurationBuilder addConfigFile(String configFile)
         | 
| 41 | 
            +
                {
         | 
| 42 | 
            +
                    configFilesBuilder.add(configFile);
         | 
| 43 | 
            +
                    return this;
         | 
| 44 | 
            +
                }
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                public ConfigurationBuilder addConfigMap(Map<String, String> configMap)
         | 
| 47 | 
            +
                {
         | 
| 48 | 
            +
                    for (Map.Entry<String, String> entry : configMap.entrySet()) {
         | 
| 49 | 
            +
                        addConfig(entry.getKey(), entry.getValue());
         | 
| 50 | 
            +
                    }
         | 
| 51 | 
            +
                    return this;
         | 
| 52 | 
            +
                }
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                public ConfigurationBuilder addConfig(String key, String value)
         | 
| 55 | 
            +
                {
         | 
| 56 | 
            +
                    configMapBuilder.put(key, value);
         | 
| 57 | 
            +
                    return this;
         | 
| 58 | 
            +
                }
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                public Configuration build()
         | 
| 61 | 
            +
                {
         | 
| 62 | 
            +
                    Configuration configuration = new Configuration();
         | 
| 63 | 
            +
                    for (String configFile : configFilesBuilder.build()) {
         | 
| 64 | 
            +
                        File file = new File(configFile);
         | 
| 65 | 
            +
                        try {
         | 
| 66 | 
            +
                            configuration.addResource(file.toURI().toURL());
         | 
| 67 | 
            +
                        }
         | 
| 68 | 
            +
                        catch (MalformedURLException e) {
         | 
| 69 | 
            +
                            throw new ConfigException(e);
         | 
| 70 | 
            +
                        }
         | 
| 71 | 
            +
                    }
         | 
| 72 | 
            +
                    for (Map.Entry<String, String> entry : configMapBuilder.build().entrySet()) {
         | 
| 73 | 
            +
                        configuration.set(entry.getKey(), entry.getValue());
         | 
| 74 | 
            +
                    }
         | 
| 75 | 
            +
                    // For debug
         | 
| 76 | 
            +
                    for (Map.Entry<String, String> entry : configuration) {
         | 
| 77 | 
            +
                        logger.trace("{}: {}", entry.getKey(), entry.getValue());
         | 
| 78 | 
            +
                    }
         | 
| 79 | 
            +
                    logger.trace("Resource Files: {}", configuration);
         | 
| 80 | 
            +
                    return configuration;
         | 
| 81 | 
            +
                }
         | 
| 82 | 
            +
            }
         | 
| @@ -1,12 +1,18 @@ | |
| 1 1 | 
             
            package org.embulk.input.hdfs;
         | 
| 2 2 |  | 
| 3 | 
            -
            import com.google.common. | 
| 3 | 
            +
            import com.google.common.annotations.VisibleForTesting;
         | 
| 4 | 
            +
            import com.google.common.base.Optional;
         | 
| 5 | 
            +
            import com.google.common.base.Throwables;
         | 
| 4 6 | 
             
            import com.google.common.collect.Lists;
         | 
| 7 | 
            +
            import com.google.common.collect.Maps;
         | 
| 5 8 | 
             
            import org.apache.hadoop.conf.Configuration;
         | 
| 6 9 | 
             
            import org.apache.hadoop.fs.FileStatus;
         | 
| 7 10 | 
             
            import org.apache.hadoop.fs.FileSystem;
         | 
| 8 11 | 
             
            import org.apache.hadoop.fs.Path;
         | 
| 12 | 
            +
            import org.apache.hadoop.fs.PathIOException;
         | 
| 9 13 | 
             
            import org.apache.hadoop.fs.PathNotFoundException;
         | 
| 14 | 
            +
            import org.apache.hadoop.io.compress.CompressionCodec;
         | 
| 15 | 
            +
            import org.apache.hadoop.io.compress.CompressionCodecFactory;
         | 
| 10 16 | 
             
            import org.embulk.config.Config;
         | 
| 11 17 | 
             
            import org.embulk.config.ConfigDefault;
         | 
| 12 18 | 
             
            import org.embulk.config.ConfigDiff;
         | 
| @@ -19,261 +25,142 @@ import org.embulk.spi.BufferAllocator; | |
| 19 25 | 
             
            import org.embulk.spi.Exec;
         | 
| 20 26 | 
             
            import org.embulk.spi.FileInputPlugin;
         | 
| 21 27 | 
             
            import org.embulk.spi.TransactionalFileInput;
         | 
| 22 | 
            -
            import org.embulk.spi.util. | 
| 28 | 
            +
            import org.embulk.spi.util.InputStreamFileInput;
         | 
| 23 29 | 
             
            import org.jruby.embed.ScriptingContainer;
         | 
| 24 30 | 
             
            import org.slf4j.Logger;
         | 
| 25 31 |  | 
| 26 | 
            -
            import javax.annotation.Nullable;
         | 
| 27 | 
            -
             | 
| 28 | 
            -
            import java.io.BufferedInputStream;
         | 
| 29 | 
            -
            import java.io.ByteArrayInputStream;
         | 
| 30 | 
            -
            import java.io.ByteArrayOutputStream;
         | 
| 31 | 
            -
            import java.io.File;
         | 
| 32 32 | 
             
            import java.io.IOException;
         | 
| 33 33 | 
             
            import java.io.InputStream;
         | 
| 34 | 
            -
            import java. | 
| 35 | 
            -
            import java.util.ArrayList;
         | 
| 34 | 
            +
            import java.util.Iterator;
         | 
| 36 35 | 
             
            import java.util.List;
         | 
| 37 36 | 
             
            import java.util.Map;
         | 
| 38 37 |  | 
| 39 38 | 
             
            public class HdfsFileInputPlugin
         | 
| 40 39 | 
             
                    implements FileInputPlugin
         | 
| 41 40 | 
             
            {
         | 
| 42 | 
            -
                private static final Logger logger = Exec.getLogger(HdfsFileInputPlugin.class);
         | 
| 43 | 
            -
                private static FileSystem fs;
         | 
| 44 | 
            -
             | 
| 45 41 | 
             
                public interface PluginTask
         | 
| 46 | 
            -
                        extends Task
         | 
| 42 | 
            +
                        extends Task, PartialFileList.Task
         | 
| 47 43 | 
             
                {
         | 
| 48 44 | 
             
                    @Config("config_files")
         | 
| 49 45 | 
             
                    @ConfigDefault("[]")
         | 
| 50 | 
            -
                     | 
| 46 | 
            +
                    List<String> getConfigFiles();
         | 
| 51 47 |  | 
| 52 48 | 
             
                    @Config("config")
         | 
| 53 49 | 
             
                    @ConfigDefault("{}")
         | 
| 54 | 
            -
                     | 
| 50 | 
            +
                    Map<String, String> getConfig();
         | 
| 55 51 |  | 
| 56 52 | 
             
                    @Config("path")
         | 
| 57 | 
            -
                     | 
| 53 | 
            +
                    String getPath();
         | 
| 58 54 |  | 
| 59 55 | 
             
                    @Config("rewind_seconds")
         | 
| 60 56 | 
             
                    @ConfigDefault("0")
         | 
| 61 | 
            -
                     | 
| 57 | 
            +
                    int getRewindSeconds();
         | 
| 62 58 |  | 
| 63 59 | 
             
                    @Config("partition")
         | 
| 64 60 | 
             
                    @ConfigDefault("true")
         | 
| 65 | 
            -
                     | 
| 61 | 
            +
                    boolean getPartition();
         | 
| 66 62 |  | 
| 67 63 | 
             
                    @Config("num_partitions") // this parameter is the approximate value.
         | 
| 68 64 | 
             
                    @ConfigDefault("-1")      // Default: Runtime.getRuntime().availableProcessors()
         | 
| 69 | 
            -
                     | 
| 65 | 
            +
                    long getApproximateNumPartitions();
         | 
| 70 66 |  | 
| 71 67 | 
             
                    @Config("skip_header_lines") // Skip this number of lines first. Set 1 if the file has header line.
         | 
| 72 68 | 
             
                    @ConfigDefault("0")          // The reason why the parameter is configured is that this plugin splits files.
         | 
| 73 | 
            -
                     | 
| 69 | 
            +
                    int getSkipHeaderLines();
         | 
| 74 70 |  | 
| 75 | 
            -
                     | 
| 71 | 
            +
                    @Config("decompression") // if true, decompress files by using compression codec
         | 
| 72 | 
            +
                    @ConfigDefault("false")  // when getting FileInputStream.
         | 
| 73 | 
            +
                    boolean getDecompression();
         | 
| 76 74 |  | 
| 77 | 
            -
                     | 
| 75 | 
            +
                    PartialFileList getPartialFileList();
         | 
| 76 | 
            +
                    void setPartialFileList(PartialFileList partialFileList);
         | 
| 78 77 |  | 
| 79 78 | 
             
                    @ConfigInject
         | 
| 80 | 
            -
                     | 
| 79 | 
            +
                    ScriptingContainer getJRuby();
         | 
| 80 | 
            +
             | 
| 81 | 
            +
                    @ConfigInject
         | 
| 82 | 
            +
                    BufferAllocator getBufferAllocator();
         | 
| 81 83 | 
             
                }
         | 
| 82 84 |  | 
| 85 | 
            +
                private static final Logger logger = Exec.getLogger(HdfsFileInputPlugin.class);
         | 
| 86 | 
            +
                private Optional<Configuration> configurationContainer = Optional.absent();
         | 
| 87 | 
            +
             | 
| 83 88 | 
             
                @Override
         | 
| 84 89 | 
             
                public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
         | 
| 85 90 | 
             
                {
         | 
| 86 91 | 
             
                    PluginTask task = config.loadConfig(PluginTask.class);
         | 
| 92 | 
            +
                    Configuration configuration = getConfiguration(task);
         | 
| 87 93 |  | 
| 88 94 | 
             
                    // listing Files
         | 
| 89 | 
            -
                    String pathString = strftime(task.getPath(), task.getRewindSeconds());
         | 
| 90 95 | 
             
                    try {
         | 
| 91 | 
            -
                         | 
| 96 | 
            +
                        FileSystem fs = getFS(configuration);
         | 
| 97 | 
            +
             | 
| 98 | 
            +
                        String pathString = strftime(task.getJRuby(), task.getPath(), task.getRewindSeconds());
         | 
| 99 | 
            +
                        Path rootPath = new Path(pathString);
         | 
| 100 | 
            +
             | 
| 101 | 
            +
                        List<Path> originalFileList = buildOriginalFileList(fs, rootPath);
         | 
| 92 102 |  | 
| 93 103 | 
             
                        if (originalFileList.isEmpty()) {
         | 
| 94 104 | 
             
                            throw new PathNotFoundException(pathString);
         | 
| 95 105 | 
             
                        }
         | 
| 96 106 |  | 
| 97 107 | 
             
                        logger.debug("embulk-input-hdfs: Loading target files: {}", originalFileList);
         | 
| 98 | 
            -
                         | 
| 108 | 
            +
                        PartialFileList list = buildPartialFileList(task, originalFileList);
         | 
| 109 | 
            +
                        task.setPartialFileList(list);
         | 
| 99 110 | 
             
                    }
         | 
| 100 111 | 
             
                    catch (IOException e) {
         | 
| 101 112 | 
             
                        logger.error(e.getMessage());
         | 
| 102 113 | 
             
                        throw new RuntimeException(e);
         | 
| 103 114 | 
             
                    }
         | 
| 104 115 |  | 
| 105 | 
            -
                    // log the detail of partial files.
         | 
| 106 | 
            -
                    for (HdfsPartialFile partialFile : task.getFiles()) {
         | 
| 107 | 
            -
                        logger.debug("embulk-input-hdfs: target file: {}, start: {}, end: {}",
         | 
| 108 | 
            -
                                partialFile.getPath(), partialFile.getStart(), partialFile.getEnd());
         | 
| 109 | 
            -
                    }
         | 
| 110 | 
            -
             | 
| 111 116 | 
             
                    // number of processors is same with number of targets
         | 
| 112 | 
            -
                    int taskCount = task. | 
| 117 | 
            +
                    int taskCount = task.getPartialFileList().getTaskCount();
         | 
| 113 118 | 
             
                    logger.info("embulk-input-hdfs: task size: {}", taskCount);
         | 
| 114 119 |  | 
| 115 120 | 
             
                    return resume(task.dump(), taskCount, control);
         | 
| 116 121 | 
             
                }
         | 
| 117 122 |  | 
| 118 | 
            -
                 | 
| 119 | 
            -
                public ConfigDiff resume(TaskSource taskSource,
         | 
| 120 | 
            -
                        int taskCount,
         | 
| 121 | 
            -
                        FileInputPlugin.Control control)
         | 
| 123 | 
            +
                private Configuration getConfiguration(PluginTask task)
         | 
| 122 124 | 
             
                {
         | 
| 123 | 
            -
                     | 
| 124 | 
            -
             | 
| 125 | 
            -
                     | 
| 126 | 
            -
             | 
| 127 | 
            -
                    // usually, yo use last_path
         | 
| 128 | 
            -
                    //if (task.getFiles().isEmpty()) {
         | 
| 129 | 
            -
                    //    if (task.getLastPath().isPresent()) {
         | 
| 130 | 
            -
                    //        configDiff.set("last_path", task.getLastPath().get());
         | 
| 131 | 
            -
                    //    }
         | 
| 132 | 
            -
                    //} else {
         | 
| 133 | 
            -
                    //    List<String> files = new ArrayList<String>(task.getFiles());
         | 
| 134 | 
            -
                    //    Collections.sort(files);
         | 
| 135 | 
            -
                    //    configDiff.set("last_path", files.get(files.size() - 1));
         | 
| 136 | 
            -
                    //}
         | 
| 137 | 
            -
             | 
| 138 | 
            -
                    return configDiff;
         | 
| 139 | 
            -
                }
         | 
| 125 | 
            +
                    if (configurationContainer.isPresent()) {
         | 
| 126 | 
            +
                        return configurationContainer.get();
         | 
| 127 | 
            +
                    }
         | 
| 140 128 |  | 
| 141 | 
            -
             | 
| 142 | 
            -
             | 
| 143 | 
            -
             | 
| 144 | 
            -
             | 
| 145 | 
            -
             | 
| 129 | 
            +
                    ConfigurationBuilder builder = new ConfigurationBuilder();
         | 
| 130 | 
            +
                    builder.addConfigFiles(task.getConfigFiles());
         | 
| 131 | 
            +
                    builder.addConfigMap(task.getConfig());
         | 
| 132 | 
            +
                    configurationContainer = Optional.of(builder.build());
         | 
| 133 | 
            +
                    return configurationContainer.get();
         | 
| 146 134 | 
             
                }
         | 
| 147 135 |  | 
| 148 | 
            -
                 | 
| 149 | 
            -
                public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
         | 
| 136 | 
            +
                private FileSystem getFS(Configuration configuration)
         | 
| 150 137 | 
             
                {
         | 
| 151 | 
            -
                    final PluginTask task = taskSource.loadTask(PluginTask.class);
         | 
| 152 | 
            -
             | 
| 153 | 
            -
                    InputStream input;
         | 
| 154 | 
            -
                    final HdfsPartialFile file = task.getFiles().get(taskIndex);
         | 
| 155 138 | 
             
                    try {
         | 
| 156 | 
            -
                         | 
| 157 | 
            -
                            input = new SequenceInputStream(getHeadersInputStream(task, file), openInputStream(task, file));
         | 
| 158 | 
            -
                        }
         | 
| 159 | 
            -
                        else {
         | 
| 160 | 
            -
                            input = openInputStream(task, file);
         | 
| 161 | 
            -
                        }
         | 
| 139 | 
            +
                        return FileSystem.get(configuration);
         | 
| 162 140 | 
             
                    }
         | 
| 163 141 | 
             
                    catch (IOException e) {
         | 
| 164 | 
            -
                         | 
| 165 | 
            -
                        throw new RuntimeException(e);
         | 
| 142 | 
            +
                        throw Throwables.propagate(e);
         | 
| 166 143 | 
             
                    }
         | 
| 167 | 
            -
             | 
| 168 | 
            -
                    return new InputStreamTransactionalFileInput(task.getBufferAllocator(), input)
         | 
| 169 | 
            -
                    {
         | 
| 170 | 
            -
                        @Override
         | 
| 171 | 
            -
                        public void abort()
         | 
| 172 | 
            -
                        { }
         | 
| 173 | 
            -
             | 
| 174 | 
            -
                        @Override
         | 
| 175 | 
            -
                        public TaskReport commit()
         | 
| 176 | 
            -
                        {
         | 
| 177 | 
            -
                            return Exec.newTaskReport();
         | 
| 178 | 
            -
                        }
         | 
| 179 | 
            -
                    };
         | 
| 180 144 | 
             
                }
         | 
| 181 145 |  | 
| 182 | 
            -
                 | 
| 183 | 
            -
             | 
| 146 | 
            +
                @VisibleForTesting
         | 
| 147 | 
            +
                String strftime(final ScriptingContainer jruby, final String format, final int rewindSeconds)
         | 
| 184 148 | 
             
                {
         | 
| 185 | 
            -
                     | 
| 186 | 
            -
                     | 
| 187 | 
            -
                    int skippedHeaders = 0;
         | 
| 188 | 
            -
             | 
| 189 | 
            -
                    try (BufferedInputStream in = new BufferedInputStream(fs.open(new Path(partialFile.getPath())))) {
         | 
| 190 | 
            -
                        while (true) {
         | 
| 191 | 
            -
                            int c = in.read();
         | 
| 192 | 
            -
                            if (c < 0) {
         | 
| 193 | 
            -
                                break;
         | 
| 194 | 
            -
                            }
         | 
| 195 | 
            -
             | 
| 196 | 
            -
                            header.write(c);
         | 
| 197 | 
            -
             | 
| 198 | 
            -
                            if (c == '\n') {
         | 
| 199 | 
            -
                                skippedHeaders++;
         | 
| 200 | 
            -
                            }
         | 
| 201 | 
            -
                            else if (c == '\r') {
         | 
| 202 | 
            -
                                int c2 = in.read();
         | 
| 203 | 
            -
                                if (c2 == '\n') {
         | 
| 204 | 
            -
                                    header.write(c2);
         | 
| 205 | 
            -
                                }
         | 
| 206 | 
            -
                                skippedHeaders++;
         | 
| 207 | 
            -
                            }
         | 
| 208 | 
            -
             | 
| 209 | 
            -
                            if (skippedHeaders >= task.getSkipHeaderLines()) {
         | 
| 210 | 
            -
                                break;
         | 
| 211 | 
            -
                            }
         | 
| 212 | 
            -
                        }
         | 
| 213 | 
            -
                    }
         | 
| 214 | 
            -
                    header.close();
         | 
| 215 | 
            -
                    return new ByteArrayInputStream(header.toByteArray());
         | 
| 149 | 
            +
                    String script = String.format("(Time.now - %d).strftime('%s')", rewindSeconds, format);
         | 
| 150 | 
            +
                    return jruby.runScriptlet(script).toString();
         | 
| 216 151 | 
             
                }
         | 
| 217 152 |  | 
| 218 | 
            -
                private  | 
| 219 | 
            -
                        throws IOException
         | 
| 153 | 
            +
                private List<Path> buildOriginalFileList(FileSystem fs, Path rootPath)
         | 
| 220 154 | 
             
                {
         | 
| 221 | 
            -
                     | 
| 222 | 
            -
                    InputStream original = fs.open(new Path(partialFile.getPath()));
         | 
| 223 | 
            -
                    return new HdfsPartialFileInputStream(original, partialFile.getStart(), partialFile.getEnd());
         | 
| 224 | 
            -
                }
         | 
| 155 | 
            +
                    List<Path> fileList = Lists.newArrayList();
         | 
| 225 156 |  | 
| 226 | 
            -
             | 
| 227 | 
            -
                     | 
| 228 | 
            -
             | 
| 229 | 
            -
                    if (fs == null) {
         | 
| 230 | 
            -
                        setFs(task);
         | 
| 231 | 
            -
                        return fs;
         | 
| 232 | 
            -
                    }
         | 
| 233 | 
            -
                    else {
         | 
| 234 | 
            -
                        return fs;
         | 
| 235 | 
            -
                    }
         | 
| 236 | 
            -
                }
         | 
| 237 | 
            -
             | 
| 238 | 
            -
                private static FileSystem setFs(final PluginTask task)
         | 
| 239 | 
            -
                        throws IOException
         | 
| 240 | 
            -
                {
         | 
| 241 | 
            -
                    Configuration configuration = new Configuration();
         | 
| 242 | 
            -
             | 
| 243 | 
            -
                    for (String configFile : task.getConfigFiles()) {
         | 
| 244 | 
            -
                        File file = new File(configFile);
         | 
| 245 | 
            -
                        configuration.addResource(file.toURI().toURL());
         | 
| 246 | 
            -
                    }
         | 
| 247 | 
            -
             | 
| 248 | 
            -
                    for (Map.Entry<String, String> entry : task.getConfig().entrySet()) {
         | 
| 249 | 
            -
                        configuration.set(entry.getKey(), entry.getValue());
         | 
| 157 | 
            +
                    final FileStatus[] entries;
         | 
| 158 | 
            +
                    try {
         | 
| 159 | 
            +
                        entries = fs.globStatus(rootPath);
         | 
| 250 160 | 
             
                    }
         | 
| 251 | 
            -
             | 
| 252 | 
            -
             | 
| 253 | 
            -
                    for (Map.Entry<String, String> entry : configuration) {
         | 
| 254 | 
            -
                        logger.trace("{}: {}", entry.getKey(), entry.getValue());
         | 
| 161 | 
            +
                    catch (IOException e) {
         | 
| 162 | 
            +
                        throw Throwables.propagate(e);
         | 
| 255 163 | 
             
                    }
         | 
| 256 | 
            -
                    logger.debug("Resource Files: {}", configuration);
         | 
| 257 | 
            -
             | 
| 258 | 
            -
                    fs = FileSystem.get(configuration);
         | 
| 259 | 
            -
                    return fs;
         | 
| 260 | 
            -
                }
         | 
| 261 | 
            -
             | 
| 262 | 
            -
                private String strftime(final String raw, final int rewindSeconds)
         | 
| 263 | 
            -
                {
         | 
| 264 | 
            -
                    ScriptingContainer jruby = new ScriptingContainer();
         | 
| 265 | 
            -
                    Object resolved = jruby.runScriptlet(
         | 
| 266 | 
            -
                            String.format("(Time.now - %s).strftime('%s')", String.valueOf(rewindSeconds), raw));
         | 
| 267 | 
            -
                    return resolved.toString();
         | 
| 268 | 
            -
                }
         | 
| 269 | 
            -
             | 
| 270 | 
            -
                private List<String> buildFileList(final FileSystem fs, final String pathString)
         | 
| 271 | 
            -
                        throws IOException
         | 
| 272 | 
            -
                {
         | 
| 273 | 
            -
                    List<String> fileList = new ArrayList<>();
         | 
| 274 | 
            -
                    Path rootPath = new Path(pathString);
         | 
| 275 | 
            -
             | 
| 276 | 
            -
                    final FileStatus[] entries = fs.globStatus(rootPath);
         | 
| 277 164 | 
             
                    // `globStatus` does not throw PathNotFoundException.
         | 
| 278 165 | 
             
                    // return null instead.
         | 
| 279 166 | 
             
                    // see: https://github.com/apache/hadoop/blob/branch-2.7.0/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java#L286
         | 
| @@ -283,80 +170,229 @@ public class HdfsFileInputPlugin | |
| 283 170 |  | 
| 284 171 | 
             
                    for (FileStatus entry : entries) {
         | 
| 285 172 | 
             
                        if (entry.isDirectory()) {
         | 
| 286 | 
            -
                             | 
| 173 | 
            +
                            List<Path> subEntries = listRecursive(fs, entry);
         | 
| 174 | 
            +
                            fileList.addAll(subEntries);
         | 
| 287 175 | 
             
                        }
         | 
| 288 176 | 
             
                        else {
         | 
| 289 | 
            -
                            fileList.add(entry.getPath() | 
| 177 | 
            +
                            fileList.add(entry.getPath());
         | 
| 290 178 | 
             
                        }
         | 
| 291 179 | 
             
                    }
         | 
| 292 180 |  | 
| 293 181 | 
             
                    return fileList;
         | 
| 294 182 | 
             
                }
         | 
| 295 183 |  | 
| 296 | 
            -
                private List< | 
| 297 | 
            -
                        throws IOException
         | 
| 184 | 
            +
                private List<Path> listRecursive(final FileSystem fs, FileStatus status)
         | 
| 298 185 | 
             
                {
         | 
| 299 | 
            -
                    List< | 
| 186 | 
            +
                    List<Path> fileList = Lists.newArrayList();
         | 
| 300 187 | 
             
                    if (status.isDirectory()) {
         | 
| 301 | 
            -
                         | 
| 302 | 
            -
             | 
| 188 | 
            +
                        FileStatus[] entries;
         | 
| 189 | 
            +
                        try {
         | 
| 190 | 
            +
                            entries = fs.listStatus(status.getPath());
         | 
| 191 | 
            +
                        }
         | 
| 192 | 
            +
                        catch (IOException e) {
         | 
| 193 | 
            +
                            throw Throwables.propagate(e);
         | 
| 194 | 
            +
                        }
         | 
| 195 | 
            +
             | 
| 196 | 
            +
                        for (FileStatus entry : entries) {
         | 
| 197 | 
            +
                            fileList.addAll(listRecursive(fs, entry));
         | 
| 303 198 | 
             
                        }
         | 
| 304 199 | 
             
                    }
         | 
| 305 200 | 
             
                    else {
         | 
| 306 | 
            -
                        fileList.add(status.getPath() | 
| 201 | 
            +
                        fileList.add(status.getPath());
         | 
| 307 202 | 
             
                    }
         | 
| 308 203 | 
             
                    return fileList;
         | 
| 309 204 | 
             
                }
         | 
| 310 205 |  | 
| 311 | 
            -
                private  | 
| 312 | 
            -
                        throws IOException
         | 
| 206 | 
            +
                private PartialFileList buildPartialFileList(PluginTask task, List<Path> pathList)
         | 
| 313 207 | 
             
                {
         | 
| 314 | 
            -
                     | 
| 315 | 
            -
                     | 
| 316 | 
            -
             | 
| 317 | 
            -
             | 
| 318 | 
            -
                        public Path apply(@Nullable String input)
         | 
| 319 | 
            -
                        {
         | 
| 320 | 
            -
                            return new Path(input);
         | 
| 321 | 
            -
                        }
         | 
| 322 | 
            -
                    });
         | 
| 208 | 
            +
                    Configuration configuration = getConfiguration(task);
         | 
| 209 | 
            +
                    FileSystem fs = getFS(configuration);
         | 
| 210 | 
            +
                    boolean shouldPartition = task.getPartition();
         | 
| 211 | 
            +
                    boolean shouldDecompress = task.getDecompression();
         | 
| 323 212 |  | 
| 213 | 
            +
                    Map<Path, Long> pathLengthMap = Maps.newHashMap();
         | 
| 324 214 | 
             
                    long totalFileLength = 0;
         | 
| 325 215 | 
             
                    for (Path path : pathList) {
         | 
| 326 | 
            -
                         | 
| 216 | 
            +
                        long fileLength = getHdfsFileLength(fs, path, shouldDecompress);
         | 
| 217 | 
            +
             | 
| 218 | 
            +
                        if (fileLength <= 0) {
         | 
| 219 | 
            +
                            logger.info("Skip the 0 byte target file: {}", path);
         | 
| 220 | 
            +
                            continue;
         | 
| 221 | 
            +
                        }
         | 
| 222 | 
            +
             | 
| 223 | 
            +
                        pathLengthMap.put(path, fileLength);
         | 
| 224 | 
            +
                        totalFileLength += fileLength;
         | 
| 327 225 | 
             
                    }
         | 
| 226 | 
            +
                    if (totalFileLength <= 0) {
         | 
| 227 | 
            +
                        throw Throwables.propagate(new PathIOException(task.getPath(), "All files are empty"));
         | 
| 228 | 
            +
                    }
         | 
| 229 | 
            +
             | 
| 230 | 
            +
                    PartialFileList.Builder builder = new PartialFileList.Builder(task);
         | 
| 328 231 |  | 
| 329 232 | 
             
                    // TODO: optimum allocation of resources
         | 
| 330 | 
            -
                    long approximateNumPartitions | 
| 331 | 
            -
             | 
| 233 | 
            +
                    final long approximateNumPartitions;
         | 
| 234 | 
            +
                    if (task.getApproximateNumPartitions() <= 0) {
         | 
| 235 | 
            +
                        approximateNumPartitions = Runtime.getRuntime().availableProcessors();
         | 
| 236 | 
            +
                    }
         | 
| 237 | 
            +
                    else {
         | 
| 238 | 
            +
                        approximateNumPartitions = task.getApproximateNumPartitions();
         | 
| 239 | 
            +
                    }
         | 
| 240 | 
            +
             | 
| 332 241 | 
             
                    long partitionSizeByOneTask = totalFileLength / approximateNumPartitions;
         | 
| 333 242 | 
             
                    if (partitionSizeByOneTask <= 0) {
         | 
| 334 243 | 
             
                        partitionSizeByOneTask = 1;
         | 
| 335 244 | 
             
                    }
         | 
| 336 245 |  | 
| 337 | 
            -
                     | 
| 338 | 
            -
             | 
| 339 | 
            -
                        long fileLength =  | 
| 340 | 
            -
                        if (fileLength <= 0) {
         | 
| 341 | 
            -
                            logger.info("embulk-input-hdfs: Skip the 0 byte target file: {}", path);
         | 
| 342 | 
            -
                            continue;
         | 
| 343 | 
            -
                        }
         | 
| 246 | 
            +
                    for (Map.Entry<Path, Long> entry : pathLengthMap.entrySet()) {
         | 
| 247 | 
            +
                        Path path = entry.getKey();
         | 
| 248 | 
            +
                        long fileLength = entry.getValue();
         | 
| 344 249 |  | 
| 345 250 | 
             
                        long numPartitions;
         | 
| 346 | 
            -
                        if ( | 
| 347 | 
            -
                             | 
| 251 | 
            +
                        if (shouldPartition) {
         | 
| 252 | 
            +
                            if (shouldDecompress && getHdfsFileCompressionCodec(fs, path) != null) {
         | 
| 253 | 
            +
                                numPartitions = ((fileLength - 1) / partitionSizeByOneTask) + 1;
         | 
| 254 | 
            +
                            }
         | 
| 255 | 
            +
                            else if (getHdfsFileCompressionCodec(fs, path) != null) { // if not null, the file is compressed.
         | 
| 256 | 
            +
                                numPartitions = 1;
         | 
| 257 | 
            +
                            }
         | 
| 258 | 
            +
                            else {
         | 
| 259 | 
            +
                                numPartitions = ((fileLength - 1) / partitionSizeByOneTask) + 1;
         | 
| 260 | 
            +
                            }
         | 
| 348 261 | 
             
                        }
         | 
| 349 | 
            -
                        else  | 
| 262 | 
            +
                        else {
         | 
| 350 263 | 
             
                            numPartitions = 1;
         | 
| 351 264 | 
             
                        }
         | 
| 352 | 
            -
             | 
| 353 | 
            -
             | 
| 265 | 
            +
             | 
| 266 | 
            +
                        for (long i = 0; i < numPartitions; i++) {
         | 
| 267 | 
            +
                            long start = fileLength * i / numPartitions;
         | 
| 268 | 
            +
                            long end = fileLength * (i + 1) / numPartitions;
         | 
| 269 | 
            +
                            if (start < end) {
         | 
| 270 | 
            +
                                logger.debug("PartialFile: path {}, start: {}, end: {}", path, start, end);
         | 
| 271 | 
            +
                                builder.add(path.toString(), start, end, shouldDecompress && getHdfsFileCompressionCodec(fs, path) != null);
         | 
| 272 | 
            +
                            }
         | 
| 354 273 | 
             
                        }
         | 
| 274 | 
            +
                    }
         | 
| 275 | 
            +
             | 
| 276 | 
            +
                    return builder.build();
         | 
| 277 | 
            +
                }
         | 
| 355 278 |  | 
| 356 | 
            -
             | 
| 357 | 
            -
             | 
| 279 | 
            +
                private Long getHdfsFileLength(FileSystem fs, Path path, boolean shouldDecompression)
         | 
| 280 | 
            +
                {
         | 
| 281 | 
            +
                    CompressionCodec codec = getHdfsFileCompressionCodec(fs, path);
         | 
| 282 | 
            +
                    if (codec == null) {
         | 
| 283 | 
            +
                        try {
         | 
| 284 | 
            +
                            return fs.getFileStatus(path).getLen();
         | 
| 285 | 
            +
                        }
         | 
| 286 | 
            +
                        catch (IOException e) {
         | 
| 287 | 
            +
                            throw Throwables.propagate(e);
         | 
| 288 | 
            +
                        }
         | 
| 289 | 
            +
                    }
         | 
| 290 | 
            +
                    else if (!shouldDecompression) {
         | 
| 291 | 
            +
                        try {
         | 
| 292 | 
            +
                            return fs.getFileStatus(path).getLen();
         | 
| 293 | 
            +
                        }
         | 
| 294 | 
            +
                        catch (IOException e) {
         | 
| 295 | 
            +
                            throw Throwables.propagate(e);
         | 
| 296 | 
            +
                        }
         | 
| 358 297 | 
             
                    }
         | 
| 298 | 
            +
                    else {
         | 
| 299 | 
            +
                        long fileLength = 0;
         | 
| 300 | 
            +
                        try (InputStream is = codec.createInputStream(fs.open(path))) {
         | 
| 301 | 
            +
                            while (is.read() > 0) {
         | 
| 302 | 
            +
                                fileLength++;
         | 
| 303 | 
            +
                            }
         | 
| 304 | 
            +
                        }
         | 
| 305 | 
            +
                        catch (IOException e) {
         | 
| 306 | 
            +
                            throw Throwables.propagate(e);
         | 
| 307 | 
            +
                        }
         | 
| 308 | 
            +
                        return fileLength;
         | 
| 309 | 
            +
                    }
         | 
| 310 | 
            +
                }
         | 
| 359 311 |  | 
| 360 | 
            -
             | 
| 312 | 
            +
                private CompressionCodec getHdfsFileCompressionCodec(FileSystem fs, Path path)
         | 
| 313 | 
            +
                {
         | 
| 314 | 
            +
                    return getHdfsFileCompressionCodec(fs.getConf(), path);
         | 
| 315 | 
            +
                }
         | 
| 316 | 
            +
             | 
| 317 | 
            +
                private CompressionCodec getHdfsFileCompressionCodec(Configuration configuration, Path path)
         | 
| 318 | 
            +
                {
         | 
| 319 | 
            +
                    return new CompressionCodecFactory(configuration).getCodec(path);
         | 
| 320 | 
            +
                }
         | 
| 321 | 
            +
             | 
| 322 | 
            +
                @Override
         | 
| 323 | 
            +
                public ConfigDiff resume(TaskSource taskSource,
         | 
| 324 | 
            +
                        int taskCount,
         | 
| 325 | 
            +
                        FileInputPlugin.Control control)
         | 
| 326 | 
            +
                {
         | 
| 327 | 
            +
                    control.run(taskSource, taskCount);
         | 
| 328 | 
            +
                    ConfigDiff configDiff = Exec.newConfigDiff();
         | 
| 329 | 
            +
                    return configDiff;
         | 
| 330 | 
            +
                }
         | 
| 331 | 
            +
             | 
| 332 | 
            +
                @Override
         | 
| 333 | 
            +
                public void cleanup(TaskSource taskSource,
         | 
| 334 | 
            +
                        int taskCount,
         | 
| 335 | 
            +
                        List<TaskReport> successTaskReports)
         | 
| 336 | 
            +
                {
         | 
| 337 | 
            +
                }
         | 
| 338 | 
            +
             | 
| 339 | 
            +
                @Override
         | 
| 340 | 
            +
                public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
         | 
| 341 | 
            +
                {
         | 
| 342 | 
            +
                    final PluginTask task = taskSource.loadTask(PluginTask.class);
         | 
| 343 | 
            +
                    return new HdfsFileInput(task, taskIndex);
         | 
| 344 | 
            +
                }
         | 
| 345 | 
            +
             | 
| 346 | 
            +
                public class HdfsFileInput
         | 
| 347 | 
            +
                        extends InputStreamFileInput
         | 
| 348 | 
            +
                        implements TransactionalFileInput
         | 
| 349 | 
            +
                {
         | 
| 350 | 
            +
             | 
| 351 | 
            +
                    public HdfsFileInput(PluginTask task, int taskIndex)
         | 
| 352 | 
            +
                    {
         | 
| 353 | 
            +
                        super(task.getBufferAllocator(), new SingleFileProvider(task, taskIndex));
         | 
| 354 | 
            +
                    }
         | 
| 355 | 
            +
             | 
| 356 | 
            +
                    @Override
         | 
| 357 | 
            +
                    public void abort()
         | 
| 358 | 
            +
                    {
         | 
| 359 | 
            +
                    }
         | 
| 360 | 
            +
             | 
| 361 | 
            +
                    @Override
         | 
| 362 | 
            +
                    public TaskReport commit()
         | 
| 363 | 
            +
                    {
         | 
| 364 | 
            +
                        return Exec.newTaskReport();
         | 
| 365 | 
            +
                    }
         | 
| 366 | 
            +
                }
         | 
| 367 | 
            +
             | 
| 368 | 
            +
                // TODO create single-file InputStreamFileInput utility
         | 
| 369 | 
            +
                private class SingleFileProvider
         | 
| 370 | 
            +
                        implements InputStreamFileInput.Provider
         | 
| 371 | 
            +
                {
         | 
| 372 | 
            +
                    private final FileSystem fs;
         | 
| 373 | 
            +
                    private final int numHeaderLines;
         | 
| 374 | 
            +
                    private final Iterator<PartialFile> iterator;
         | 
| 375 | 
            +
             | 
| 376 | 
            +
                    public SingleFileProvider(PluginTask task, int taskIndex)
         | 
| 377 | 
            +
                    {
         | 
| 378 | 
            +
                        this.fs = getFS(getConfiguration(task));
         | 
| 379 | 
            +
                        this.numHeaderLines = task.getSkipHeaderLines();
         | 
| 380 | 
            +
                        this.iterator = task.getPartialFileList().get(taskIndex).iterator();
         | 
| 381 | 
            +
                    }
         | 
| 382 | 
            +
             | 
| 383 | 
            +
                    @Override
         | 
| 384 | 
            +
                    public InputStream openNext() throws IOException
         | 
| 385 | 
            +
                    {
         | 
| 386 | 
            +
                        if (!iterator.hasNext()) {
         | 
| 387 | 
            +
                            return null;
         | 
| 388 | 
            +
                        }
         | 
| 389 | 
            +
                        PartialFileInputStreamBuilder builder = new PartialFileInputStreamBuilder(fs, iterator.next()).withHeaders(numHeaderLines);
         | 
| 390 | 
            +
                        return builder.build();
         | 
| 391 | 
            +
                    }
         | 
| 392 | 
            +
             | 
| 393 | 
            +
                    @Override
         | 
| 394 | 
            +
                    public void close()
         | 
| 395 | 
            +
                    {
         | 
| 396 | 
            +
                    }
         | 
| 361 397 | 
             
                }
         | 
| 362 398 | 
             
            }
         |