embulk-decoder-commons-compress 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -0
- data/README.md +29 -0
- data/build.gradle +5 -1
- data/src/integration-test/java/org/embulk/filter/TestIntegration.java +138 -0
- data/src/integration-test/resources/concatenated.csv.bz2 +0 -0
- data/src/integration-test/resources/concatenated.csv.gz +0 -0
- data/src/integration-test/resources/config_ar.yml +26 -0
- data/src/integration-test/resources/config_bz2.yml +26 -0
- data/src/integration-test/resources/config_concatenated_bz2.yml +27 -0
- data/src/integration-test/resources/config_concatenated_bzip2.yml +27 -0
- data/src/integration-test/resources/config_concatenated_gz.yml +27 -0
- data/src/integration-test/resources/config_concatenated_gzip.yml +27 -0
- data/src/integration-test/resources/config_gz.yml +26 -0
- data/src/integration-test/resources/config_tar.Z.yml +27 -0
- data/src/integration-test/resources/config_tar.bz2.yml +27 -0
- data/src/integration-test/resources/config_tar.gz.yml +27 -0
- data/src/integration-test/resources/config_tar.yml +27 -0
- data/src/integration-test/resources/config_tgz.yml +27 -0
- data/src/integration-test/resources/config_zip.yml +26 -0
- data/src/integration-test/resources/header.csv +2 -0
- data/src/integration-test/resources/sample_0.tar +0 -0
- data/src/integration-test/resources/sample_1.csv +1 -0
- data/src/integration-test/resources/sample_1.csv.bz2 +0 -0
- data/src/integration-test/resources/sample_1.csv.gz +0 -0
- data/src/integration-test/resources/sample_1.tar +0 -0
- data/src/integration-test/resources/sample_2.csv +1 -0
- data/src/integration-test/resources/samples.ar +5 -0
- data/src/integration-test/resources/samples.tar +0 -0
- data/src/integration-test/resources/samples.tar.Z +0 -0
- data/src/integration-test/resources/samples.tar.bz2 +0 -0
- data/src/integration-test/resources/samples.tar.gz +0 -0
- data/src/integration-test/resources/samples.tgz +0 -0
- data/src/integration-test/resources/samples.zip +0 -0
- data/src/main/java/org/embulk/decoder/CommonsCompressProvider.java +12 -4
- data/src/main/java/org/embulk/decoder/CommonsCompressUtil.java +22 -2
- data/src/test/java/org/embulk/decoder/TestCommonsCompressProvider.java +38 -2
- data/src/test/resources/org/embulk/decoder/concatenated.csv.bz2 +0 -0
- data/src/test/resources/org/embulk/decoder/concatenated.csv.gz +0 -0
- metadata +36 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8c492e93cc8decc06fa54a952930d446748d1bc3
+  data.tar.gz: 3bc26ce03ac3fd8cbd450b12d5c798f1f0d88326
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a3e7102ce968c9d4c7b7733025d8b141ee55c9dd4f3e730c05a0e9ea0d11f0ddadab2fb9ca34b107eb30949108cdb6c3eb669a31eb5111c95eae52c65c6360dd
+  data.tar.gz: aa39557828feb9906a43d061b2edecaf558bdcae8ff6b00bd6f3623ff37c09d5d3e905f18789c5b8b170a91cb41058ea229fd2df376a6487e644aacefe36c45d
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,5 +1,8 @@
 # Commons Compress decoder plugin for Embulk
 
+[](https://travis-ci.org/hata/embulk-decoder-commons-compress)
+
+
 This decoder plugin for Embulk supports various archive formats using [Apache Commons Compress](http://commons.apache.org/proper/commons-compress/) library.
 
 ## Overview
@@ -26,6 +29,8 @@ This decoder plugin for Embulk supports various archive formats using [Apache Co
 - tbz, tbz2, tb2, tar.bz2
 - taz, tz, tar.Z
 
+If input files are concatenated gzip or bzip2 format, please set format parameter explicitly.
+
 ## Example
 
 - Use auto detection. This can use for 1 format like tar and zip. If you would like to use a solid compression format like tar.gz, please set the format to your configuration file.
@@ -57,12 +62,36 @@ in:
     format: tgz
 ```
 
+- Set *format* parameter to handle concatenated gzip(or bzip2) file.
+```yaml
+in:
+  type: any input plugin type
+  decoders:
+  - type: commons-compress
+    format: gz
+```
+
+```yaml
+in:
+  type: any input plugin type
+  decoders:
+  - type: commons-compress
+    format: bzip2
+```
+
+
 ## Build
 
 ```
 $ ./gradlew gem
 ```
 
+To build with integrationTest(It works on OSX or Linux)
+```
+$ ./gradlew clean
+$ ./gradlew -DenableIntegrationTest=true gem
+```
+
 ## Reference
 
 - [Apache Commons Compress](http://commons.apache.org/proper/commons-compress/)
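As a usage note, once the gem is installed the example configurations added under `data/src/integration-test/resources` below can be exercised with the standard Embulk CLI, for instance:

```
$ embulk gem install embulk-decoder-commons-compress
$ embulk run config_concatenated_gz.yml
```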
data/build.gradle
CHANGED
@@ -5,6 +5,8 @@ plugins {
 }
 import com.github.jrubygradle.JRubyExec
 
+apply from: 'https://raw.githubusercontent.com/hata/gradle-plugins/master/embulk-integration-test.gradle'
+
 sourceCompatibility = '1.7'
 targetCompatibility = '1.7'
 
@@ -16,7 +18,7 @@ configurations {
     provided
 }
 
-version = "0.3.1"
+version = "0.3.2"
 
 dependencies {
     compile "org.embulk:embulk-core:0.7.0"
@@ -61,3 +63,5 @@ Gem::Specification.new do |spec|
   end
   /$)
 }
+
+project.tasks.integrationTest.dependsOn(classpath)
data/src/integration-test/java/org/embulk/filter/TestIntegration.java
ADDED
@@ -0,0 +1,138 @@
+package org.embulk.filter;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.zip.CRC32;
+import java.util.zip.Checksum;
+
+import org.junit.Test;
+
+public class TestIntegration {
+    static final String TEST_DIR = System.getProperty("embulk.integrationtest.dir");
+    private static final String[] SAMPLE_SRC_FILES = {"header.csv", "sample_1.csv", "sample_2.csv"};
+    private static final String[] SAMPLE_1_SRC_FILES = {"header.csv", "sample_1.csv"};
+
+    private static String getTestFile(String name) {
+        return TEST_DIR + File.separator + name;
+    }
+
+    @Test
+    public void testArchiveFormatZip() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_SRC_FILES),
+                getChecksumFromFiles("result_zip_000.00.csv"));
+    }
+
+    @Test
+    public void testArchiveFormatAr() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_SRC_FILES),
+                getChecksumFromFiles("result_ar_000.00.csv"));
+    }
+
+    @Test
+    public void testArchiveFormatTar() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_SRC_FILES),
+                getChecksumFromFiles("result_tar_000.00.csv"));
+    }
+
+    @Test
+    public void testCompressionFormatBzip2() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_1_SRC_FILES),
+                getChecksumFromFiles("result_bz2_000.00.csv"));
+    }
+
+    @Test
+    public void testCompressionFormatGzip() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_1_SRC_FILES),
+                getChecksumFromFiles("result_gz_000.00.csv"));
+    }
+
+    @Test
+    public void testSolidCompressionFormatTgz() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_SRC_FILES),
+                getChecksumFromFiles("result_tgz_000.00.csv"));
+    }
+
+    @Test
+    public void testSolidCompressionFormatTarBz2() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_SRC_FILES),
+                getChecksumFromFiles("result_tar.bz2_000.00.csv"));
+    }
+
+    @Test
+    public void testSolidCompressionFormatTarGz() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_SRC_FILES),
+                getChecksumFromFiles("result_tar.gz_000.00.csv"));
+    }
+
+    @Test
+    public void testSolidCompressionFormatTarZ() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_SRC_FILES),
+                getChecksumFromFiles("result_tar.Z_000.00.csv"));
+    }
+
+    @Test
+    public void testConcatenatedGZ() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_SRC_FILES),
+                getChecksumFromFiles("result_concatenated_gz_000.00.csv"));
+    }
+
+    @Test
+    public void testConcatenatedGzip() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_SRC_FILES),
+                getChecksumFromFiles("result_concatenated_gzip_000.00.csv"));
+    }
+
+    @Test
+    public void testConcatenatedBz2() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_SRC_FILES),
+                getChecksumFromFiles("result_concatenated_bz2_000.00.csv"));
+    }
+
+    @Test
+    public void testConcatenatedBzip2() throws Exception {
+        assertEquals("Verify input and output contents are identical.",
+                getChecksumFromFiles(SAMPLE_SRC_FILES),
+                getChecksumFromFiles("result_concatenated_bzip2_000.00.csv"));
+    }
+
+    private long getChecksumFromFiles(String ... files) throws IOException {
+        Checksum cksum = new CRC32();
+
+        for (String srcFile : files) {
+            try (BufferedReader reader = new BufferedReader(new FileReader(getTestFile(srcFile)))) {
+                getChecksum(cksum, reader);
+            }
+        }
+
+        return cksum.getValue();
+    }
+
+    private long getChecksum(Checksum cksum, BufferedReader reader) throws IOException {
+        String line = reader.readLine();
+        while (line != null) {
+            byte[] lineBuf = line.trim().getBytes();
+            if (lineBuf.length > 0) {
+                // System.out.println("line:" + new String(lineBuf));
+                cksum.update(lineBuf, 0, lineBuf.length);
+            }
+            line = reader.readLine();
+        }
+        return cksum.getValue();
+    }
+}
data/src/integration-test/resources/concatenated.csv.bz2
ADDED
Binary file
data/src/integration-test/resources/concatenated.csv.gz
ADDED
Binary file
data/src/integration-test/resources/config_ar.yml
ADDED
@@ -0,0 +1,26 @@
+in:
+  type: file
+  path_prefix: ./samples.ar
+  decoders:
+  - type: commons-compress
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_ar_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
data/src/integration-test/resources/config_bz2.yml
ADDED
@@ -0,0 +1,26 @@
+in:
+  type: file
+  path_prefix: ./sample_1.csv.bz2
+  decoders:
+  - type: commons-compress
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_bz2_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
data/src/integration-test/resources/config_concatenated_bz2.yml
ADDED
@@ -0,0 +1,27 @@
+in:
+  type: file
+  path_prefix: ./concatenated.csv.bz2
+  decoders:
+  - type: commons-compress
+    format: bz2
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_concatenated_bz2_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
data/src/integration-test/resources/config_concatenated_bzip2.yml
ADDED
@@ -0,0 +1,27 @@
+in:
+  type: file
+  path_prefix: ./concatenated.csv.bz2
+  decoders:
+  - type: commons-compress
+    format: bzip2
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_concatenated_bzip2_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
data/src/integration-test/resources/config_concatenated_gz.yml
ADDED
@@ -0,0 +1,27 @@
+in:
+  type: file
+  path_prefix: ./concatenated.csv.gz
+  decoders:
+  - type: commons-compress
+    format: gz
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_concatenated_gz_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
data/src/integration-test/resources/config_concatenated_gzip.yml
ADDED
@@ -0,0 +1,27 @@
+in:
+  type: file
+  path_prefix: ./concatenated.csv.gz
+  decoders:
+  - type: commons-compress
+    format: gzip
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_concatenated_gzip_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
data/src/integration-test/resources/config_gz.yml
ADDED
@@ -0,0 +1,26 @@
+in:
+  type: file
+  path_prefix: ./sample_1.csv.gz
+  decoders:
+  - type: commons-compress
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_gz_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
data/src/integration-test/resources/config_tar.Z.yml
ADDED
@@ -0,0 +1,27 @@
+in:
+  type: file
+  path_prefix: ./samples.tar.Z
+  decoders:
+  - type: commons-compress
+    format: tar.Z
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_tar.Z_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
data/src/integration-test/resources/config_tar.bz2.yml
ADDED
@@ -0,0 +1,27 @@
+in:
+  type: file
+  path_prefix: ./samples.tar.bz2
+  decoders:
+  - type: commons-compress
+    format: tar.bz2
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_tar.bz2_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
data/src/integration-test/resources/config_tar.gz.yml
ADDED
@@ -0,0 +1,27 @@
+in:
+  type: file
+  path_prefix: ./samples.tar.gz
+  decoders:
+  - type: commons-compress
+    format: tar.gz
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_tar.gz_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
data/src/integration-test/resources/config_tar.yml
ADDED
@@ -0,0 +1,27 @@
+in:
+  type: file
+  path_prefix: ./samples.tar
+  decoders:
+  - type: commons-compress
+    format: tar
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_tar_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
data/src/integration-test/resources/config_tgz.yml
ADDED
@@ -0,0 +1,27 @@
+in:
+  type: file
+  path_prefix: ./samples.tgz
+  decoders:
+  - type: commons-compress
+    format: tgz
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_tgz_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
data/src/integration-test/resources/config_zip.yml
ADDED
@@ -0,0 +1,26 @@
+in:
+  type: file
+  path_prefix: ./samples.zip
+  decoders:
+  - type: commons-compress
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    trim_if_not_quoted: false
+    skip_header_lines: 0
+    allow_extra_columns: false
+    allow_optional_columns: false
+    columns:
+    - {name: id, type: long}
+    - {name: comment, type: string}
+out:
+  type: file
+  path_prefix: ./result_zip_
+  file_ext: csv
+  formatter:
+    type: csv
+    quote_policy: MINIMAL
+    newline: LF
Binary file
data/src/integration-test/resources/sample_1.csv
ADDED
@@ -0,0 +1 @@
+1,foo
Binary file
Binary file
Binary file
data/src/integration-test/resources/sample_2.csv
ADDED
@@ -0,0 +1 @@
+2,bar
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/src/main/java/org/embulk/decoder/CommonsCompressProvider.java
CHANGED
@@ -13,6 +13,8 @@ import org.apache.commons.compress.archivers.ArchiveStreamFactory;
 import org.apache.commons.compress.compressors.CompressorException;
 import org.apache.commons.compress.compressors.CompressorInputStream;
 import org.apache.commons.compress.compressors.CompressorStreamFactory;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
 import org.embulk.decoder.CommonsCompressDecoderPlugin.PluginTask;
 import org.embulk.spi.util.FileInputInputStream;
 import org.embulk.spi.util.InputStreamFileInput.Provider;
@@ -105,19 +107,19 @@ class CommonsCompressProvider implements Provider {
      * (Actually, compressor formats can use two or more times in this code.
      * But it is not common case.)
      */
-    Iterator<InputStream> createInputStreamIterator(String[]
+    Iterator<InputStream> createInputStreamIterator(String[] inputFormats,
             int pos, InputStream in) throws IOException {
-        if (pos >=
+        if (pos >= inputFormats.length) {
            return toIterator(in);
        }
 
        try {
-            String format =
+            String format = inputFormats[pos];
            if (CommonsCompressUtil.isArchiveFormat(format)) {
                return new ArchiveInputStreamIterator(
                        createArchiveInputStream(format, in));
            } else if (CommonsCompressUtil.isCompressorFormat(format)) {
-                return createInputStreamIterator(
+                return createInputStreamIterator(inputFormats, pos + 1,
                        createCompressorInputStream(format, in));
            }
            throw new IOException("Unsupported format is configured. format:"
@@ -169,6 +171,12 @@ class CommonsCompressProvider implements Provider {
                     "Failed to detect a file format. Please try to set a format explicitly.",
                     e);
            }
+        }
+
+        if (CompressorStreamFactory.GZIP.equalsIgnoreCase(format)) {
+            return new GzipCompressorInputStream(in, true);
+        } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(format)) {
+            return new BZip2CompressorInputStream(in, true);
        } else {
            return factory.createCompressorInputStream(format, in);
        }
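The two-argument constructors introduced above enable Commons Compress's decompressConcatenated mode; without that flag a GzipCompressorInputStream or BZip2CompressorInputStream stops at the end of the first compressed member. A minimal standalone sketch (illustrative class, not part of the plugin) of the difference:

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
import org.apache.commons.compress.utils.IOUtils;

public class ConcatenatedGzipDemo {
    // Compress a string into a single gzip member.
    private static byte[] gzip(String s) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (GzipCompressorOutputStream gz = new GzipCompressorOutputStream(bos)) {
            gz.write(s.getBytes(StandardCharsets.UTF_8));
        }
        return bos.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        // Two gzip members back to back, like `cat a.gz b.gz > concatenated.gz`.
        ByteArrayOutputStream concatenated = new ByteArrayOutputStream();
        concatenated.write(gzip("1,foo\n"));
        concatenated.write(gzip("2,bar\n"));
        byte[] data = concatenated.toByteArray();

        // Default constructor: decoding stops after the first member ("1,foo").
        try (GzipCompressorInputStream in =
                new GzipCompressorInputStream(new ByteArrayInputStream(data))) {
            System.out.print(new String(IOUtils.toByteArray(in), StandardCharsets.UTF_8));
        }

        // decompressConcatenated = true: every member is decoded ("1,foo" and "2,bar").
        try (GzipCompressorInputStream in =
                new GzipCompressorInputStream(new ByteArrayInputStream(data), true)) {
            System.out.print(new String(IOUtils.toByteArray(in), StandardCharsets.UTF_8));
        }
    }
}
```

The same flag exists on BZip2CompressorInputStream, which is why both new branches above pass true.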
data/src/main/java/org/embulk/decoder/CommonsCompressUtil.java
CHANGED
@@ -22,6 +22,8 @@ class CommonsCompressUtil {
         ArchiveStreamFactory.ZIP,
     };
 
+    // Even indexes have both extensions and aliases. And odd indexes are
+    // CompressorStreamFactory values.
     static final String[] compressorFormats = {
         CompressorStreamFactory.BZIP2,
         CompressorStreamFactory.DEFLATE,
@@ -32,6 +34,8 @@ class CommonsCompressUtil {
         CompressorStreamFactory.SNAPPY_RAW,
         CompressorStreamFactory.XZ,
         CompressorStreamFactory.Z,
+        "bz2", // These values should be handled by normalizeFormats
+        "gzip",
     };
 
     // This table is even indexes have short extensions and odd indexes has
@@ -94,7 +98,7 @@ class CommonsCompressUtil {
         if (isAutoDetect(format)) {
             return null;
         } else if (isArchiveFormat(format) || isCompressorFormat(format)) {
-            return splitAndReverse(format);
+            return normalizeFormats(splitAndReverse(format));
         }
 
         String[] formats = toSolidCompressionFormats(format);
@@ -102,7 +106,7 @@ class CommonsCompressUtil {
             return formats;
         }
 
-        formats = splitAndReverse(format);
+        formats = normalizeFormats(splitAndReverse(format));
 
         for (String s : formats) {
             if (!(isArchiveFormat(s) || isCompressorFormat(s))) {
@@ -132,4 +136,20 @@ class CommonsCompressUtil {
         Collections.reverse(result);
         return result.toArray(new String[result.size()]);
     }
+
+    private static String[] normalizeFormats(String... formats) {
+        if (formats == null || formats.length == 0) {
+            return formats;
+        }
+
+        for (int i = 0;i < formats.length;i++) {
+            if (formats[i].equalsIgnoreCase("gzip")) {
+                formats[i] = CompressorStreamFactory.GZIP;
+            } else if (formats[i].equalsIgnoreCase("bz2")) {
+                formats[i] = CompressorStreamFactory.BZIP2;
+            }
+        }
+
+        return formats;
+    }
 }
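For context on the new normalizeFormats(): CompressorStreamFactory identifies gzip as "gz" and bzip2 as "bzip2", so the user-facing aliases "gzip" and "bz2" accepted in the format parameter have to be mapped onto those constants before they reach the factory. A standalone sketch of the same mapping (hypothetical class name, not the plugin's code):

```java
import org.apache.commons.compress.compressors.CompressorStreamFactory;

public class FormatAliasDemo {
    // Mirror of the alias handling: map "gzip"/"bz2" to the names
    // CompressorStreamFactory actually recognizes.
    static String normalize(String format) {
        if ("gzip".equalsIgnoreCase(format)) {
            return CompressorStreamFactory.GZIP;   // "gz"
        } else if ("bz2".equalsIgnoreCase(format)) {
            return CompressorStreamFactory.BZIP2;  // "bzip2"
        }
        return format; // everything else passes through unchanged
    }

    public static void main(String[] args) {
        System.out.println(normalize("gzip")); // gz
        System.out.println(normalize("bz2"));  // bzip2
        System.out.println(normalize("tar"));  // tar
    }
}
```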
data/src/test/java/org/embulk/decoder/TestCommonsCompressProvider.java
CHANGED
@@ -240,7 +240,43 @@ public class TestCommonsCompressProvider {
             verifyContents(it, "1,foo", "2,bar");
         }
     }
-
+
+    @Test
+    public void testCreateInputStreamConcatenatedGZ() throws Exception {
+        try (CommonsCompressProvider provider = new CommonsCompressProvider(task, files)) {
+            Iterator<InputStream> it = provider.createInputStreamIterator(
+                    new String[]{CompressorStreamFactory.GZIP}, 0, getResourceInputStream("concatenated.csv.gz"));
+            verifyContents(it, "1,foo\n2,bar");
+        }
+    }
+
+    @Test
+    public void testCreateInputStreamConcatenatedGZip() throws Exception {
+        try (CommonsCompressProvider provider = new CommonsCompressProvider(task, files)) {
+            Iterator<InputStream> it = provider.createInputStreamIterator(
+                    CommonsCompressUtil.toFormats("gzip"), 0, getResourceInputStream("concatenated.csv.gz"));
+            verifyContents(it, "1,foo\n2,bar");
+        }
+    }
+
+    @Test
+    public void testCreateInputStreamConcatenatedBZip2() throws Exception {
+        try (CommonsCompressProvider provider = new CommonsCompressProvider(task, files)) {
+            Iterator<InputStream> it = provider.createInputStreamIterator(
+                    new String[]{CompressorStreamFactory.BZIP2}, 0, getResourceInputStream("concatenated.csv.bz2"));
+            verifyContents(it, "1,foo\n2,bar");
+        }
+    }
+
+    @Test
+    public void testCreateInputStreamConcatenatedBZ2() throws Exception {
+        try (CommonsCompressProvider provider = new CommonsCompressProvider(task, files)) {
+            Iterator<InputStream> it = provider.createInputStreamIterator(
+                    CommonsCompressUtil.toFormats("bz2"), 0, getResourceInputStream("concatenated.csv.bz2"));
+            verifyContents(it, "1,foo\n2,bar");
+        }
+    }
+
     @Test
     public void testClose() throws Exception {
         CommonsCompressProvider provider = new CommonsCompressProvider(task, files);
@@ -303,7 +339,7 @@ public class TestCommonsCompressProvider {
         }
     }
 
-    @Test(expected=
+    @Test(expected=Exception.class)
     public void testCreateCompressorInputStreamWrongFormat() throws Exception {
         try (CommonsCompressProvider provider = new CommonsCompressProvider(task, files)) {
             provider.createCompressorInputStream("bzip2",
|
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-decoder-commons-compress
 version: !ruby/object:Gem::Version
-  version: 0.3.1
+  version: 0.3.2
 platform: ruby
 authors:
 - hata
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2016-03-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -46,6 +46,7 @@ extensions: []
 extra_rdoc_files: []
 files:
 - .gitignore
+- .travis.yml
 - LICENSE.txt
 - README.md
 - build.gradle
@@ -54,6 +55,36 @@ files:
 - gradlew
 - gradlew.bat
 - lib/embulk/decoder/commons-compress.rb
+- src/integration-test/java/org/embulk/filter/TestIntegration.java
+- src/integration-test/resources/concatenated.csv.bz2
+- src/integration-test/resources/concatenated.csv.gz
+- src/integration-test/resources/config_ar.yml
+- src/integration-test/resources/config_bz2.yml
+- src/integration-test/resources/config_concatenated_bz2.yml
+- src/integration-test/resources/config_concatenated_bzip2.yml
+- src/integration-test/resources/config_concatenated_gz.yml
+- src/integration-test/resources/config_concatenated_gzip.yml
+- src/integration-test/resources/config_gz.yml
+- src/integration-test/resources/config_tar.Z.yml
+- src/integration-test/resources/config_tar.bz2.yml
+- src/integration-test/resources/config_tar.gz.yml
+- src/integration-test/resources/config_tar.yml
+- src/integration-test/resources/config_tgz.yml
+- src/integration-test/resources/config_zip.yml
+- src/integration-test/resources/header.csv
+- src/integration-test/resources/sample_0.tar
+- src/integration-test/resources/sample_1.csv
+- src/integration-test/resources/sample_1.csv.bz2
+- src/integration-test/resources/sample_1.csv.gz
+- src/integration-test/resources/sample_1.tar
+- src/integration-test/resources/sample_2.csv
+- src/integration-test/resources/samples.ar
+- src/integration-test/resources/samples.tar
+- src/integration-test/resources/samples.tar.Z
+- src/integration-test/resources/samples.tar.bz2
+- src/integration-test/resources/samples.tar.gz
+- src/integration-test/resources/samples.tgz
+- src/integration-test/resources/samples.zip
 - src/main/java/org/embulk/decoder/ArchiveInputStreamIterator.java
 - src/main/java/org/embulk/decoder/CommonsCompressDecoderPlugin.java
 - src/main/java/org/embulk/decoder/CommonsCompressFileInput.java
@@ -64,6 +95,8 @@ files:
 - src/test/java/org/embulk/decoder/TestCommonsCompressFileInput.java
 - src/test/java/org/embulk/decoder/TestCommonsCompressProvider.java
 - src/test/java/org/embulk/decoder/TestCommonsCompressUtil.java
+- src/test/resources/org/embulk/decoder/concatenated.csv.bz2
+- src/test/resources/org/embulk/decoder/concatenated.csv.gz
 - src/test/resources/org/embulk/decoder/sample_0.tar
 - src/test/resources/org/embulk/decoder/sample_1.csv
 - src/test/resources/org/embulk/decoder/sample_1.csv.bz2
@@ -78,7 +111,7 @@ files:
 - src/test/resources/org/embulk/decoder/samples.tgz
 - src/test/resources/org/embulk/decoder/samples.zip
 - classpath/commons-compress-1.9.jar
-- classpath/embulk-decoder-commons-compress-0.3.1.jar
+- classpath/embulk-decoder-commons-compress-0.3.2.jar
 homepage: https://github.com/hata/embulk-decoder-commons-compress
 licenses:
 - MIT