embulk-decoder-unzip 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a7752a0d8a9deefa4462b42140a47b76de3d7f21
4
- data.tar.gz: 936158b3cdbff5031e5263dc9a97cfe0ec90495f
3
+ metadata.gz: 102affbd723ee4a9398356eadbd7fca47b480ad5
4
+ data.tar.gz: 6cdc51537a8b63b225d99af80593e33c038e550c
5
5
  SHA512:
6
- metadata.gz: 6d6a2dc184c041d52b38a1027ca4a619c803af4814cece3cdc7cfea1e564bd488780bce1588d0c5244b23e60a1fea265d2ea1119c3875600752993b7cceea5c2
7
- data.tar.gz: 2c0ae19262466aff5c6f359648a60e5b9ecf55d93b161117912a970fdacc115815cf09d2eea9cbef83ccc8e148a5b5ed17dc934a73176c376b4fdb700986bf66
6
+ metadata.gz: 4df92adfe66b3aeec9ecd8bd4a422554f1992e127d7e7e17df057f21e5900c1397ba4b1bfd6e96e71b669ad3d4947c35636d961df04c3e06818a88034666d9ce
7
+ data.tar.gz: f832f84ee19656949144fd62b507da09d76a0d6f8b0f2f9af17ee2d52124b847e31065918a03588937e5a23153a902b4ed1620d3fb43a9293da3c0bbf99e92c8
data/README.md CHANGED
@@ -1,38 +1,4 @@
1
- # Unzip decoder plugin for Embulk
1
+ Original code is copied from https://github.com/hata/embulk-decoder-commons-compress
2
2
 
3
- TODO: Write short description here and build.gradle file.
4
-
5
- ## Overview
6
-
7
- * **Plugin type**: decoder
8
- * **Guess supported**: no
9
-
10
- ## Configuration
11
-
12
- - **option1**: description (integer, required)
13
- - **option2**: description (string, default: `"myvalue"`)
14
- - **option3**: description (string, default: `null`)
15
-
16
- ## Example
17
-
18
- ```yaml
19
- in:
20
- type: any output input plugin type
21
- decoders:
22
- - type: unzip
23
- option1: example1
24
- option2: example2
25
- ```
26
-
27
- (If guess supported) you don't have to write `decoder:` section in the configuration file. After writing `in:` section, you can let embulk guess `decoder:` section using this command:
28
-
29
- ```
30
- $ embulk gem install embulk-decoder-unzip
31
- $ embulk guess -g unzip config.yml -o guessed.yml
32
- ```
33
-
34
- ## Build
35
-
36
- ```
37
- $ ./gradlew gem # -t to watch change of files and rebuild continuously
38
- ```
3
+ This is just an upgrade of commons-compress from 1.13 to 1.20. It's a solution to the unzip error.
4
+ The reason for this is to deal with unzip errors.
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.1"
16
+ version = "0.1.2"
17
17
 
18
18
  sourceCompatibility = 1.8
19
19
  targetCompatibility = 1.8
@@ -21,6 +21,7 @@ targetCompatibility = 1.8
21
21
  dependencies {
22
22
  compile "org.embulk:embulk-core:0.9.23"
23
23
  provided "org.embulk:embulk-core:0.9.23"
24
+ compile "org.apache.commons:commons-compress:1.20"
24
25
  // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
25
26
  testCompile "junit:junit:4.+"
26
27
  }
@@ -0,0 +1,87 @@
1
+ package org.embulk.decoder.unzip;
2
+
3
+ import java.io.IOException;
4
+ import java.io.InputStream;
5
+ import java.util.Iterator;
6
+
7
+ import org.apache.commons.compress.archivers.ArchiveEntry;
8
+ import org.apache.commons.compress.archivers.ArchiveInputStream;
9
+
10
+ class ArchiveInputStreamIterator implements Iterator<InputStream> {
11
+ private ArchiveInputStream ain;
12
+ private ArchiveEntry entry;
13
+ private String matchRegex = "";
14
+ private boolean endOfArchive = false;
15
+
16
+ ArchiveInputStreamIterator(ArchiveInputStream ain)
17
+ {
18
+ this.ain = ain;
19
+ }
20
+
21
+ ArchiveInputStreamIterator(ArchiveInputStream ain, String matchRegex) {
22
+ this.ain = ain;
23
+ this.matchRegex = matchRegex;
24
+ }
25
+
26
+ @Override
27
+ public boolean hasNext() {
28
+ try {
29
+ return checkNext();
30
+ } catch (IOException e) {
31
+ throw new RuntimeException(e);
32
+ }
33
+ }
34
+
35
+ @Override
36
+ public InputStream next() {
37
+ try {
38
+ if (checkNext()) {
39
+ entry = null;
40
+ } else {
41
+ return null;
42
+ }
43
+
44
+ return ain;
45
+ } catch (IOException e) {
46
+ throw new RuntimeException(e);
47
+ }
48
+ }
49
+
50
+ @Override
51
+ public void remove() {
52
+ throw new UnsupportedOperationException();
53
+ }
54
+
55
+ private boolean checkNext() throws IOException {
56
+ if (endOfArchive) {
57
+ return false;
58
+ } else if (entry != null) {
59
+ return true;
60
+ }
61
+
62
+ while (true) {
63
+ entry = ain.getNextEntry();
64
+ if (entry == null) {
65
+ endOfArchive = true;
66
+ return false;
67
+ } else if (entry.isDirectory()) {
68
+ continue;
69
+ } else if (!matchName(entry, matchRegex)){
70
+ continue;
71
+ } else {
72
+ return true;
73
+ }
74
+ }
75
+ }
76
+
77
+ private boolean matchName(ArchiveEntry entry, String regex) {
78
+ String name = entry.getName();
79
+ if(regex == null || regex.equals("")){
80
+ return true;
81
+ } else if(name == null) {
82
+ return false;
83
+ } else {
84
+ return name.matches(regex);
85
+ }
86
+ }
87
+ }
@@ -0,0 +1,73 @@
1
+ package org.embulk.decoder.unzip;
2
+
3
+ import java.io.IOException;
4
+ import java.io.InputStream;
5
+
6
+ import org.embulk.spi.Buffer;
7
+ import org.embulk.spi.BufferAllocator;
8
+ import org.embulk.spi.FileInput;
9
+ import org.embulk.spi.util.InputStreamFileInput.Provider;
10
+
11
+
12
+ class CommonsCompressFileInput implements FileInput
13
+ {
14
+ private final BufferAllocator allocator;
15
+ private final Provider provider;
16
+ private InputStream current;
17
+
18
+ public CommonsCompressFileInput(BufferAllocator allocator, Provider provider)
19
+ {
20
+ this.allocator = allocator;
21
+ this.provider = provider;
22
+ this.current = null;
23
+ }
24
+
25
+ @Override
26
+ public Buffer poll()
27
+ {
28
+ if (current == null) {
29
+ throw new IllegalStateException("nextFile() must be called before poll()");
30
+ }
31
+ Buffer buffer = allocator.allocate();
32
+ try {
33
+ int n = current.read(buffer.array(), buffer.offset(), buffer.capacity());
34
+ if (n < 0) {
35
+ return null;
36
+ }
37
+ buffer.limit(n);
38
+ Buffer b = buffer;
39
+ buffer = null;
40
+ return b;
41
+ } catch (IOException ex) {
42
+ throw new RuntimeException(ex);
43
+ } finally {
44
+ if (buffer != null) {
45
+ buffer.release();
46
+ buffer = null;
47
+ }
48
+ }
49
+ }
50
+
51
+ @Override
52
+ public boolean nextFile()
53
+ {
54
+ try {
55
+ // NOTE: DO NOT close current because this stream may
56
+ // be one of the files in an archive. The Provider manages it.
57
+ current = provider.openNext();
58
+ return current != null;
59
+ } catch (IOException ex) {
60
+ throw new RuntimeException(ex);
61
+ }
62
+ }
63
+
64
+ @Override
65
+ public void close()
66
+ {
67
+ try {
68
+ provider.close();
69
+ } catch (IOException ex) {
70
+ throw new RuntimeException(ex);
71
+ }
72
+ }
73
+ }
@@ -0,0 +1,190 @@
1
+ package org.embulk.decoder.unzip;
2
+
3
+ import java.io.BufferedInputStream;
4
+ import java.io.IOException;
5
+ import java.io.InputStream;
6
+ import java.util.ArrayList;
7
+ import java.util.Iterator;
8
+ import java.util.List;
9
+
10
+ import org.apache.commons.compress.archivers.ArchiveException;
11
+ import org.apache.commons.compress.archivers.ArchiveInputStream;
12
+ import org.apache.commons.compress.archivers.ArchiveStreamFactory;
13
+ import org.apache.commons.compress.compressors.CompressorException;
14
+ import org.apache.commons.compress.compressors.CompressorInputStream;
15
+ import org.apache.commons.compress.compressors.CompressorStreamFactory;
16
+ import org.embulk.decoder.unzip.UnzipDecoderPlugin.PluginTask;
17
+ import org.embulk.spi.util.FileInputInputStream;
18
+ import org.embulk.spi.util.InputStreamFileInput.Provider;
19
+
20
+ class CommonsCompressProvider implements Provider {
21
+ private static final String AUTO_DETECT_FORMAT = "";
22
+
23
+ private final FileInputInputStream files;
24
+ private final boolean formatAutoDetection;
25
+ private Iterator<InputStream> inputStreamIterator;
26
+ private String[] formats;
27
+ private final boolean decompressConcatenated;
28
+ private final String matchName;
29
+
30
+ CommonsCompressProvider(PluginTask task, FileInputInputStream files) {
31
+ this.files = files;
32
+ this.formatAutoDetection = task == null
33
+ || CommonsCompressUtil.isAutoDetect(task.getFormat());
34
+ if (!this.formatAutoDetection) {
35
+ formats = CommonsCompressUtil.toFormats(task.getFormat());
36
+ if (formats == null) {
37
+ throw new RuntimeException("Failed to get a format.");
38
+ }
39
+ }
40
+ this.decompressConcatenated = task == null
41
+ || task.getDecompressConcatenated();
42
+ this.matchName = (task == null)? "" : task.getMatchName();
43
+ }
44
+
45
+ @Override
46
+ public InputStream openNext() throws IOException {
47
+ while (true) {
48
+ if (inputStreamIterator == null) {
49
+ if (!files.nextFile()) {
50
+ return null;
51
+ }
52
+ inputStreamIterator = formatAutoDetection ? createInputStreamIterator(files)
53
+ : createInputStreamIterator(formats, 0, files);
54
+ } else {
55
+ if (inputStreamIterator.hasNext()) {
56
+ InputStream in = inputStreamIterator.next();
57
+ if (in == null) {
58
+ inputStreamIterator = null;
59
+ } else {
60
+ return in;
61
+ }
62
+ } else {
63
+ inputStreamIterator = null;
64
+ }
65
+ }
66
+ }
67
+ }
68
+
69
+ @Override
70
+ public void close() throws IOException {
71
+ inputStreamIterator = null;
72
+ if (files != null) {
73
+ files.close();
74
+ }
75
+ }
76
+
77
+ boolean isFormatAutoDetection() {
78
+ return formatAutoDetection;
79
+ }
80
+
81
+ String[] getFormats() {
82
+ return formats;
83
+ }
84
+
85
+ Iterator<InputStream> createInputStreamIterator(InputStream in)
86
+ throws IOException {
87
+ // It is required to support mark to detect a file format.
88
+ in = in.markSupported() ? in : new BufferedInputStream(in);
89
+ try {
90
+ return new ArchiveInputStreamIterator(
91
+ createArchiveInputStream(AUTO_DETECT_FORMAT, in),
92
+ this.matchName
93
+ );
94
+ } catch (IOException | ArchiveException e) {
95
+ // ArchiveStreamFactory set mark and reset the stream.
96
+ // So, we can use the same stream to check compressor.
97
+ try {
98
+ return toIterator(createCompressorInputStream(AUTO_DETECT_FORMAT, in));
99
+ } catch (CompressorException e2) {
100
+ throw new IOException("Failed to detect a file format.", e2);
101
+ }
102
+ }
103
+ }
104
+
105
+ /**
106
+ * Create iterator to list InputStream for each archived/compressed file.
107
+ *
108
+ * This can handle like the following formats:
109
+ * 1 archived format which defined in ArchiveStreamFactory(e.g. tar)
110
+ * 1 archived format and 1 compressor format defined in CompressorStreamFactory.(e.g. tar.bz2)
111
+ * 1 compressor format defined in CompressorStreamFactory.(e.g. bz2)
112
+ * (Actually, compressor formats can use two or more times in this code.
113
+ * But it is not common case.)
114
+ */
115
+ Iterator<InputStream> createInputStreamIterator(String[] inputFormats,
116
+ int pos, InputStream in) throws IOException {
117
+ if (pos >= inputFormats.length) {
118
+ return toIterator(in);
119
+ }
120
+
121
+ try {
122
+ String format = inputFormats[pos];
123
+ if (CommonsCompressUtil.isArchiveFormat(format)) {
124
+ return new ArchiveInputStreamIterator(
125
+ createArchiveInputStream(format, in));
126
+ } else if (CommonsCompressUtil.isCompressorFormat(format)) {
127
+ return createInputStreamIterator(inputFormats, pos + 1,
128
+ createCompressorInputStream(format, in));
129
+ }
130
+ throw new IOException("Unsupported format is configured. format:"
131
+ + format);
132
+ } catch (ArchiveException | CompressorException e) {
133
+ throw new IOException(e);
134
+ }
135
+ }
136
+
137
+ /**
138
+ * Create a new ArchiveInputStream to read an archive file based on a format
139
+ * parameter.
140
+ *
141
+ * If format is not set, this method tries to detect file format
142
+ * automatically. In this case, BufferedInputStream is used to wrap
143
+ * FileInputInputStream instance. BufferedInputStream may read a data
144
+ * partially when calling files.nextFile(). However, it doesn't matter
145
+ * because the partial read data should be discarded. And then this method
146
+ * is called again to create a new ArchiveInputStream.
147
+ *
148
+ * @return a new ArchiveInputStream instance.
149
+ */
150
+ ArchiveInputStream createArchiveInputStream(String format, InputStream in)
151
+ throws IOException, ArchiveException {
152
+ ArchiveStreamFactory factory = new ArchiveStreamFactory();
153
+ if (CommonsCompressUtil.isAutoDetect(format)) {
154
+ in = in.markSupported() ? in : new BufferedInputStream(in);
155
+ try {
156
+ return factory.createArchiveInputStream(in);
157
+ } catch (ArchiveException e) {
158
+ throw new IOException(
159
+ "Failed to detect a file format. Please try to set a format explicitly.",
160
+ e);
161
+ }
162
+ } else {
163
+ return factory.createArchiveInputStream(format, in);
164
+ }
165
+ }
166
+
167
+ CompressorInputStream createCompressorInputStream(String format,
168
+ InputStream in) throws IOException, CompressorException {
169
+ CompressorStreamFactory factory = new CompressorStreamFactory();
170
+ factory.setDecompressConcatenated(decompressConcatenated);
171
+ if (CommonsCompressUtil.isAutoDetect(format)) {
172
+ in = in.markSupported() ? in : new BufferedInputStream(in);
173
+ try {
174
+ return factory.createCompressorInputStream(in);
175
+ } catch (CompressorException e) {
176
+ throw new IOException(
177
+ "Failed to detect a file format. Please try to set a format explicitly.",
178
+ e);
179
+ }
180
+ } else {
181
+ return factory.createCompressorInputStream(format, in);
182
+ }
183
+ }
184
+
185
+ private Iterator<InputStream> toIterator(InputStream in) {
186
+ List<InputStream> list = new ArrayList<InputStream>(1);
187
+ list.add(in);
188
+ return list.iterator();
189
+ }
190
+ }
@@ -0,0 +1,155 @@
1
+ package org.embulk.decoder.unzip;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.Collections;
5
+ import java.util.List;
6
+
7
+ import org.apache.commons.compress.archivers.ArchiveStreamFactory;
8
+ import org.apache.commons.compress.compressors.CompressorStreamFactory;
9
+
10
+ class CommonsCompressUtil {
11
+ // TODO: It may be better to check performance between Set and array.
12
+ // NOTE: Some file types may not work in an environment because some required
13
+ // libraries are not found.
14
+ static final String[] archiveFormats = {
15
+ ArchiveStreamFactory.AR,
16
+ ArchiveStreamFactory.ARJ,
17
+ ArchiveStreamFactory.CPIO,
18
+ ArchiveStreamFactory.DUMP,
19
+ ArchiveStreamFactory.JAR,
20
+ ArchiveStreamFactory.SEVEN_Z,
21
+ ArchiveStreamFactory.TAR,
22
+ ArchiveStreamFactory.ZIP,
23
+ };
24
+
25
+ // CompressorStreamFactory format names, followed by the aliases "bz2" and
26
+ // "gzip", which normalizeFormats maps to the factory's names.
27
+ static final String[] compressorFormats = {
28
+ CompressorStreamFactory.BZIP2,
29
+ CompressorStreamFactory.DEFLATE,
30
+ CompressorStreamFactory.GZIP,
31
+ CompressorStreamFactory.LZMA,
32
+ CompressorStreamFactory.PACK200,
33
+ CompressorStreamFactory.SNAPPY_FRAMED,
34
+ CompressorStreamFactory.SNAPPY_RAW,
35
+ CompressorStreamFactory.XZ,
36
+ CompressorStreamFactory.Z,
37
+ "bz2", // These values should be handled by normalizeFormats
38
+ "gzip",
39
+ };
40
+
41
+ // In this table, even indexes hold short extensions and odd indexes hold
42
+ // the split formats for the preceding short extension.
43
+ private static final String[] solidCompressionFormats = {
44
+ "tgz", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.GZIP,
45
+ "tar.gz", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.GZIP,
46
+ "tbz", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.BZIP2,
47
+ "tbz2", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.BZIP2,
48
+ "tb2", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.BZIP2,
49
+ "tar.bz2", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.BZIP2,
50
+ "taz", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.Z,
51
+ "tz", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.Z,
52
+ "tar.Z", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.Z,
53
+ "tlz", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.LZMA,
54
+ "tar.lz", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.LZMA,
55
+ "tar.lzma", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.LZMA,
56
+ "txz", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.XZ,
57
+ "tar.xz", ArchiveStreamFactory.TAR + " " + CompressorStreamFactory.XZ
58
+ };
59
+
60
+ static boolean isArchiveFormat(String format) {
61
+ for (String fmt : archiveFormats) {
62
+ if (fmt.equalsIgnoreCase(format)) {
63
+ return true;
64
+ }
65
+ }
66
+ return false;
67
+ }
68
+
69
+ static boolean isCompressorFormat(String format) {
70
+ for (String fmt : compressorFormats) {
71
+ if (fmt.equalsIgnoreCase(format)) {
72
+ return true;
73
+ }
74
+ }
75
+ return false;
76
+ }
77
+
78
+ static boolean isAutoDetect(String format) {
79
+ return format == null || format.length() == 0;
80
+ }
81
+
82
+ /**
83
+ * Split solid compression formats and reorder to decode the formats
84
+ * based on this order.
85
+ *
86
+ * If format is a single format like "tar", then return
87
+ * new String[]{"tar"}.
88
+ * If format is a solid compression format like "tgz", then return
89
+ * new String[]{"gzip", "tar"}.
90
+ * If format is "tar bzip2", then return
91
+ * new String[]{"bzip2", "tar"}.
92
+ *
93
+ * @param format contains a file format or some file formats.
94
+ * @return a single format or multi format values.
95
+ * Otherwise, returns null.
96
+ */
97
+ static String[] toFormats(String format) {
98
+ if (isAutoDetect(format)) {
99
+ return null;
100
+ } else if (isArchiveFormat(format) || isCompressorFormat(format)) {
101
+ return normalizeFormats(splitAndReverse(format));
102
+ }
103
+
104
+ String[] formats = toSolidCompressionFormats(format);
105
+ if (formats != null) {
106
+ return formats;
107
+ }
108
+
109
+ formats = normalizeFormats(splitAndReverse(format));
110
+
111
+ for (String s : formats) {
112
+ if (!(isArchiveFormat(s) || isCompressorFormat(s))) {
113
+ return null;
114
+ }
115
+ }
116
+
117
+ return formats;
118
+ }
119
+
120
+ private static String[] toSolidCompressionFormats(String format) {
121
+ for (int i = 0;i < solidCompressionFormats.length; i+= 2) {
122
+ if (solidCompressionFormats[i].equalsIgnoreCase(format)) {
123
+ return splitAndReverse(solidCompressionFormats[i + 1]);
124
+ }
125
+ }
126
+ return null;
127
+ }
128
+
129
+ private static String[] splitAndReverse(String format) {
130
+ List<String> result = new ArrayList<>();
131
+ for (String s : format.split(" ")) {
132
+ if (s.length() > 0) {
133
+ result.add(s);
134
+ }
135
+ }
136
+ Collections.reverse(result);
137
+ return result.toArray(new String[result.size()]);
138
+ }
139
+
140
+ private static String[] normalizeFormats(String... formats) {
141
+ if (formats == null || formats.length == 0) {
142
+ return formats;
143
+ }
144
+
145
+ for (int i = 0;i < formats.length;i++) {
146
+ if (formats[i].equalsIgnoreCase("gzip")) {
147
+ formats[i] = CompressorStreamFactory.GZIP;
148
+ } else if (formats[i].equalsIgnoreCase("bz2")) {
149
+ formats[i] = CompressorStreamFactory.BZIP2;
150
+ }
151
+ }
152
+
153
+ return formats;
154
+ }
155
+ }
@@ -1,8 +1,5 @@
1
1
  package org.embulk.decoder.unzip;
2
2
 
3
- import java.io.InputStream;
4
- import java.io.IOException;
5
-
6
3
  import org.embulk.config.Config;
7
4
  import org.embulk.config.ConfigDefault;
8
5
  import org.embulk.config.ConfigInject;
@@ -13,7 +10,6 @@ import org.embulk.spi.BufferAllocator;
13
10
  import org.embulk.spi.DecoderPlugin;
14
11
  import org.embulk.spi.FileInput;
15
12
  import org.embulk.spi.util.FileInputInputStream;
16
- import org.embulk.spi.util.InputStreamFileInput;
17
13
 
18
14
  public class UnzipDecoderPlugin
19
15
  implements DecoderPlugin
@@ -21,6 +17,18 @@ public class UnzipDecoderPlugin
21
17
  public interface PluginTask
22
18
  extends Task
23
19
  {
20
+ @Config("format")
21
+ @ConfigDefault("\"\"")
22
+ public String getFormat();
23
+
24
+ @Config("decompress_concatenated")
25
+ @ConfigDefault("true")
26
+ public boolean getDecompressConcatenated();
27
+
28
+ @Config("match_name")
29
+ @ConfigDefault("\"\"")
30
+ public String getMatchName();
31
+
24
32
  // @Config("skip_on_error")
25
33
  // @ConfigDefault("true")
26
34
  // public boolean skipOnError();
@@ -40,40 +48,13 @@ public class UnzipDecoderPlugin
40
48
  @Override
41
49
  public FileInput open(TaskSource taskSource, FileInput fileInput)
42
50
  {
51
+ String zipFileName = fileInput.hintOfCurrentInputFileNameForLogging().get();
52
+ System.out.println(zipFileName);
43
53
  final PluginTask task = taskSource.loadTask(PluginTask.class);
44
54
 
45
55
  final FileInputInputStream files = new FileInputInputStream(fileInput);
46
-
47
- InputStreamFileInput isfi = null;
48
- try {
49
- isfi = new InputStreamFileInput(
50
- task.getBufferAllocator(),
51
- new InputStreamFileInput.Provider() {
52
- public InputStream openNext() throws IOException
53
- {
54
- if (!files.nextFile()) {
55
- return null;
56
- }
57
- return newDecoderInputStream(task, files);
58
- }
59
-
60
- public void close() throws IOException
61
- {
62
- files.close();
63
- }
64
- });
65
- } catch (Exception e) {
66
- // if(task.skipOnError()) {
67
- // System.out.println("skip: " + isfi.hintOfCurrentInputFileNameForLogging());
68
- // return null;
69
- // } else
70
- throw new RuntimeException(e);
71
- }
72
- return isfi;
56
+ return new CommonsCompressFileInput(task.getBufferAllocator(),
57
+ new CommonsCompressProvider(task, files));
73
58
  }
74
59
 
75
- private static InputStream newDecoderInputStream(PluginTask task, InputStream file) throws IOException
76
- {
77
- return new UnzipInputStream(file);
78
- }
79
60
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-decoder-unzip
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - fundoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-07 00:00:00.000000000 Z
11
+ date: 2020-06-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -49,7 +49,8 @@ files:
49
49
  - LICENSE.txt
50
50
  - README.md
51
51
  - build.gradle
52
- - classpath/embulk-decoder-unzip-0.1.1.jar
52
+ - classpath/commons-compress-1.20.jar
53
+ - classpath/embulk-decoder-unzip-0.1.2.jar
53
54
  - config/checkstyle/checkstyle.xml
54
55
  - config/checkstyle/default.xml
55
56
  - gradle/wrapper/gradle-wrapper.jar
@@ -58,12 +59,16 @@ files:
58
59
  - gradlew.bat
59
60
  - lib/embulk/decoder/unzip.rb
60
61
  - lib/embulk/guess/unzip.rb
62
+ - src/main/java/org/embulk/decoder/unzip/ArchiveInputStreamIterator.java
63
+ - src/main/java/org/embulk/decoder/unzip/CommonsCompressFileInput.java
64
+ - src/main/java/org/embulk/decoder/unzip/CommonsCompressProvider.java
65
+ - src/main/java/org/embulk/decoder/unzip/CommonsCompressUtil.java
61
66
  - src/main/java/org/embulk/decoder/unzip/UnzipDecoderPlugin.java
62
- - src/main/java/org/embulk/decoder/unzip/UnzipInputStream.java
63
67
  - src/main/resources/config.yml
64
68
  - src/main/resources/sample.csv
65
69
  - src/main/resources/test1.zip
66
70
  - src/main/resources/test2.zip
71
+ - src/main/resources/test3.zip
67
72
  - src/test/java/org/embulk/decoder/unzip/TestUnzipDecoderPlugin.java
68
73
  homepage:
69
74
  licenses:
@@ -1,29 +0,0 @@
1
- package org.embulk.decoder.unzip;
2
-
3
- import java.io.BufferedInputStream;
4
- import java.io.IOException;
5
- import java.io.InputStream;
6
- import java.nio.charset.StandardCharsets;
7
- import java.util.zip.ZipEntry;
8
- import java.util.zip.ZipInputStream;
9
-
10
- public class UnzipInputStream extends InputStream {
11
-
12
- private ZipInputStream zis;
13
-
14
- public UnzipInputStream(InputStream is) {
15
- zis = new ZipInputStream(new BufferedInputStream(is), StandardCharsets.UTF_8);
16
- }
17
-
18
- @Override
19
- public int read() throws IOException {
20
-
21
- ZipEntry zipentry = zis.getNextEntry();
22
- int v = -1;
23
- if(zipentry != null)
24
- v = zis.read();
25
- return v;
26
-
27
- }
28
-
29
- }