embulk-input-hdfs 0.2.1 → 0.3.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +20 -17
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/input/hdfs/ConfigurationFactory.java +60 -0
- data/src/main/java/org/embulk/input/hdfs/HdfsFileInputPlugin.java +107 -200
- data/src/main/java/org/embulk/input/hdfs/Strftime.java +34 -0
- data/src/main/java/org/embulk/input/hdfs/TargetFileInfo.java +174 -0
- data/src/main/java/org/embulk/input/hdfs/{PartialFileList.java → TargetFileInfoList.java} +73 -90
- data/src/main/java/org/embulk/input/hdfs/TargetFileInputStreamFactory.java +128 -0
- data/src/main/java/org/embulk/input/hdfs/{PartialFileInputStream.java → TargetFilePartialInputStream.java} +4 -6
- data/src/test/java/org/embulk/input/hdfs/TestHdfsFileInputPlugin.java +6 -8
- metadata +19 -18
- data/src/main/java/org/embulk/input/hdfs/ConfigurationBuilder.java +0 -82
- data/src/main/java/org/embulk/input/hdfs/PartialFile.java +0 -48
- data/src/main/java/org/embulk/input/hdfs/PartialFileInputStreamBuilder.java +0 -125
data/src/main/java/org/embulk/input/hdfs/TargetFileInputStreamFactory.java ADDED

@@ -0,0 +1,128 @@
+package org.embulk.input.hdfs;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.embulk.spi.Exec;
+import org.slf4j.Logger;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.SequenceInputStream;
+
+public class TargetFileInputStreamFactory
+{
+    private static final Logger logger = Exec.getLogger(TargetFileInputStreamFactory.class);
+    private final FileSystem fs;
+
+    public TargetFileInputStreamFactory(FileSystem fs)
+    {
+        this.fs = fs;
+    }
+
+    public InputStream create(TargetFileInfo t)
+            throws IOException
+    {
+        InputStream is = createSuitableInputStream(t);
+        return createInputStreamWithHeaders(is, t);
+    }
+
+    private InputStream createSuitableInputStream(TargetFileInfo t)
+            throws IOException
+    {
+        if (t.getIsDecompressible()) {
+            logger.debug("embulk-input-hdfs: createDecompressedInputStream: {}", t.getPathString());
+            return createDecompressedInputStream(t);
+        }
+        else if (t.getIsPartitionable()) {
+            logger.debug("embulk-input-hdfs: createPartialInputStream: {}, start:{}, end:{}",
+                    t.getPathString(), t.getStart(), t.getEnd());
+            return createPartialInputStream(t);
+        }
+        else {
+            logger.debug("embulk-input-hdfs: createOriginalInputStream: {}", t.getPathString());
+            return createOriginalInputStream(t);
+        }
+    }
+
+    private InputStream createInputStreamWithHeaders(InputStream original, TargetFileInfo t)
+            throws IOException
+    {
+        if (t.getStart() > 0 && t.getNumHeaderLines() > 0) {
+            logger.debug("embulk-input-hdfs: createInputStreamWithHeaders: {}", t.getPathString());
+            InputStream headers = createHeadersInputStream(t);
+            return new SequenceInputStream(headers, original);
+        }
+        else {
+            return original;
+        }
+    }
+
+    private InputStream createOriginalInputStream(TargetFileInfo t)
+            throws IOException
+    {
+        return fs.open(new Path(t.getPathString()));
+    }
+
+    private InputStream createDecompressedInputStream(TargetFileInfo t)
+            throws IOException
+    {
+        InputStream original = createOriginalInputStream(t);
+        CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf());
+        CompressionCodec codec = factory.getCodec(new Path(t.getPathString()));
+        if (codec == null) {
+            logger.debug("embulk-input-hdfs: CompressionCodec: null: {}", t.getPathString());
+            return original;
+        }
+        else {
+            logger.debug("embulk-input-hdfs: CompressionCodec: {}: {}", codec, t.getPathString());
+            return codec.createInputStream(original);
+        }
+    }
+
+    private InputStream createPartialInputStream(TargetFileInfo t)
+            throws IOException
+    {
+        InputStream original = createOriginalInputStream(t);
+        return new TargetFilePartialInputStream(original, t.getStart(), t.getEnd());
+    }
+
+    private InputStream createHeadersInputStream(TargetFileInfo t)
+            throws IOException
+    {
+        ByteArrayOutputStream header = new ByteArrayOutputStream();
+        int skippedHeaders = 0;
+        InputStream is = createOriginalInputStream(t);
+        try (BufferedInputStream in = new BufferedInputStream(is)) {
+            while (true) {
+                int c = in.read();
+                if (c < 0) {
+                    break;
+                }
+
+                header.write(c);
+
+                if (c == '\n') {
+                    skippedHeaders++;
+                }
+                else if (c == '\r') {
+                    int c2 = in.read();
+                    if (c2 == '\n') {
+                        header.write(c2);
+                    }
+                    skippedHeaders++;
+                }
+
+                if (skippedHeaders >= t.getNumHeaderLines()) {
+                    break;
+                }
+            }
+        }
+        header.close();
+        return new ByteArrayInputStream(header.toByteArray());
+    }
+}
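Taken as a whole, the factory picks one of three stream shapes (decompressed, partial, or raw) and then prepends header bytes for chunks that do not start at byte 0. A minimal usage sketch follows; note that TargetFileInfo (+174 lines) is not shown in this diff, so the constructor used here is a hypothetical stand-in for illustration only.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

import java.io.IOException;
import java.io.InputStream;

public class FactoryUsageSketch
{
    public static void main(String[] args)
            throws IOException
    {
        FileSystem fs = FileSystem.get(new Configuration());
        TargetFileInputStreamFactory factory = new TargetFileInputStreamFactory(fs);

        // Hypothetical constructor: the real TargetFileInfo is not part of
        // this hunk, so the field names and order here are assumptions.
        TargetFileInfo t = new TargetFileInfo(
                "/tmp/sample_01.csv", // pathString
                0L,                   // start offset
                1024L,                // end offset
                1,                    // numHeaderLines
                false,                // isDecompressible
                true);                // isPartitionable

        // The factory opens the file and slices [start, end); because
        // start == 0 here, the header-prepending branch is skipped.
        try (InputStream in = factory.create(t)) {
            byte[] buf = new byte[4096];
            int n = in.read(buf);
            System.out.println("read " + n + " bytes");
        }
    }
}
```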
data/src/main/java/org/embulk/input/hdfs/{PartialFileInputStream.java → TargetFilePartialInputStream.java} RENAMED

@@ -1,15 +1,13 @@
 package org.embulk.input.hdfs;
 
+// Ported from https://github.com/hito4t/embulk-input-filesplit/blob/master/src/main/java/org/embulk/input/filesplit/PartialFileInputStream.java
+
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.PushbackInputStream;
 
-/**
- * Created by takahiro.nakayama on 2/13/16.
- * ref. https://github.com/hito4t/embulk-input-filesplit/blob/master/src/main/java/org/embulk/input/filesplit/PartialFileInputStream.java
- */
-public class PartialFileInputStream
+public class TargetFilePartialInputStream
     extends InputStream
 {
     private final PushbackInputStream original;
@@ -18,7 +16,7 @@ public class PartialFileInputStream
     private long current;
     private boolean eof;
 
-    public PartialFileInputStream(InputStream original, long start, long end)
+    public TargetFilePartialInputStream(InputStream original, long start, long end)
     {
         this.original = new PushbackInputStream(new BufferedInputStream(original));
         this.start = start;
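One behavioral note, inherited from the embulk-input-filesplit class this was ported from: the stream does not cut at the exact byte offsets but aligns both ends to line boundaries, so parallel tasks always see whole records. A small self-contained sketch of driving it directly; the printed result assumes that alignment behavior.

```java
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

public class PartialReadSketch
{
    public static void main(String[] args)
            throws IOException
    {
        byte[] data = "id,name\n1,foo\n2,bar\n3,baz\n".getBytes(StandardCharsets.UTF_8);

        // Ask for bytes [10, 18): both offsets fall mid-line, so the stream
        // should skip forward past the next newline at the start and read
        // through to the end of the line at the end, yielding whole records.
        try (InputStream in = new TargetFilePartialInputStream(
                new ByteArrayInputStream(data), 10, 18)) {
            StringBuilder sb = new StringBuilder();
            int c;
            while ((c = in.read()) >= 0) {
                sb.append((char) c);
            }
            System.out.print(sb); // expected: "2,bar" plus newline, given the alignment
        }
    }
}
```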
data/src/test/java/org/embulk/input/hdfs/TestHdfsFileInputPlugin.java CHANGED

@@ -30,7 +30,6 @@ import javax.annotation.Nullable;
 
 import java.io.File;
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.List;
 
 import static org.junit.Assert.assertEquals;
@@ -43,7 +42,6 @@ public class TestHdfsFileInputPlugin
     @Rule
     public ExpectedException exception = ExpectedException.none();
 
-    private Logger logger = runtime.getExec().getLogger(TestHdfsFileInputPlugin.class);
     private HdfsFileInputPlugin plugin;
     private FileInputRunner runner;
     private MockPageOutput output;
@@ -67,11 +65,11 @@ public class TestHdfsFileInputPlugin
         assertEquals(path.toString(), task.getPath());
         assertEquals(Lists.newArrayList(), task.getConfigFiles());
         assertEquals(Maps.newHashMap(), task.getConfig());
-        assertEquals(true, task.getPartition());
+        assertEquals(true, task.getWillPartition());
         assertEquals(0, task.getRewindSeconds());
         assertEquals(-1, task.getApproximateNumPartitions());
         assertEquals(0, task.getSkipHeaderLines());
-        assertEquals(false, task.getDecompression());
+        assertEquals(false, task.getWillDecompress());
     }
 
     @Test(expected = ConfigException.class)
@@ -103,9 +101,9 @@ public class TestHdfsFileInputPlugin
         });
 
         List<String> resultFList = Lists.newArrayList();
-        for (int i = 0; i < task.
-        for (
-        resultFList.add(
+        for (int i = 0; i < task.getTargetFileInfoList().getTaskCount();i++) {
+            for (TargetFileInfo targetFileInfo : task.getTargetFileInfoList().get(i)) {
+                resultFList.add(targetFileInfo.getPathString());
             }
         }
         assertEquals(fileList.size(), resultFList.size());
@@ -152,7 +150,7 @@ public class TestHdfsFileInputPlugin
         config.set("path", "/tmp/%Y-%m-%d");
         config.set("rewind_seconds", 86400);
         PluginTask task = config.loadConfig(PluginTask.class);
-        String result =
+        String result = new Strftime(task).format(task.getPath());
         String expected = task.getJRuby().runScriptlet("(Time.now - 86400).strftime('/tmp/%Y-%m-%d')").toString();
         assertEquals(expected, result);
     }
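The last hunk pins down the contract of the new Strftime helper (+34 lines, not shown in this diff): render the path's Ruby strftime placeholders against Time.now minus rewind_seconds, via the task's JRuby runtime. A rough plain-Java illustration of that contract, not the plugin's implementation:

```java
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;

public class StrftimeSketch
{
    // Resolve a "/tmp/%Y-%m-%d"-style path, rewound by the given seconds.
    // Only the placeholders used in the test are handled here.
    static String format(String path, long rewindSeconds)
    {
        ZonedDateTime t = ZonedDateTime.now().minusSeconds(rewindSeconds);
        return path
                .replace("%Y", t.format(DateTimeFormatter.ofPattern("yyyy")))
                .replace("%m", t.format(DateTimeFormatter.ofPattern("MM")))
                .replace("%d", t.format(DateTimeFormatter.ofPattern("dd")));
    }

    public static void main(String[] args)
    {
        System.out.println(format("/tmp/%Y-%m-%d", 86400)); // yesterday's directory
    }
}
```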
metadata CHANGED
@@ -1,43 +1,43 @@
 --- !ruby/object:Gem::Specification
 name: embulk-input-hdfs
 version: !ruby/object:Gem::Version
-  version: 0.2.1
+  version: 0.3.0
 platform: ruby
 authors:
 - Civitaspo
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-
+date: 2016-09-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: bundler
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ~>
-      - !ruby/object:Gem::Version
-        version: '1.0'
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
         version: '1.0'
+  name: bundler
   prerelease: false
   type: :development
-- !ruby/object:Gem::Dependency
-  name: rake
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - '>='
+    - - ~>
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: '1.0'
+- !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - '>='
       - !ruby/object:Gem::Version
         version: '10.0'
+  name: rake
   prerelease: false
   type: :development
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '10.0'
 description: Reads files stored on Hdfs.
 email:
 - civitaspo@gmail.com
@@ -61,12 +61,13 @@ files:
 - gradlew
 - gradlew.bat
 - lib/embulk/input/hdfs.rb
-- src/main/java/org/embulk/input/hdfs/ConfigurationBuilder.java
+- src/main/java/org/embulk/input/hdfs/ConfigurationFactory.java
 - src/main/java/org/embulk/input/hdfs/HdfsFileInputPlugin.java
-- src/main/java/org/embulk/input/hdfs/PartialFile.java
-- src/main/java/org/embulk/input/hdfs/PartialFileInputStream.java
-- src/main/java/org/embulk/input/hdfs/PartialFileInputStreamBuilder.java
-- src/main/java/org/embulk/input/hdfs/PartialFileList.java
+- src/main/java/org/embulk/input/hdfs/Strftime.java
+- src/main/java/org/embulk/input/hdfs/TargetFileInfo.java
+- src/main/java/org/embulk/input/hdfs/TargetFileInfoList.java
+- src/main/java/org/embulk/input/hdfs/TargetFileInputStreamFactory.java
+- src/main/java/org/embulk/input/hdfs/TargetFilePartialInputStream.java
 - src/test/java/org/embulk/input/hdfs/TestHdfsFileInputPlugin.java
 - src/test/resources/sample_01.csv
 - src/test/resources/sample_02.csv
@@ -94,7 +95,7 @@ files:
 - classpath/curator-client-2.6.0.jar
 - classpath/curator-framework-2.6.0.jar
 - classpath/curator-recipes-2.6.0.jar
-- classpath/embulk-input-hdfs-0.2.1.jar
+- classpath/embulk-input-hdfs-0.3.0.jar
 - classpath/gson-2.2.4.jar
 - classpath/hadoop-annotations-2.6.4.jar
 - classpath/hadoop-auth-2.6.4.jar
data/src/main/java/org/embulk/input/hdfs/ConfigurationBuilder.java DELETED

@@ -1,82 +0,0 @@
-package org.embulk.input.hdfs;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import org.apache.hadoop.conf.Configuration;
-import org.embulk.config.ConfigException;
-import org.embulk.spi.Exec;
-import org.slf4j.Logger;
-
-import java.io.File;
-import java.net.MalformedURLException;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Created by takahiro.nakayama on 2/22/16.
- */
-public class ConfigurationBuilder
-{
-    private static final Logger logger = Exec.getLogger(ConfigurationBuilder.class);
-    private final ImmutableList.Builder<String> configFilesBuilder;
-    private final ImmutableMap.Builder<String, String> configMapBuilder;
-
-    public ConfigurationBuilder()
-    {
-        this.configFilesBuilder = ImmutableList.builder();
-        this.configMapBuilder = ImmutableMap.builder();
-    }
-
-    public ConfigurationBuilder addConfigFiles(List<String> configFiles)
-    {
-        for (String configFile : configFiles) {
-            addConfigFile(configFile);
-        }
-        return this;
-    }
-
-    public ConfigurationBuilder addConfigFile(String configFile)
-    {
-        configFilesBuilder.add(configFile);
-        return this;
-    }
-
-    public ConfigurationBuilder addConfigMap(Map<String, String> configMap)
-    {
-        for (Map.Entry<String, String> entry : configMap.entrySet()) {
-            addConfig(entry.getKey(), entry.getValue());
-        }
-        return this;
-    }
-
-    public ConfigurationBuilder addConfig(String key, String value)
-    {
-        configMapBuilder.put(key, value);
-        return this;
-    }
-
-    public Configuration build()
-    {
-        Configuration configuration = new Configuration();
-        for (String configFile : configFilesBuilder.build()) {
-            File file = new File(configFile);
-            try {
-                configuration.addResource(file.toURI().toURL());
-            }
-            catch (MalformedURLException e) {
-                throw new ConfigException(e);
-            }
-        }
-        for (Map.Entry<String, String> entry : configMapBuilder.build().entrySet()) {
-            configuration.set(entry.getKey(), entry.getValue());
-        }
-        // For debug
-        for (Map.Entry<String, String> entry : configuration) {
-            logger.trace("{}: {}", entry.getKey(), entry.getValue());
-        }
-        logger.trace("Resource Files: {}", configuration);
-        return configuration;
-    }
-}
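The deleted builder is superseded by ConfigurationFactory (+60 lines, also not shown in this diff). Assuming it keeps the same two inputs, config_files and config, an equivalent one-shot construction might look like the sketch below; the class and method shape are guesses, not the actual 0.3.0 code.

```java
import org.apache.hadoop.conf.Configuration;

import java.io.File;
import java.net.MalformedURLException;
import java.util.List;
import java.util.Map;

// Hypothetical sketch: builds a Hadoop Configuration the same way the
// deleted ConfigurationBuilder did, collapsed into a single static call.
public class ConfigurationFactorySketch
{
    static Configuration create(List<String> configFiles, Map<String, String> config)
    {
        Configuration c = new Configuration();
        // Each listed file (e.g. core-site.xml, hdfs-site.xml) is added as a resource.
        for (String file : configFiles) {
            try {
                c.addResource(new File(file).toURI().toURL());
            }
            catch (MalformedURLException e) {
                throw new IllegalArgumentException(e);
            }
        }
        // Inline key/value overrides win over the resource files.
        for (Map.Entry<String, String> entry : config.entrySet()) {
            c.set(entry.getKey(), entry.getValue());
        }
        return c;
    }
}
```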
data/src/main/java/org/embulk/input/hdfs/PartialFile.java DELETED

@@ -1,48 +0,0 @@
-package org.embulk.input.hdfs;
-
-import org.apache.hadoop.fs.Path;
-
-/**
- * Created by takahiro.nakayama on 2/20/16.
- * is the same as PartialFileList.Entry, so this class does not need?
- */
-public class PartialFile
-{
-    private final Path path;
-    private final long start;
-    private final long end;
-    private final boolean canDecompress;
-
-    public PartialFile(String path, long start, long end, boolean canDecompress)
-    {
-        this(new Path(path), start, end, canDecompress);
-    }
-
-    public PartialFile(Path path, long start, long end, boolean canDecompress)
-    {
-        this.path = path;
-        this.start = start;
-        this.end = end;
-        this.canDecompress = canDecompress;
-    }
-
-    public Path getPath()
-    {
-        return path;
-    }
-
-    public long getStart()
-    {
-        return start;
-    }
-
-    public long getEnd()
-    {
-        return end;
-    }
-
-    public boolean getCanDecompress()
-    {
-        return canDecompress;
-    }
-}
data/src/main/java/org/embulk/input/hdfs/PartialFileInputStreamBuilder.java DELETED

@@ -1,125 +0,0 @@
-package org.embulk.input.hdfs;
-
-import com.google.common.base.Optional;
-import com.google.common.base.Throwables;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.io.compress.CodecPool;
-import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.io.compress.CompressionCodecFactory;
-import org.apache.hadoop.io.compress.Decompressor;
-import org.embulk.spi.Exec;
-import org.slf4j.Logger;
-
-import java.io.BufferedInputStream;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.SequenceInputStream;
-
-/**
- * Created by takahiro.nakayama on 2/21/16.
- */
-public class PartialFileInputStreamBuilder
-{
-    private static final Logger logger = Exec.getLogger(PartialFileInputStreamBuilder.class);
-    private final FileSystem fs;
-    private final PartialFile partialFile;
-    private int numHeaderLines = 0;
-
-    public PartialFileInputStreamBuilder(FileSystem fs, PartialFile partialFile)
-    {
-        this.fs = fs;
-        this.partialFile = partialFile;
-    }
-
-    public InputStream build()
-            throws IOException
-    {
-        logger.trace("path: {}, start: {}, end: {}, num_header_lines: {}",
-                partialFile.getPath(), partialFile.getStart(), partialFile.getEnd(), numHeaderLines);
-        if (partialFile.getStart() > 0 && numHeaderLines > 0) {
-            return new SequenceInputStream(createHeadersInputStream(), createPartialFileInputStream());
-        }
-        else {
-            return createPartialFileInputStream();
-        }
-    }
-
-    public PartialFileInputStreamBuilder withHeaders(int numHeaderLines)
-    {
-        this.numHeaderLines = numHeaderLines;
-        return this;
-    }
-
-    private InputStream createOriginalFileWrappedInputStream()
-    {
-        InputStream original = createOriginalFileInputStream();
-        CompressionCodec codec = new CompressionCodecFactory(fs.getConf()).getCodec(partialFile.getPath());
-        if (partialFile.getCanDecompress() && codec != null) {
-            try {
-                return codec.createInputStream(original);
-            }
-            catch (IOException e) {
-                throw Throwables.propagate(e);
-            }
-        }
-        else {
-            return original;
-        }
-    }
-
-    private InputStream createOriginalFileInputStream()
-    {
-        try {
-            return fs.open(partialFile.getPath());
-        }
-        catch (IOException e) {
-            throw Throwables.propagate(e);
-        }
-    }
-
-    // memo: maybe there should also be one that creates an InputStream via a CompressionCodec...
-    // otherwise the headers would get messed up too... or would they
-
-    private InputStream createPartialFileInputStream()
-    {
-        InputStream original = createOriginalFileWrappedInputStream();
-        return new PartialFileInputStream(original, partialFile.getStart(), partialFile.getEnd());
-    }
-
-    private InputStream createHeadersInputStream()
-            throws IOException
-    {
-        ByteArrayOutputStream header = new ByteArrayOutputStream();
-        int skippedHeaders = 0;
-        InputStream original = createOriginalFileWrappedInputStream();
-        try (BufferedInputStream in = new BufferedInputStream(original)) {
-            while (true) {
-                int c = in.read();
-                if (c < 0) {
-                    break;
-                }
-
-                header.write(c);
-
-                if (c == '\n') {
-                    skippedHeaders++;
-                }
-                else if (c == '\r') {
-                    int c2 = in.read();
-                    if (c2 == '\n') {
-                        header.write(c2);
-                    }
-                    skippedHeaders++;
-                }
-
-                if (skippedHeaders >= numHeaderLines) {
-                    break;
-                }
-            }
-        }
-        header.close();
-        return new ByteArrayInputStream(header.toByteArray());
-    }
-}