embulk-output-hdfs 0.1.2 → 0.2.0
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/README.md +11 -8
- data/build.gradle +5 -5
- data/classpath/embulk-output-hdfs-0.2.0.jar +0 -0
- data/lib/embulk/output/hdfs.rb +1 -1
- data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java +198 -0
- data/src/test/java/org/embulk/output/hdfs/TestHdfsFileOutputPlugin.java +5 -0
- metadata +8 -8
- data/classpath/embulk-output-hdfs-0.1.2.jar +0 -0
- data/src/main/java/org/embulk/output/HdfsOutputPlugin.java +0 -219
- data/src/test/java/org/embulk/output/TestHdfsOutputPlugin.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e23e49bd7a7a7cb1587c929faf9979fe3ae5cf94
+  data.tar.gz: 13833b306d2d69da411177c6f4a094955b572f79
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6ac5ec5966dd880ed625d85afdaa8bd587dcf03aadfa4c2464f5bb78034dea247d263197ced5a1130d3d394a801dd9ea3d5a91180973cc4ced4dd43c332c1a10
+  data.tar.gz: 82f2d8d22029f356925e7412d9a891317d5b6c4fa667787ce2df46fd7195db4aaf813410c705e590ba21958fcb9242a81ca6b4e04d2a4904c4770eacf23f95eb
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,11 +1,11 @@
-# Hdfs output plugin for Embulk
+# Hdfs file output plugin for Embulk
 
 A File Output Plugin for Embulk to write HDFS.
 
 ## Overview
 
 * **Plugin type**: file output
-* **Load all or nothing**:
+* **Load all or nothing**: yes
 * **Resume supported**: no
 * **Cleanup supported**: no
 
@@ -13,8 +13,12 @@ A File Output Plugin for Embulk to write HDFS.
 
 - **config_files** list of paths to Hadoop's configuration files (array of strings, default: `[]`)
 - **config** overwrites configuration parameters (hash, default: `{}`)
-- **
-- **
+- **path_prefix** prefix of target files (string, required)
+- **file_ext** suffix of target files (string, required)
+- **sequence_format** format for the sequence part of target file names (string, default: `'.%03d.%02d'`)
+- **rewind_seconds** when path_prefix contains a date format (like `/tmp/embulk/%Y-%m-%d/out`), it is interpreted using the current time minus this many seconds (int, default: `0`)
+- **overwrite** overwrite files when the same filenames already exist (boolean, default: `false`)
+  - *caution*: even if this property is `true`, it does not guarantee idempotence; to make a run idempotent, remove the output files before or after running.
 
 ## Example
 
@@ -24,14 +28,13 @@ out:
   config_files:
     - /etc/hadoop/conf/core-site.xml
     - /etc/hadoop/conf/hdfs-site.xml
-    - /etc/hadoop/conf/mapred-site.xml
-    - /etc/hadoop/conf/yarn-site.xml
   config:
     fs.defaultFS: 'hdfs://hdp-nn1:8020'
-    dfs.replication: 1
-    mapreduce.client.submit.file.replication: 1
     fs.hdfs.impl: 'org.apache.hadoop.hdfs.DistributedFileSystem'
     fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'
+  path_prefix: '/tmp/embulk/hdfs_output/%Y-%m-%d/out'
+  file_ext: 'txt'
+  overwrite: true
   formatter:
     type: csv
     encoding: UTF-8
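For reference, the three new naming options combine into a single output path per task and file: path_prefix is first resolved as a Ruby strftime format (shifted back by rewind_seconds), the sequence_format is then filled in with the task index and file index, and file_ext is appended directly. The sketch below mirrors that composition from HdfsFileOutputPlugin.open(); the class name and sample values are illustrative only, and SimpleDateFormat stands in for the JRuby strftime call the plugin actually uses.

// Sketch only: how embulk-output-hdfs 0.2.0 composes an output file name.
import java.text.SimpleDateFormat;
import java.util.Date;

class PathNamingSketch {
    public static void main(String[] args) {
        String pathPrefix = "/tmp/embulk/hdfs_output/%Y-%m-%d/out"; // from the README example
        String sequenceFormat = "%03d.%02d.";                       // the Java code's built-in default
        String fileExt = "txt";
        int rewindSeconds = 0;
        int taskIndex = 0;
        int fileIndex = 0;

        // The plugin resolves %Y-%m-%d via JRuby's Time#strftime; SimpleDateFormat
        // approximates the same substitution here for illustration.
        Date when = new Date(System.currentTimeMillis() - rewindSeconds * 1000L);
        String resolvedPrefix = pathPrefix.replace(
                "%Y-%m-%d", new SimpleDateFormat("yyyy-MM-dd").format(when));

        // prefix + sequence part (task index, file index) + extension, concatenated as-is
        String path = resolvedPrefix
                + String.format(sequenceFormat, taskIndex, fileIndex)
                + fileExt;
        System.out.println(path); // e.g. /tmp/embulk/hdfs_output/2015-09-20/out000.00.txt
    }
}

Note that the README documents the default sequence_format as '.%03d.%02d' while the Java default is '%03d.%02d.'; either way, the three parts are concatenated with no extra separator.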
data/build.gradle
CHANGED
@@ -12,7 +12,7 @@ configurations {
     provided
 }
 
-version = "0.
+version = "0.2.0"
 
 sourceCompatibility = 1.7
 targetCompatibility = 1.7
@@ -22,7 +22,7 @@ dependencies {
     provided "org.embulk:embulk-core:0.7.0"
     // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
     compile 'org.apache.hadoop:hadoop-client:2.6.0'
-    compile 'com.google.guava:guava:
+    compile 'com.google.guava:guava:15.0'
     testCompile "junit:junit:4.+"
 }
 
@@ -57,9 +57,9 @@ task gemspec {
         Gem::Specification.new do |spec|
             spec.name = "${project.name}"
             spec.version = "${project.version}"
-            spec.authors = ["
-            spec.summary = %[Hdfs output plugin for Embulk]
-            spec.description = %[
+            spec.authors = ["Civitaspo"]
+            spec.summary = %[Hdfs file output plugin for Embulk]
+            spec.description = %[Stores files on Hdfs.]
             spec.email = ["civitaspo@gmail.com"]
             spec.licenses = ["MIT"]
             spec.homepage = "https://github.com/civitaspo/embulk-output-hdfs"
data/classpath/embulk-output-hdfs-0.2.0.jar
ADDED
Binary file
data/lib/embulk/output/hdfs.rb
CHANGED

data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java
ADDED
@@ -0,0 +1,198 @@
+package org.embulk.output.hdfs;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.embulk.config.TaskReport;
+import org.embulk.config.Config;
+import org.embulk.config.ConfigDefault;
+import org.embulk.config.ConfigDiff;
+import org.embulk.config.ConfigSource;
+import org.embulk.config.Task;
+import org.embulk.config.TaskSource;
+import org.embulk.spi.Buffer;
+import org.embulk.spi.Exec;
+import org.embulk.spi.FileOutputPlugin;
+import org.embulk.spi.TransactionalFileOutput;
+import org.jruby.embed.ScriptingContainer;
+import org.slf4j.Logger;
+
+public class HdfsFileOutputPlugin
+        implements FileOutputPlugin
+{
+    private static final Logger logger = Exec.getLogger(HdfsFileOutputPlugin.class);
+
+    public interface PluginTask
+            extends Task
+    {
+        @Config("config_files")
+        @ConfigDefault("[]")
+        public List<String> getConfigFiles();
+
+        @Config("config")
+        @ConfigDefault("{}")
+        public Map<String, String> getConfig();
+
+        @Config("path_prefix")
+        public String getPathPrefix();
+
+        @Config("file_ext")
+        public String getFileNameExtension();
+
+        @Config("sequence_format")
+        @ConfigDefault("\"%03d.%02d.\"")
+        public String getSequenceFormat();
+
+        @Config("rewind_seconds")
+        @ConfigDefault("0")
+        public int getRewindSeconds();
+
+        @Config("overwrite")
+        @ConfigDefault("false")
+        public boolean getOverwrite();
+
+    }
+
+    @Override
+    public ConfigDiff transaction(ConfigSource config, int taskCount,
+                                  FileOutputPlugin.Control control)
+    {
+        PluginTask task = config.loadConfig(PluginTask.class);
+
+        control.run(task.dump());
+        return Exec.newConfigDiff();
+    }
+
+    @Override
+    public ConfigDiff resume(TaskSource taskSource,
+                             int taskCount,
+                             FileOutputPlugin.Control control)
+    {
+        throw new UnsupportedOperationException("hdfs output plugin does not support resuming");
+    }
+
+    @Override
+    public void cleanup(TaskSource taskSource,
+                        int taskCount,
+                        List<TaskReport> successTaskReports)
+    {
+    }
+
+    @Override
+    public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
+    {
+        final PluginTask task = taskSource.loadTask(PluginTask.class);
+
+        final String pathPrefix = strftime(task.getPathPrefix(), task.getRewindSeconds());
+        final String pathSuffix = task.getFileNameExtension();
+        final String sequenceFormat = task.getSequenceFormat();
+
+        return new TransactionalFileOutput()
+        {
+            private final List<String> hdfsFileNames = new ArrayList<>();
+            private int fileIndex = 0;
+            private OutputStream output = null;
+
+            @Override
+            public void nextFile()
+            {
+                closeCurrentStream();
+                Path path = new Path(pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix);
+                try {
+                    FileSystem fs = getFs(task);
+                    output = fs.create(path, task.getOverwrite());
+                    logger.info("Uploading '{}'", path);
+                }
+                catch (IOException e) {
+                    logger.error(e.getMessage());
+                    throw new RuntimeException(e);
+                }
+                hdfsFileNames.add(path.toString());
+                fileIndex++;
+            }
+
+            @Override
+            public void add(Buffer buffer)
+            {
+                try {
+                    output.write(buffer.array(), buffer.offset(), buffer.limit());
+                }
+                catch (IOException e) {
+                    throw new RuntimeException(e);
+                }
+                finally {
+                    buffer.release();
+                }
+            }
+
+            @Override
+            public void finish()
+            {
+                closeCurrentStream();
+            }
+
+            @Override
+            public void close()
+            {
+                closeCurrentStream();
+            }
+
+            @Override
+            public void abort()
+            {
+            }
+
+            @Override
+            public TaskReport commit()
+            {
+                TaskReport report = Exec.newTaskReport();
+                report.set("hdfs_file_names", hdfsFileNames);
+                return report;
+            }
+
+            private void closeCurrentStream()
+            {
+                if (output != null) {
+                    try {
+                        output.close();
+                        output = null;
+                    }
+                    catch (IOException e) {
+                        throw new RuntimeException(e);
+                    }
+                }
+            }
+        };
+    }
+
+    private static FileSystem getFs(final PluginTask task)
+            throws IOException
+    {
+        Configuration configuration = new Configuration();
+
+        for (Object configFile : task.getConfigFiles()) {
+            configuration.addResource(configFile.toString());
+        }
+        configuration.reloadConfiguration();
+
+        for (Map.Entry<String, String> entry: task.getConfig().entrySet()) {
+            configuration.set(entry.getKey(), entry.getValue());
+        }
+
+        return FileSystem.get(configuration);
+    }
+
+    private String strftime(final String raw, final int rewind_seconds)
+    {
+        ScriptingContainer jruby = new ScriptingContainer();
+        Object resolved = jruby.runScriptlet(
+                String.format("(Time.now - %s).strftime('%s')", String.valueOf(rewind_seconds), raw));
+        return resolved.toString();
+    }
+}
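The rewind_seconds behavior lives entirely in the strftime() helper above: the path_prefix string is handed to embedded JRuby and evaluated as (Time.now - rewind_seconds).strftime(path_prefix). A minimal standalone sketch of that call follows; the class name and sample values are illustrative, and it assumes the same JRuby dependency the plugin already ships with.

// Sketch only: resolving a date-formatted path_prefix the way HdfsFileOutputPlugin.strftime() does.
import org.jruby.embed.ScriptingContainer;

class StrftimeSketch {
    public static void main(String[] args) {
        String pathPrefix = "/tmp/embulk/%Y-%m-%d/out"; // date format, as in the README
        int rewindSeconds = 86400;                      // look one day back

        // Build and run the same Ruby scriptlet the plugin builds:
        //   (Time.now - <rewind_seconds>).strftime('<path_prefix>')
        ScriptingContainer jruby = new ScriptingContainer();
        Object resolved = jruby.runScriptlet(
                String.format("(Time.now - %d).strftime('%s')", rewindSeconds, pathPrefix));

        // On 2015-09-20 this would print /tmp/embulk/2015-09-19/out.
        System.out.println(resolved);
    }
}

Shifting the clock back this way is what lets, for example, a job that starts shortly after midnight keep writing into the previous day's directory.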
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-hdfs
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.2.0
 platform: ruby
 authors:
--
+- Civitaspo
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-
+date: 2015-09-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -38,7 +38,7 @@ dependencies:
     version: '10.0'
   prerelease: false
   type: :development
-description:
+description: Stores files on Hdfs.
 email:
 - civitaspo@gmail.com
 executables: []
@@ -54,8 +54,8 @@ files:
 - gradlew
 - gradlew.bat
 - lib/embulk/output/hdfs.rb
-- src/main/java/org/embulk/output/
-- src/test/java/org/embulk/output/
+- src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java
+- src/test/java/org/embulk/output/hdfs/TestHdfsFileOutputPlugin.java
 - classpath/activation-1.1.jar
 - classpath/apacheds-i18n-2.0.0-M15.jar
 - classpath/apacheds-kerberos-codec-2.0.0-M15.jar
@@ -79,7 +79,7 @@ files:
 - classpath/curator-client-2.6.0.jar
 - classpath/curator-framework-2.6.0.jar
 - classpath/curator-recipes-2.6.0.jar
-- classpath/embulk-output-hdfs-0.
+- classpath/embulk-output-hdfs-0.2.0.jar
 - classpath/gson-2.2.4.jar
 - classpath/hadoop-annotations-2.6.0.jar
 - classpath/hadoop-auth-2.6.0.jar
@@ -151,5 +151,5 @@ rubyforge_project:
 rubygems_version: 2.1.9
 signing_key:
 specification_version: 4
-summary: Hdfs output plugin for Embulk
+summary: Hdfs file output plugin for Embulk
 test_files: []
data/classpath/embulk-output-hdfs-0.1.2.jar
DELETED
Binary file

data/src/main/java/org/embulk/output/HdfsOutputPlugin.java
DELETED
@@ -1,219 +0,0 @@
-package org.embulk.output;
-
-import com.google.common.base.Throwables;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.embulk.config.*;
-import org.embulk.spi.Buffer;
-import org.embulk.spi.Exec;
-import org.embulk.spi.FileOutputPlugin;
-import org.embulk.spi.TransactionalFileOutput;
-import org.jruby.embed.ScriptingContainer;
-import org.slf4j.Logger;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.List;
-import java.util.Map;
-
-public class HdfsOutputPlugin implements FileOutputPlugin
-{
-    private static final Logger logger = Exec.getLogger(HdfsOutputPlugin.class);
-
-    public interface PluginTask extends Task
-    {
-        @Config("config_files")
-        @ConfigDefault("[]")
-        public List<String> getConfigFiles();
-
-        @Config("config")
-        @ConfigDefault("{}")
-        public Map<String, String> getConfig();
-
-        @Config("sequence_format")
-        @ConfigDefault("\"%03d.%02d\"")
-        public String getSequenceFormat();
-
-        @Config("output_path")
-        @ConfigDefault("\"/tmp/embulk.output.hdfs_output.%Y%m%d_%s\"")
-        public String getOutputPath();
-
-        @Config("working_path")
-        @ConfigDefault("\"/tmp/embulk.working.hdfs_output.%Y%m%d_%s\"")
-        public String getWorkingPath();
-
-    }
-
-    @Override
-    public ConfigDiff transaction(ConfigSource config,
-                                  int taskCount,
-                                  FileOutputPlugin.Control control)
-    {
-        PluginTask task = config.loadConfig(PluginTask.class);
-        return resume(task.dump(), taskCount, control);
-    }
-
-    @Override
-    public ConfigDiff resume(TaskSource taskSource,
-                             int taskCount,
-                             FileOutputPlugin.Control control)
-    {
-        control.run(taskSource);
-        return Exec.newConfigDiff();
-    }
-
-
-    @Override
-    public void cleanup(TaskSource taskSource,
-                        int taskCount,
-                        List<TaskReport> successTaskReports)
-    {
-    }
-
-    @Override
-    public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
-    {
-        PluginTask task = taskSource.loadTask(PluginTask.class);
-
-        Configuration configuration = getHdfsConfiguration(task);
-        FileSystem fs = getFs(configuration);
-        String workingPath = strftime(task.getWorkingPath());
-        String outputPath = strftime(task.getOutputPath());
-        return new TransactionalHdfsFileOutput(task, fs, workingPath, outputPath, taskIndex);
-    }
-
-    private Configuration getHdfsConfiguration(final PluginTask task)
-    {
-        Configuration configuration = new Configuration();
-
-        List configFiles = task.getConfigFiles();
-        for (Object configFile : configFiles) {
-            configuration.addResource(configFile.toString());
-        }
-
-        for (Map.Entry<String, String> entry: task.getConfig().entrySet()) {
-            configuration.set(entry.getKey(), entry.getValue());
-        }
-
-        return configuration;
-    }
-
-    private FileSystem getFs(final Configuration configuration) {
-        try {
-            FileSystem fs = FileSystem.get(configuration);
-            return fs;
-        }
-        catch (IOException e) {
-            logger.error(e.getMessage());
-            throw Throwables.propagate(e);
-        }
-    }
-
-    private String strftime(final String path)
-    {
-        // strftime
-        ScriptingContainer jruby = new ScriptingContainer();
-        Object result = jruby.runScriptlet("Time.now.strftime('" + path + "')");
-        return result.toString();
-    }
-
-    static class TransactionalHdfsFileOutput implements TransactionalFileOutput
-    {
-        private final int taskIndex;
-        private final FileSystem fs;
-        private final String workingPath;
-        private final String outputPath;
-        private final String sequenceFormat;
-
-        private int fileIndex = 0;
-        private int callCount = 0;
-        private Path currentPath = null;
-        private OutputStream currentStream = null;
-
-        public TransactionalHdfsFileOutput(PluginTask task, FileSystem fs, String workingPath, String outputPath, int taskIndex)
-        {
-            this.taskIndex = taskIndex;
-            this.fs = fs;
-            this.workingPath = workingPath;
-            this.outputPath = outputPath;
-            this.sequenceFormat = task.getSequenceFormat();
-        }
-
-        public void nextFile() {
-            closeCurrentStream();
-            currentPath = new Path(workingPath + '/' + String.format(sequenceFormat, taskIndex, fileIndex));
-            try {
-                if (fs.exists(currentPath)) {
-                    throw new IllegalAccessException(currentPath.toString() + "already exists.");
-                }
-                currentStream = fs.create(currentPath);
-                logger.info("Uploading '{}'", currentPath.toString());
-            }
-            catch (IOException | IllegalAccessException e) {
-                logger.error(e.getMessage());
-                throw Throwables.propagate(e);
-            }
-            fileIndex++;
-        }
-
-        @Override
-        public void add(Buffer buffer) {
-            if (currentStream == null) {
-                throw new IllegalStateException("nextFile() must be called before poll()");
-            }
-            try {
-                logger.debug("#add called {} times for taskIndex {}", callCount, taskIndex);
-                currentStream.write(buffer.array(), buffer.offset(), buffer.limit());
-                callCount++;
-            } catch (IOException e) {
-                throw new RuntimeException(e);
-            } finally {
-                buffer.release();
-            }
-        }
-
-        @Override
-        public void finish() {
-            closeCurrentStream();
-        }
-
-        @Override
-        public void close() {
-            closeCurrentStream();
-        }
-
-        @Override
-        public void abort() {
-        }
-
-        @Override
-        public TaskReport commit() {
-            try {
-                fs.rename(new Path(workingPath), new Path(outputPath));
-                logger.info("rename {} => {}", workingPath, outputPath);
-            } catch (IOException e) {
-                logger.error(e.getMessage());
-                throw Throwables.propagate(e);
-            }
-
-            TaskReport report = Exec.newTaskReport();
-            report.set("files", currentPath);
-            return report;
-        }
-
-        private void closeCurrentStream() {
-            try {
-                if (currentStream != null) {
-                    currentStream.close();
-                    currentStream = null;
-                }
-
-                callCount = 0;
-            } catch (IOException e) {
-                logger.error(e.getMessage());
-                throw Throwables.propagate(e);
-            }
-        }
-    }
-}