embulk-output-hdfs 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/README.md +11 -8
- data/build.gradle +5 -5
- data/classpath/embulk-output-hdfs-0.2.0.jar +0 -0
- data/lib/embulk/output/hdfs.rb +1 -1
- data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java +198 -0
- data/src/test/java/org/embulk/output/hdfs/TestHdfsFileOutputPlugin.java +5 -0
- metadata +8 -8
- data/classpath/embulk-output-hdfs-0.1.2.jar +0 -0
- data/src/main/java/org/embulk/output/HdfsOutputPlugin.java +0 -219
- data/src/test/java/org/embulk/output/TestHdfsOutputPlugin.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e23e49bd7a7a7cb1587c929faf9979fe3ae5cf94
+  data.tar.gz: 13833b306d2d69da411177c6f4a094955b572f79
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6ac5ec5966dd880ed625d85afdaa8bd587dcf03aadfa4c2464f5bb78034dea247d263197ced5a1130d3d394a801dd9ea3d5a91180973cc4ced4dd43c332c1a10
+  data.tar.gz: 82f2d8d22029f356925e7412d9a891317d5b6c4fa667787ce2df46fd7195db4aaf813410c705e590ba21958fcb9242a81ca6b4e04d2a4904c4770eacf23f95eb
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,11 +1,11 @@
-# Hdfs output plugin for Embulk
+# Hdfs file output plugin for Embulk
 
 A File Output Plugin for Embulk to write HDFS.
 
 ## Overview
 
 * **Plugin type**: file output
-* **Load all or nothing**:
+* **Load all or nothing**: yes
 * **Resume supported**: no
 * **Cleanup supported**: no
 
@@ -13,8 +13,12 @@ A File Output Plugin for Embulk to write HDFS.
 
 - **config_files** list of paths to Hadoop's configuration files (array of strings, default: `[]`)
 - **config** overwrites configuration parameters (hash, default: `{}`)
-- **
-- **
+- **path_prefix** prefix of target files (string, required)
+- **file_ext** suffix of target files (string, required)
+- **sequence_format** format for sequence part of target files (string, default: `'.%03d.%02d'`)
+- **rewind_seconds** When you use Date format in path_prefix property(like `/tmp/embulk/%Y-%m-%d/out`), the format is interpreted by using the time which is Now minus this property. (int, default: `0`)
+- **overwrite** overwrite files when the same filenames already exists (boolean, default: `false`)
+  - *caution*: even if this property is `true`, this does not mean ensuring the idempotence. if you want to ensure the idempotence, you need the procedures to remove output files after or before running.
 
 ## Example
 
@@ -24,14 +28,13 @@ out:
   config_files:
     - /etc/hadoop/conf/core-site.xml
     - /etc/hadoop/conf/hdfs-site.xml
-    - /etc/hadoop/conf/mapred-site.xml
-    - /etc/hadoop/conf/yarn-site.xml
   config:
     fs.defaultFS: 'hdfs://hdp-nn1:8020'
-    dfs.replication: 1
-    mapreduce.client.submit.file.replication: 1
     fs.hdfs.impl: 'org.apache.hadoop.hdfs.DistributedFileSystem'
     fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'
+  path_prefix: '/tmp/embulk/hdfs_output/%Y-%m-%d/out'
+  file_ext: 'txt'
+  overwrite: true
   formatter:
     type: csv
     encoding: UTF-8
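The options added to the README above feed directly into how the 0.2.0 plugin names its output files. The sketch below (not part of the diff) illustrates the concatenation of path_prefix, sequence_format, and file_ext; the concrete values are assumptions, and note that the README documents the sequence_format default as `'.%03d.%02d'` while the Java code added in this release defaults to `"%03d.%02d."`.

```java
// Minimal sketch (not part of the package): how an output file name is assembled
// from path_prefix, sequence_format, and file_ext. The values below are assumed
// examples; only the concatenation mirrors the new HdfsFileOutputPlugin.
public class PathNameSketch
{
    public static void main(String[] args)
    {
        String pathPrefix = "/tmp/embulk/hdfs_output/2015-09-20/out"; // path_prefix after strftime
        String sequenceFormat = ".%03d.%02d";                          // README's documented default
        String fileExt = "txt";                                        // file_ext
        int taskIndex = 1;   // Embulk task index
        int fileIndex = 0;   // per-task file counter

        // Same concatenation as nextFile() in the new plugin code.
        String path = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + fileExt;
        System.out.println(path); // /tmp/embulk/hdfs_output/2015-09-20/out.001.00txt
    }
}
```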
data/build.gradle
CHANGED
@@ -12,7 +12,7 @@ configurations {
     provided
 }
 
-version = "0.1.2"
+version = "0.2.0"
 
 sourceCompatibility = 1.7
 targetCompatibility = 1.7
@@ -22,7 +22,7 @@ dependencies {
     provided "org.embulk:embulk-core:0.7.0"
     // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
     compile 'org.apache.hadoop:hadoop-client:2.6.0'
-    compile 'com.google.guava:guava:
+    compile 'com.google.guava:guava:15.0'
    testCompile "junit:junit:4.+"
 }
 
@@ -57,9 +57,9 @@ task gemspec {
         Gem::Specification.new do |spec|
             spec.name = "${project.name}"
             spec.version = "${project.version}"
-            spec.authors = ["
-            spec.summary = %[Hdfs output plugin for Embulk]
-            spec.description = %[
+            spec.authors = ["Civitaspo"]
+            spec.summary = %[Hdfs file output plugin for Embulk]
+            spec.description = %[Stores files on Hdfs.]
             spec.email = ["civitaspo@gmail.com"]
             spec.licenses = ["MIT"]
             spec.homepage = "https://github.com/civitaspo/embulk-output-hdfs"
data/classpath/embulk-output-hdfs-0.2.0.jar
ADDED
Binary file
data/lib/embulk/output/hdfs.rb
CHANGED
data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java
ADDED
@@ -0,0 +1,198 @@
+package org.embulk.output.hdfs;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.embulk.config.TaskReport;
+import org.embulk.config.Config;
+import org.embulk.config.ConfigDefault;
+import org.embulk.config.ConfigDiff;
+import org.embulk.config.ConfigSource;
+import org.embulk.config.Task;
+import org.embulk.config.TaskSource;
+import org.embulk.spi.Buffer;
+import org.embulk.spi.Exec;
+import org.embulk.spi.FileOutputPlugin;
+import org.embulk.spi.TransactionalFileOutput;
+import org.jruby.embed.ScriptingContainer;
+import org.slf4j.Logger;
+
+public class HdfsFileOutputPlugin
+        implements FileOutputPlugin
+{
+    private static final Logger logger = Exec.getLogger(HdfsFileOutputPlugin.class);
+
+    public interface PluginTask
+            extends Task
+    {
+        @Config("config_files")
+        @ConfigDefault("[]")
+        public List<String> getConfigFiles();
+
+        @Config("config")
+        @ConfigDefault("{}")
+        public Map<String, String> getConfig();
+
+        @Config("path_prefix")
+        public String getPathPrefix();
+
+        @Config("file_ext")
+        public String getFileNameExtension();
+
+        @Config("sequence_format")
+        @ConfigDefault("\"%03d.%02d.\"")
+        public String getSequenceFormat();
+
+        @Config("rewind_seconds")
+        @ConfigDefault("0")
+        public int getRewindSeconds();
+
+        @Config("overwrite")
+        @ConfigDefault("false")
+        public boolean getOverwrite();
+
+    }
+
+    @Override
+    public ConfigDiff transaction(ConfigSource config, int taskCount,
+                                  FileOutputPlugin.Control control)
+    {
+        PluginTask task = config.loadConfig(PluginTask.class);
+
+        control.run(task.dump());
+        return Exec.newConfigDiff();
+    }
+
+    @Override
+    public ConfigDiff resume(TaskSource taskSource,
+                             int taskCount,
+                             FileOutputPlugin.Control control)
+    {
+        throw new UnsupportedOperationException("hdfs output plugin does not support resuming");
+    }
+
+    @Override
+    public void cleanup(TaskSource taskSource,
+                        int taskCount,
+                        List<TaskReport> successTaskReports)
+    {
+    }
+
+    @Override
+    public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
+    {
+        final PluginTask task = taskSource.loadTask(PluginTask.class);
+
+        final String pathPrefix = strftime(task.getPathPrefix(), task.getRewindSeconds());
+        final String pathSuffix = task.getFileNameExtension();
+        final String sequenceFormat = task.getSequenceFormat();
+
+        return new TransactionalFileOutput()
+        {
+            private final List<String> hdfsFileNames = new ArrayList<>();
+            private int fileIndex = 0;
+            private OutputStream output = null;
+
+            @Override
+            public void nextFile()
+            {
+                closeCurrentStream();
+                Path path = new Path(pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix);
+                try {
+                    FileSystem fs = getFs(task);
+                    output = fs.create(path, task.getOverwrite());
+                    logger.info("Uploading '{}'", path);
+                }
+                catch (IOException e) {
+                    logger.error(e.getMessage());
+                    throw new RuntimeException(e);
+                }
+                hdfsFileNames.add(path.toString());
+                fileIndex++;
+            }
+
+            @Override
+            public void add(Buffer buffer)
+            {
+                try {
+                    output.write(buffer.array(), buffer.offset(), buffer.limit());
+                }
+                catch (IOException e) {
+                    throw new RuntimeException(e);
+                }
+                finally {
+                    buffer.release();
+                }
+            }
+
+            @Override
+            public void finish()
+            {
+                closeCurrentStream();
+            }
+
+            @Override
+            public void close()
+            {
+                closeCurrentStream();
+            }
+
+            @Override
+            public void abort()
+            {
+            }
+
+            @Override
+            public TaskReport commit()
+            {
+                TaskReport report = Exec.newTaskReport();
+                report.set("hdfs_file_names", hdfsFileNames);
+                return report;
+            }
+
+            private void closeCurrentStream()
+            {
+                if (output != null) {
+                    try {
+                        output.close();
+                        output = null;
+                    }
+                    catch (IOException e) {
+                        throw new RuntimeException(e);
+                    }
+                }
+            }
+        };
+    }
+
+    private static FileSystem getFs(final PluginTask task)
+            throws IOException
+    {
+        Configuration configuration = new Configuration();
+
+        for (Object configFile : task.getConfigFiles()) {
+            configuration.addResource(configFile.toString());
+        }
+        configuration.reloadConfiguration();
+
+        for (Map.Entry<String, String> entry: task.getConfig().entrySet()) {
+            configuration.set(entry.getKey(), entry.getValue());
+        }
+
+        return FileSystem.get(configuration);
+    }
+
+    private String strftime(final String raw, final int rewind_seconds)
+    {
+        ScriptingContainer jruby = new ScriptingContainer();
+        Object resolved = jruby.runScriptlet(
+                String.format("(Time.now - %s).strftime('%s')", String.valueOf(rewind_seconds), raw));
+        return resolved.toString();
+    }
+}
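As the strftime() method above shows, rewind_seconds is applied by evaluating a Ruby expression in the embedded JRuby runtime. A minimal standalone sketch of that evaluation (assuming a JRuby embed jar on the classpath; the prefix and rewind value are illustrative):

```java
import org.jruby.embed.ScriptingContainer;

// Sketch only: shows how a date-formatted path_prefix is resolved against
// "now minus rewind_seconds", mirroring the expression built by strftime() above.
public class RewindSecondsSketch
{
    public static void main(String[] args)
    {
        String pathPrefix = "/tmp/embulk/hdfs_output/%Y-%m-%d/out"; // assumed path_prefix
        int rewindSeconds = 86400;                                   // assumed value: rewind one day

        ScriptingContainer jruby = new ScriptingContainer();
        // Evaluate "(Time.now - rewind_seconds).strftime(path_prefix)" in embedded JRuby.
        Object resolved = jruby.runScriptlet(
                String.format("(Time.now - %s).strftime('%s')", rewindSeconds, pathPrefix));
        System.out.println(resolved); // e.g. /tmp/embulk/hdfs_output/2015-09-19/out
    }
}
```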
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-hdfs
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.2.0
 platform: ruby
 authors:
--
+- Civitaspo
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-
+date: 2015-09-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -38,7 +38,7 @@ dependencies:
     version: '10.0'
   prerelease: false
   type: :development
-description:
+description: Stores files on Hdfs.
 email:
 - civitaspo@gmail.com
 executables: []
@@ -54,8 +54,8 @@ files:
 - gradlew
 - gradlew.bat
 - lib/embulk/output/hdfs.rb
-- src/main/java/org/embulk/output/HdfsOutputPlugin.java
-- src/test/java/org/embulk/output/TestHdfsOutputPlugin.java
+- src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java
+- src/test/java/org/embulk/output/hdfs/TestHdfsFileOutputPlugin.java
 - classpath/activation-1.1.jar
 - classpath/apacheds-i18n-2.0.0-M15.jar
 - classpath/apacheds-kerberos-codec-2.0.0-M15.jar
@@ -79,7 +79,7 @@ files:
 - classpath/curator-client-2.6.0.jar
 - classpath/curator-framework-2.6.0.jar
 - classpath/curator-recipes-2.6.0.jar
-- classpath/embulk-output-hdfs-0.1.2.jar
+- classpath/embulk-output-hdfs-0.2.0.jar
 - classpath/gson-2.2.4.jar
 - classpath/hadoop-annotations-2.6.0.jar
 - classpath/hadoop-auth-2.6.0.jar
@@ -151,5 +151,5 @@ rubyforge_project:
 rubygems_version: 2.1.9
 signing_key:
 specification_version: 4
-summary: Hdfs output plugin for Embulk
+summary: Hdfs file output plugin for Embulk
 test_files: []
data/classpath/embulk-output-hdfs-0.1.2.jar
DELETED
Binary file
data/src/main/java/org/embulk/output/HdfsOutputPlugin.java
DELETED
@@ -1,219 +0,0 @@
-package org.embulk.output;
-
-import com.google.common.base.Throwables;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.embulk.config.*;
-import org.embulk.spi.Buffer;
-import org.embulk.spi.Exec;
-import org.embulk.spi.FileOutputPlugin;
-import org.embulk.spi.TransactionalFileOutput;
-import org.jruby.embed.ScriptingContainer;
-import org.slf4j.Logger;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.List;
-import java.util.Map;
-
-public class HdfsOutputPlugin implements FileOutputPlugin
-{
-    private static final Logger logger = Exec.getLogger(HdfsOutputPlugin.class);
-
-    public interface PluginTask extends Task
-    {
-        @Config("config_files")
-        @ConfigDefault("[]")
-        public List<String> getConfigFiles();
-
-        @Config("config")
-        @ConfigDefault("{}")
-        public Map<String, String> getConfig();
-
-        @Config("sequence_format")
-        @ConfigDefault("\"%03d.%02d\"")
-        public String getSequenceFormat();
-
-        @Config("output_path")
-        @ConfigDefault("\"/tmp/embulk.output.hdfs_output.%Y%m%d_%s\"")
-        public String getOutputPath();
-
-        @Config("working_path")
-        @ConfigDefault("\"/tmp/embulk.working.hdfs_output.%Y%m%d_%s\"")
-        public String getWorkingPath();
-
-    }
-
-    @Override
-    public ConfigDiff transaction(ConfigSource config,
-                                  int taskCount,
-                                  FileOutputPlugin.Control control)
-    {
-        PluginTask task = config.loadConfig(PluginTask.class);
-        return resume(task.dump(), taskCount, control);
-    }
-
-    @Override
-    public ConfigDiff resume(TaskSource taskSource,
-                             int taskCount,
-                             FileOutputPlugin.Control control)
-    {
-        control.run(taskSource);
-        return Exec.newConfigDiff();
-    }
-
-
-    @Override
-    public void cleanup(TaskSource taskSource,
-                        int taskCount,
-                        List<TaskReport> successTaskReports)
-    {
-    }
-
-    @Override
-    public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
-    {
-        PluginTask task = taskSource.loadTask(PluginTask.class);
-
-        Configuration configuration = getHdfsConfiguration(task);
-        FileSystem fs = getFs(configuration);
-        String workingPath = strftime(task.getWorkingPath());
-        String outputPath = strftime(task.getOutputPath());
-        return new TransactionalHdfsFileOutput(task, fs, workingPath, outputPath, taskIndex);
-    }
-
-    private Configuration getHdfsConfiguration(final PluginTask task)
-    {
-        Configuration configuration = new Configuration();
-
-        List configFiles = task.getConfigFiles();
-        for (Object configFile : configFiles) {
-            configuration.addResource(configFile.toString());
-        }
-
-        for (Map.Entry<String, String> entry: task.getConfig().entrySet()) {
-            configuration.set(entry.getKey(), entry.getValue());
-        }
-
-        return configuration;
-    }
-
-    private FileSystem getFs(final Configuration configuration) {
-        try {
-            FileSystem fs = FileSystem.get(configuration);
-            return fs;
-        }
-        catch (IOException e) {
-            logger.error(e.getMessage());
-            throw Throwables.propagate(e);
-        }
-    }
-
-    private String strftime(final String path)
-    {
-        // strftime
-        ScriptingContainer jruby = new ScriptingContainer();
-        Object result = jruby.runScriptlet("Time.now.strftime('" + path + "')");
-        return result.toString();
-    }
-
-    static class TransactionalHdfsFileOutput implements TransactionalFileOutput
-    {
-        private final int taskIndex;
-        private final FileSystem fs;
-        private final String workingPath;
-        private final String outputPath;
-        private final String sequenceFormat;
-
-        private int fileIndex = 0;
-        private int callCount = 0;
-        private Path currentPath = null;
-        private OutputStream currentStream = null;
-
-        public TransactionalHdfsFileOutput(PluginTask task, FileSystem fs, String workingPath, String outputPath, int taskIndex)
-        {
-            this.taskIndex = taskIndex;
-            this.fs = fs;
-            this.workingPath = workingPath;
-            this.outputPath = outputPath;
-            this.sequenceFormat = task.getSequenceFormat();
-        }
-
-        public void nextFile() {
-            closeCurrentStream();
-            currentPath = new Path(workingPath + '/' + String.format(sequenceFormat, taskIndex, fileIndex));
-            try {
-                if (fs.exists(currentPath)) {
-                    throw new IllegalAccessException(currentPath.toString() + "already exists.");
-                }
-                currentStream = fs.create(currentPath);
-                logger.info("Uploading '{}'", currentPath.toString());
-            }
-            catch (IOException | IllegalAccessException e) {
-                logger.error(e.getMessage());
-                throw Throwables.propagate(e);
-            }
-            fileIndex++;
-        }
-
-        @Override
-        public void add(Buffer buffer) {
-            if (currentStream == null) {
-                throw new IllegalStateException("nextFile() must be called before poll()");
-            }
-            try {
-                logger.debug("#add called {} times for taskIndex {}", callCount, taskIndex);
-                currentStream.write(buffer.array(), buffer.offset(), buffer.limit());
-                callCount++;
-            } catch (IOException e) {
-                throw new RuntimeException(e);
-            } finally {
-                buffer.release();
-            }
-        }
-
-        @Override
-        public void finish() {
-            closeCurrentStream();
-        }
-
-        @Override
-        public void close() {
-            closeCurrentStream();
-        }
-
-        @Override
-        public void abort() {
-        }
-
-        @Override
-        public TaskReport commit() {
-            try {
-                fs.rename(new Path(workingPath), new Path(outputPath));
-                logger.info("rename {} => {}", workingPath, outputPath);
-            } catch (IOException e) {
-                logger.error(e.getMessage());
-                throw Throwables.propagate(e);
-            }
-
-            TaskReport report = Exec.newTaskReport();
-            report.set("files", currentPath);
-            return report;
-        }
-
-        private void closeCurrentStream() {
-            try {
-                if (currentStream != null) {
-                    currentStream.close();
-                    currentStream = null;
-                }
-
-                callCount = 0;
-            } catch (IOException e) {
-                logger.error(e.getMessage());
-                throw Throwables.propagate(e);
-            }
-        }
-    }
-}
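For contrast with the new plugin, the removed 0.1.2 code above wrote into working_path and only published results by renaming that directory to output_path in commit(). A minimal sketch of that rename step in isolation (assuming hadoop-client 2.6.0 on the classpath and a reachable fs.defaultFS; the paths are illustrative):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch only: the working_path -> output_path rename the 0.1.2 plugin performed
// at commit time, shown outside the plugin. All values here are assumptions.
public class RenameOnCommitSketch
{
    public static void main(String[] args) throws Exception
    {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hdp-nn1:8020"); // value taken from the README example

        FileSystem fs = FileSystem.get(conf);
        Path workingPath = new Path("/tmp/embulk.working.hdfs_output.20150920_0"); // assumed working dir
        Path outputPath = new Path("/tmp/embulk.output.hdfs_output.20150920_0");   // assumed output dir

        // 0.1.2 staged files under workingPath, then published them by renaming the directory.
        boolean renamed = fs.rename(workingPath, outputPath);
        System.out.println("renamed: " + renamed);
    }
}
```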