embulk-executor-mapreduce 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/build.gradle +2 -0
- data/classpath/activation-1.1.jar +0 -0
- data/classpath/apacheds-i18n-2.0.0-M15.jar +0 -0
- data/classpath/apacheds-kerberos-codec-2.0.0-M15.jar +0 -0
- data/classpath/api-asn1-api-1.0.0-M20.jar +0 -0
- data/classpath/api-util-1.0.0-M20.jar +0 -0
- data/classpath/avro-1.7.4.jar +0 -0
- data/classpath/commons-beanutils-1.7.0.jar +0 -0
- data/classpath/commons-cli-1.2.jar +0 -0
- data/classpath/commons-codec-1.6.jar +0 -0
- data/classpath/commons-collections-3.2.1.jar +0 -0
- data/classpath/commons-compress-1.4.1.jar +0 -0
- data/classpath/commons-configuration-1.6.jar +0 -0
- data/classpath/commons-digester-1.8.jar +0 -0
- data/classpath/commons-httpclient-3.1.jar +0 -0
- data/classpath/commons-io-2.4.jar +0 -0
- data/classpath/commons-lang-2.6.jar +0 -0
- data/classpath/commons-logging-1.1.3.jar +0 -0
- data/classpath/commons-math3-3.1.1.jar +0 -0
- data/classpath/commons-net-3.1.jar +0 -0
- data/classpath/curator-client-2.6.0.jar +0 -0
- data/classpath/curator-framework-2.6.0.jar +0 -0
- data/classpath/curator-recipes-2.6.0.jar +0 -0
- data/classpath/embulk-executor-mapreduce-0.1.0.jar +0 -0
- data/classpath/gson-2.2.4.jar +0 -0
- data/classpath/hadoop-annotations-2.6.0.jar +0 -0
- data/classpath/hadoop-auth-2.6.0.jar +0 -0
- data/classpath/hadoop-client-2.6.0.jar +0 -0
- data/classpath/hadoop-common-2.6.0.jar +0 -0
- data/classpath/hadoop-hdfs-2.6.0.jar +0 -0
- data/classpath/hadoop-mapreduce-client-app-2.6.0.jar +0 -0
- data/classpath/hadoop-mapreduce-client-common-2.6.0.jar +0 -0
- data/classpath/hadoop-mapreduce-client-core-2.6.0.jar +0 -0
- data/classpath/hadoop-mapreduce-client-jobclient-2.6.0.jar +0 -0
- data/classpath/hadoop-mapreduce-client-shuffle-2.6.0.jar +0 -0
- data/classpath/hadoop-yarn-api-2.6.0.jar +0 -0
- data/classpath/hadoop-yarn-client-2.6.0.jar +0 -0
- data/classpath/hadoop-yarn-common-2.6.0.jar +0 -0
- data/classpath/hadoop-yarn-server-common-2.6.0.jar +0 -0
- data/classpath/hadoop-yarn-server-nodemanager-2.6.0.jar +0 -0
- data/classpath/htrace-core-3.0.4.jar +0 -0
- data/classpath/httpclient-4.2.5.jar +0 -0
- data/classpath/httpcore-4.2.4.jar +0 -0
- data/classpath/jackson-core-asl-1.9.13.jar +0 -0
- data/classpath/jackson-jaxrs-1.9.13.jar +0 -0
- data/classpath/jackson-mapper-asl-1.9.13.jar +0 -0
- data/classpath/jackson-xc-1.9.13.jar +0 -0
- data/classpath/jaxb-api-2.2.2.jar +0 -0
- data/classpath/jaxb-impl-2.2.3-1.jar +0 -0
- data/classpath/jersey-client-1.9.jar +0 -0
- data/classpath/jersey-core-1.9.jar +0 -0
- data/classpath/jersey-guice-1.9.jar +0 -0
- data/classpath/jersey-json-1.9.jar +0 -0
- data/classpath/jersey-server-1.9.jar +0 -0
- data/classpath/jettison-1.1.jar +0 -0
- data/classpath/jetty-util-6.1.26.jar +0 -0
- data/classpath/jline-0.9.94.jar +0 -0
- data/classpath/jsr305-1.3.9.jar +0 -0
- data/classpath/leveldbjni-all-1.8.jar +0 -0
- data/classpath/netty-3.7.0.Final.jar +0 -0
- data/classpath/paranamer-2.3.jar +0 -0
- data/classpath/protobuf-java-2.5.0.jar +0 -0
- data/classpath/servlet-api-2.5.jar +0 -0
- data/classpath/snappy-java-1.0.4.1.jar +0 -0
- data/classpath/stax-api-1.0-2.jar +0 -0
- data/classpath/xmlenc-0.52.jar +0 -0
- data/classpath/xz-1.0.jar +0 -0
- data/classpath/zookeeper-3.4.6.jar +0 -0
- data/lib/embulk/executor/mapreduce.rb +3 -0
- data/src/main/java/org/embulk/executor/mapreduce/AttemptState.java +154 -0
- data/src/main/java/org/embulk/executor/mapreduce/BufferWritable.java +74 -0
- data/src/main/java/org/embulk/executor/mapreduce/BufferedPagePartitioner.java +158 -0
- data/src/main/java/org/embulk/executor/mapreduce/EmbulkInputFormat.java +37 -0
- data/src/main/java/org/embulk/executor/mapreduce/EmbulkInputSplit.java +61 -0
- data/src/main/java/org/embulk/executor/mapreduce/EmbulkMapReduce.java +359 -0
- data/src/main/java/org/embulk/executor/mapreduce/EmbulkPartitioningMapReduce.java +303 -0
- data/src/main/java/org/embulk/executor/mapreduce/EmbulkRecordReader.java +63 -0
- data/src/main/java/org/embulk/executor/mapreduce/MapReduceExecutor.java +391 -0
- data/src/main/java/org/embulk/executor/mapreduce/MapReduceExecutorTask.java +60 -0
- data/src/main/java/org/embulk/executor/mapreduce/PageWritable.java +66 -0
- data/src/main/java/org/embulk/executor/mapreduce/PartitionKey.java +11 -0
- data/src/main/java/org/embulk/executor/mapreduce/Partitioner.java +11 -0
- data/src/main/java/org/embulk/executor/mapreduce/Partitioning.java +12 -0
- data/src/main/java/org/embulk/executor/mapreduce/PluginArchive.java +189 -0
- data/src/main/java/org/embulk/executor/mapreduce/RemoteTaskFailedException.java +10 -0
- data/src/main/java/org/embulk/executor/mapreduce/SetContextClassLoader.java +19 -0
- data/src/main/java/org/embulk/executor/mapreduce/TimestampPartitioning.java +291 -0
- metadata +131 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
package org.embulk.executor.mapreduce;
|
|
2
|
+
|
|
3
|
+
import java.io.DataInput;
|
|
4
|
+
import java.io.DataOutput;
|
|
5
|
+
import java.io.IOException;
|
|
6
|
+
import org.apache.hadoop.io.Writable;
|
|
7
|
+
import org.apache.hadoop.mapreduce.InputSplit;
|
|
8
|
+
|
|
9
|
+
public class EmbulkInputSplit
|
|
10
|
+
extends InputSplit
|
|
11
|
+
implements Writable
|
|
12
|
+
{
|
|
13
|
+
private int[] taskIndexes;
|
|
14
|
+
|
|
15
|
+
public EmbulkInputSplit()
|
|
16
|
+
{
|
|
17
|
+
this(new int[0]);
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
public EmbulkInputSplit(int[] taskIndexes)
|
|
21
|
+
{
|
|
22
|
+
this.taskIndexes = taskIndexes;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
public int[] getTaskIndexes()
|
|
26
|
+
{
|
|
27
|
+
return taskIndexes;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
@Override
|
|
31
|
+
public long getLength()
|
|
32
|
+
{
|
|
33
|
+
return taskIndexes.length;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
@Override
|
|
37
|
+
public String[] getLocations()
|
|
38
|
+
{
|
|
39
|
+
return new String[0];
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
@Override
|
|
43
|
+
public void write(DataOutput out) throws IOException
|
|
44
|
+
{
|
|
45
|
+
out.writeInt(taskIndexes.length);
|
|
46
|
+
for (int taskIndex : taskIndexes) {
|
|
47
|
+
out.writeInt(taskIndex);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
@Override
|
|
52
|
+
public void readFields(DataInput in) throws IOException
|
|
53
|
+
{
|
|
54
|
+
int c = in.readInt();
|
|
55
|
+
int[] taskIndexes = new int[c];
|
|
56
|
+
for (int i=0; i < c; i++) {
|
|
57
|
+
taskIndexes[i] = in.readInt();
|
|
58
|
+
}
|
|
59
|
+
this.taskIndexes = taskIndexes;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
package org.embulk.executor.mapreduce;
|
|
2
|
+
|
|
3
|
+
import java.util.List;
|
|
4
|
+
import java.util.ArrayList;
|
|
5
|
+
import java.util.concurrent.ExecutionException;
|
|
6
|
+
import java.io.File;
|
|
7
|
+
import java.io.IOException;
|
|
8
|
+
import com.google.inject.Injector;
|
|
9
|
+
import com.google.common.base.Optional;
|
|
10
|
+
import com.google.common.base.Throwables;
|
|
11
|
+
import com.google.common.collect.ImmutableList;
|
|
12
|
+
import com.fasterxml.jackson.core.JsonFactory;
|
|
13
|
+
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
14
|
+
import org.jruby.embed.ScriptingContainer;
|
|
15
|
+
import org.apache.hadoop.fs.Path;
|
|
16
|
+
import org.apache.hadoop.fs.FileStatus;
|
|
17
|
+
import org.apache.hadoop.fs.FSDataInputStream;
|
|
18
|
+
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
19
|
+
import org.apache.hadoop.fs.LocalDirAllocator;
|
|
20
|
+
import org.apache.hadoop.io.IntWritable;
|
|
21
|
+
import org.apache.hadoop.io.NullWritable;
|
|
22
|
+
import org.apache.hadoop.conf.Configuration;
|
|
23
|
+
import org.apache.hadoop.mapreduce.Job;
|
|
24
|
+
import org.apache.hadoop.mapreduce.JobContext;
|
|
25
|
+
import org.apache.hadoop.mapreduce.TaskAttemptID;
|
|
26
|
+
import org.apache.hadoop.mapreduce.Mapper;
|
|
27
|
+
import org.apache.hadoop.mapreduce.Reducer;
|
|
28
|
+
import org.apache.hadoop.mapreduce.TaskAttemptContext;
|
|
29
|
+
import org.apache.hadoop.mapreduce.MRConfig;
|
|
30
|
+
import org.embulk.config.ModelManager;
|
|
31
|
+
import org.embulk.config.ConfigSource;
|
|
32
|
+
import org.embulk.config.ConfigLoader;
|
|
33
|
+
import org.embulk.config.CommitReport;
|
|
34
|
+
import org.embulk.spi.BufferAllocator;
|
|
35
|
+
import org.embulk.spi.Exec;
|
|
36
|
+
import org.embulk.spi.ExecAction;
|
|
37
|
+
import org.embulk.spi.ExecSession;
|
|
38
|
+
import org.embulk.spi.ProcessTask;
|
|
39
|
+
import org.embulk.spi.util.Executors;
|
|
40
|
+
import org.embulk.EmbulkService;
|
|
41
|
+
|
|
42
|
+
public class EmbulkMapReduce
|
|
43
|
+
{
|
|
44
|
+
private static final String CK_SYSTEM_CONFIG = "embulk.mapreduce.systemConfig";
|
|
45
|
+
private static final String CK_STATE_DIRECTORY_PATH = "embulk.mapreduce.stateDirectorypath";
|
|
46
|
+
private static final String CK_TASK_COUNT = "embulk.mapreduce.taskCount";
|
|
47
|
+
private static final String CK_TASK = "embulk.mapreduce.task";
|
|
48
|
+
private static final String CK_PLUGIN_ARCHIVE_SPECS = "embulk.mapreduce.pluginArchive.specs";
|
|
49
|
+
private static final String PLUGIN_ARCHIVE_FILE_NAME = "gems.zip";
|
|
50
|
+
|
|
51
|
+
public static void setSystemConfig(Configuration config, ModelManager modelManager, ConfigSource systemConfig)
|
|
52
|
+
{
|
|
53
|
+
config.set(CK_SYSTEM_CONFIG, modelManager.writeObject(systemConfig));
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
public static ConfigSource getSystemConfig(Configuration config)
|
|
57
|
+
{
|
|
58
|
+
try {
|
|
59
|
+
ModelManager bootstrapModelManager = new ModelManager(null, new ObjectMapper());
|
|
60
|
+
return new ConfigLoader(bootstrapModelManager).fromJson(
|
|
61
|
+
new JsonFactory().createParser(config.get(CK_SYSTEM_CONFIG))); // TODO add fromJson(String)
|
|
62
|
+
} catch (IOException e) {
|
|
63
|
+
throw Throwables.propagate(e);
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
public static void setMapTaskCount(Configuration config, int taskCount)
|
|
68
|
+
{
|
|
69
|
+
config.setInt(CK_TASK_COUNT, taskCount);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
public static int getMapTaskCount(Configuration config)
|
|
73
|
+
{
|
|
74
|
+
return config.getInt(CK_TASK_COUNT, 0);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
public static void setStateDirectoryPath(Configuration config, Path path)
|
|
78
|
+
{
|
|
79
|
+
config.set(CK_STATE_DIRECTORY_PATH, path.toString());
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
public static Path getStateDirectoryPath(Configuration config)
|
|
83
|
+
{
|
|
84
|
+
return new Path(config.get(CK_STATE_DIRECTORY_PATH));
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
public static void setExecutorTask(Configuration config, ModelManager modelManager, MapReduceExecutorTask task)
|
|
88
|
+
{
|
|
89
|
+
config.set(CK_TASK, modelManager.writeObject(task));
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
public static MapReduceExecutorTask getExecutorTask(Injector injector, Configuration config)
|
|
93
|
+
{
|
|
94
|
+
return injector.getInstance(ModelManager.class).readObject(MapReduceExecutorTask.class,
|
|
95
|
+
config.get(CK_TASK));
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
public static Injector newEmbulkInstance(Configuration config)
|
|
99
|
+
{
|
|
100
|
+
ConfigSource systemConfig = getSystemConfig(config);
|
|
101
|
+
return new EmbulkService(systemConfig).getInjector();
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
public static List<TaskAttemptID> listAttempts(Configuration config,
|
|
105
|
+
Path stateDir) throws IOException
|
|
106
|
+
{
|
|
107
|
+
FileStatus[] stats = stateDir.getFileSystem(config).listStatus(stateDir);
|
|
108
|
+
ImmutableList.Builder<TaskAttemptID> builder = ImmutableList.builder();
|
|
109
|
+
for (FileStatus stat : stats) {
|
|
110
|
+
if (stat.getPath().getName().startsWith("attempt_") && stat.isFile()) {
|
|
111
|
+
String name = stat.getPath().getName();
|
|
112
|
+
try {
|
|
113
|
+
builder.add(TaskAttemptID.forName(name));
|
|
114
|
+
} catch (IllegalArgumentException ex) {
|
|
115
|
+
// ignore
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
return builder.build();
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
public static PluginArchive readPluginArchive(File localDirectory, Configuration config,
|
|
123
|
+
Path stateDir, ModelManager modelManager) throws IOException
|
|
124
|
+
{
|
|
125
|
+
List<PluginArchive.GemSpec> specs = modelManager.readObject(
|
|
126
|
+
new ArrayList<PluginArchive.GemSpec>() {}.getClass(),
|
|
127
|
+
config.get(CK_PLUGIN_ARCHIVE_SPECS));
|
|
128
|
+
Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
|
|
129
|
+
try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
|
|
130
|
+
return PluginArchive.load(localDirectory, specs, in);
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
public static void writePluginArchive(Configuration config, Path stateDir,
|
|
135
|
+
PluginArchive archive, ModelManager modelManager) throws IOException
|
|
136
|
+
{
|
|
137
|
+
Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
|
|
138
|
+
try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
|
|
139
|
+
List<PluginArchive.GemSpec> specs = archive.dump(out);
|
|
140
|
+
config.set(CK_PLUGIN_ARCHIVE_SPECS, modelManager.writeObject(specs));
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
public static AttemptState readAttemptStateFile(Configuration config,
|
|
145
|
+
Path stateDir, TaskAttemptID id, ModelManager modelManager) throws IOException
|
|
146
|
+
{
|
|
147
|
+
Path path = new Path(stateDir, id.toString());
|
|
148
|
+
try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
|
|
149
|
+
return AttemptState.readFrom(in, modelManager);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
public static void writeAttemptStateFile(Configuration config,
|
|
154
|
+
Path stateDir, AttemptState state, ModelManager modelManager) throws IOException
|
|
155
|
+
{
|
|
156
|
+
Path path = new Path(stateDir, state.getAttemptId().toString());
|
|
157
|
+
try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
|
|
158
|
+
state.writeTo(out, modelManager);
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
public static class SessionRunner
|
|
163
|
+
{
|
|
164
|
+
private final Configuration config;
|
|
165
|
+
private final Injector injector;
|
|
166
|
+
private final ModelManager modelManager;
|
|
167
|
+
private final MapReduceExecutorTask task;
|
|
168
|
+
private final ExecSession session;
|
|
169
|
+
private final File localGemPath;
|
|
170
|
+
|
|
171
|
+
public SessionRunner(TaskAttemptContext context)
|
|
172
|
+
{
|
|
173
|
+
this.config = context.getConfiguration();
|
|
174
|
+
this.injector = newEmbulkInstance(context.getConfiguration());
|
|
175
|
+
this.modelManager = injector.getInstance(ModelManager.class);
|
|
176
|
+
this.task = getExecutorTask(injector, context.getConfiguration());
|
|
177
|
+
this.session = new ExecSession(injector, task.getExecConfig());
|
|
178
|
+
|
|
179
|
+
try {
|
|
180
|
+
LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
|
|
181
|
+
Path destPath = localDirAllocator.getLocalPathForWrite("gems", config);
|
|
182
|
+
this.localGemPath = new File(destPath.toString());
|
|
183
|
+
} catch (IOException ex) {
|
|
184
|
+
throw new RuntimeException(ex);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
public PluginArchive readPluginArchive() throws IOException
|
|
189
|
+
{
|
|
190
|
+
localGemPath.mkdirs();
|
|
191
|
+
return EmbulkMapReduce.readPluginArchive(localGemPath, config, getStateDirectoryPath(config), modelManager);
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
public Configuration getConfiguration()
|
|
195
|
+
{
|
|
196
|
+
return config;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
public ModelManager getModelManager()
|
|
200
|
+
{
|
|
201
|
+
return modelManager;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
public BufferAllocator getBufferAllocator()
|
|
205
|
+
{
|
|
206
|
+
return injector.getInstance(BufferAllocator.class);
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
public ScriptingContainer getScriptingContainer()
|
|
210
|
+
{
|
|
211
|
+
return injector.getInstance(ScriptingContainer.class);
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
public MapReduceExecutorTask getMapReduceExecutorTask()
|
|
215
|
+
{
|
|
216
|
+
return task;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
public ExecSession getExecSession()
|
|
220
|
+
{
|
|
221
|
+
return session;
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
public <T> T execSession(ExecAction<T> action) throws IOException, InterruptedException
|
|
225
|
+
{
|
|
226
|
+
try {
|
|
227
|
+
return Exec.doWith(session, action);
|
|
228
|
+
} catch (ExecutionException e) {
|
|
229
|
+
Throwables.propagateIfInstanceOf(e.getCause(), IOException.class);
|
|
230
|
+
Throwables.propagateIfInstanceOf(e.getCause(), InterruptedException.class);
|
|
231
|
+
throw Throwables.propagate(e.getCause());
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
public void deleteTempFiles()
|
|
236
|
+
{
|
|
237
|
+
// TODO delete localGemPath
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
public static class AttemptStateUpdateHandler
|
|
242
|
+
implements Executors.ProcessStateCallback
|
|
243
|
+
{
|
|
244
|
+
private final Configuration config;
|
|
245
|
+
private final Path stateDir;
|
|
246
|
+
private final ModelManager modelManager;
|
|
247
|
+
private final AttemptState state;
|
|
248
|
+
|
|
249
|
+
public AttemptStateUpdateHandler(SessionRunner runner, AttemptState state)
|
|
250
|
+
{
|
|
251
|
+
this.config = runner.getConfiguration();
|
|
252
|
+
this.stateDir = getStateDirectoryPath(config);
|
|
253
|
+
this.state = state;
|
|
254
|
+
this.modelManager = runner.getModelManager();
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
@Override
|
|
258
|
+
public void started()
|
|
259
|
+
{
|
|
260
|
+
try {
|
|
261
|
+
writeAttemptStateFile(config, stateDir, state, modelManager);
|
|
262
|
+
} catch (IOException e) {
|
|
263
|
+
throw new RuntimeException(e);
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
@Override
|
|
268
|
+
public void inputCommitted(CommitReport report)
|
|
269
|
+
{
|
|
270
|
+
state.setInputCommitReport(report);
|
|
271
|
+
try {
|
|
272
|
+
writeAttemptStateFile(config, stateDir, state, modelManager);
|
|
273
|
+
} catch (IOException e) {
|
|
274
|
+
throw new RuntimeException(e);
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
@Override
|
|
279
|
+
public void outputCommitted(CommitReport report)
|
|
280
|
+
{
|
|
281
|
+
state.setOutputCommitReport(report);
|
|
282
|
+
try {
|
|
283
|
+
writeAttemptStateFile(config, stateDir, state, modelManager);
|
|
284
|
+
} catch (IOException e) {
|
|
285
|
+
throw new RuntimeException(e);
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
public void setException(Throwable ex) throws IOException
|
|
290
|
+
{
|
|
291
|
+
state.setException(ex);
|
|
292
|
+
writeAttemptStateFile(config, stateDir, state, modelManager);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
public static class EmbulkMapper
|
|
297
|
+
extends Mapper<IntWritable, NullWritable, NullWritable, NullWritable>
|
|
298
|
+
{
|
|
299
|
+
private Context context;
|
|
300
|
+
private SessionRunner runner;
|
|
301
|
+
|
|
302
|
+
@Override
|
|
303
|
+
public void setup(Context context) throws IOException
|
|
304
|
+
{
|
|
305
|
+
this.context = context;
|
|
306
|
+
this.runner = new SessionRunner(context);
|
|
307
|
+
runner.readPluginArchive().restoreLoadPathsTo(runner.getScriptingContainer());
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
@Override
|
|
311
|
+
public void map(IntWritable key, NullWritable value, final Context context) throws IOException, InterruptedException
|
|
312
|
+
{
|
|
313
|
+
final int taskIndex = key.get();
|
|
314
|
+
|
|
315
|
+
runner.execSession(new ExecAction<Void>() {
|
|
316
|
+
public Void run() throws Exception
|
|
317
|
+
{
|
|
318
|
+
process(context, taskIndex);
|
|
319
|
+
return null;
|
|
320
|
+
}
|
|
321
|
+
});
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
private void process(final Context context, int taskIndex) throws IOException, InterruptedException
|
|
325
|
+
{
|
|
326
|
+
ProcessTask task = runner.getMapReduceExecutorTask().getProcessTask();
|
|
327
|
+
|
|
328
|
+
AttemptStateUpdateHandler handler = new AttemptStateUpdateHandler(runner,
|
|
329
|
+
new AttemptState(context.getTaskAttemptID(), Optional.of(taskIndex), Optional.of(taskIndex)));
|
|
330
|
+
|
|
331
|
+
try {
|
|
332
|
+
Executors.process(runner.getExecSession(), task, taskIndex, handler);
|
|
333
|
+
} catch (Throwable ex) {
|
|
334
|
+
try {
|
|
335
|
+
handler.setException(ex);
|
|
336
|
+
} catch (Throwable e) {
|
|
337
|
+
e.addSuppressed(ex);
|
|
338
|
+
throw e;
|
|
339
|
+
}
|
|
340
|
+
//if (task.getTaskRecovery()) {
|
|
341
|
+
// throw ex;
|
|
342
|
+
//}
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
public static class EmbulkReducer
|
|
348
|
+
extends Reducer<NullWritable, NullWritable, NullWritable, NullWritable>
|
|
349
|
+
{
|
|
350
|
+
private IntWritable result = new IntWritable();
|
|
351
|
+
|
|
352
|
+
@Override
|
|
353
|
+
public void reduce(NullWritable key, Iterable<NullWritable> values, Context context)
|
|
354
|
+
throws IOException, InterruptedException
|
|
355
|
+
{
|
|
356
|
+
// do nothing
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
}
|