embulk-executor-mapreduce 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/classpath/asm-3.1.jar +0 -0
- data/classpath/{embulk-executor-mapreduce-0.1.0.jar → embulk-executor-mapreduce-0.1.1.jar} +0 -0
- data/src/main/java/org/embulk/executor/mapreduce/AttemptState.java +1 -0
- data/src/main/java/org/embulk/executor/mapreduce/EmbulkMapReduce.java +53 -5
- data/src/main/java/org/embulk/executor/mapreduce/MapReduceExecutor.java +31 -12
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8a1f6914bd6836006726de5d2f8badef02c614b
|
4
|
+
data.tar.gz: 97b14a2720664e78424dd8974865c97bbb4165de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b9c87ea48d10b8cab86e60aa251f0ce4d10720350cbe57c8ad229cb5a5e00d98ba05cb8e68e63218da9c619a34c100ef9da3e71bfd895a97b6d09e2ac816db4
|
7
|
+
data.tar.gz: 996dd45f438a2e420649627130021d20a295ce540611974bb50f79dc961b0f341adcce1ca944bd6facfd751bbfc430e944861e03f91fb57f7b785cf03c098735
|
Binary file
|
Binary file
|
@@ -144,6 +144,7 @@ public class AttemptState
|
|
144
144
|
|
145
145
|
public static AttemptState readFrom(InputStream in, ModelManager modelManager) throws IOException
|
146
146
|
{
|
147
|
+
// If InputStream contains partial JSON (like '{"key":"va'), this method throws EOFException
|
147
148
|
Scanner s = new Scanner(in, "UTF-8").useDelimiter("\\A"); // TODO
|
148
149
|
if (s.hasNext()) {
|
149
150
|
return modelManager.readObject(AttemptState.class, s.next());
|
@@ -1,5 +1,7 @@
|
|
1
1
|
package org.embulk.executor.mapreduce;
|
2
2
|
|
3
|
+
import java.io.EOFException;
|
4
|
+
import java.io.InterruptedIOException;
|
3
5
|
import java.util.List;
|
4
6
|
import java.util.ArrayList;
|
5
7
|
import java.util.concurrent.ExecutionException;
|
@@ -37,7 +39,12 @@ import org.embulk.spi.ExecAction;
|
|
37
39
|
import org.embulk.spi.ExecSession;
|
38
40
|
import org.embulk.spi.ProcessTask;
|
39
41
|
import org.embulk.spi.util.Executors;
|
42
|
+
import org.embulk.spi.util.RetryExecutor.Retryable;
|
43
|
+
import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
|
40
44
|
import org.embulk.EmbulkService;
|
45
|
+
import org.slf4j.Logger;
|
46
|
+
|
47
|
+
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
41
48
|
|
42
49
|
public class EmbulkMapReduce
|
43
50
|
{
|
@@ -141,12 +148,52 @@ public class EmbulkMapReduce
|
|
141
148
|
}
|
142
149
|
}
|
143
150
|
|
144
|
-
public static AttemptState readAttemptStateFile(Configuration config,
|
145
|
-
Path stateDir, TaskAttemptID id, ModelManager modelManager) throws IOException
|
151
|
+
public static AttemptState readAttemptStateFile(final Configuration config,
|
152
|
+
Path stateDir, TaskAttemptID id, final ModelManager modelManager) throws IOException
|
146
153
|
{
|
147
|
-
|
148
|
-
|
149
|
-
|
154
|
+
final Logger log = Exec.getLogger(EmbulkMapReduce.class);
|
155
|
+
final Path path = new Path(stateDir, id.toString());
|
156
|
+
try {
|
157
|
+
return retryExecutor()
|
158
|
+
.withRetryLimit(5)
|
159
|
+
.withInitialRetryWait(2 * 1000)
|
160
|
+
.withMaxRetryWait(20 * 1000)
|
161
|
+
.runInterruptible(new Retryable<AttemptState>() {
|
162
|
+
@Override
|
163
|
+
public AttemptState call() throws IOException {
|
164
|
+
try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
|
165
|
+
return AttemptState.readFrom(in, modelManager);
|
166
|
+
}
|
167
|
+
}
|
168
|
+
|
169
|
+
@Override
|
170
|
+
public boolean isRetryableException(Exception exception) {
|
171
|
+
// AttemptState.readFrom throws 3 types of exceptions:
|
172
|
+
// a) EOFException: race between readFrom and writeTo. See comments on AttemptState.readFrom.
|
173
|
+
// b) IOException "Cannot obtain block length for LocatedBlock": HDFS-1058. See https://github.com/embulk/embulk-executor-mapreduce/pull/3
|
174
|
+
// c) other IOException: FileSystem is not working
|
175
|
+
//
|
176
|
+
// a) and b) are temporary problems that are not critical. c) may also be temporary, but it is critical.
|
177
|
+
// Retry regardless of the exception type because we can't distinguish b) from c).
|
178
|
+
return true;
|
179
|
+
}
|
180
|
+
|
181
|
+
@Override
|
182
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
183
|
+
throws RetryGiveupException {
|
184
|
+
log.warn("Retrying opening state file " + path.getName() + " error: " + exception);
|
185
|
+
}
|
186
|
+
|
187
|
+
@Override
|
188
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
189
|
+
throws RetryGiveupException {
|
190
|
+
}
|
191
|
+
});
|
192
|
+
} catch (RetryGiveupException e) {
|
193
|
+
Throwables.propagateIfInstanceOf(e.getCause(), IOException.class);
|
194
|
+
throw Throwables.propagate(e.getCause());
|
195
|
+
} catch (InterruptedException e) {
|
196
|
+
throw new InterruptedIOException();
|
150
197
|
}
|
151
198
|
}
|
152
199
|
|
@@ -154,6 +201,7 @@ public class EmbulkMapReduce
|
|
154
201
|
Path stateDir, AttemptState state, ModelManager modelManager) throws IOException
|
155
202
|
{
|
156
203
|
Path path = new Path(stateDir, state.getAttemptId().toString());
|
204
|
+
// TODO retry file create and write
|
157
205
|
try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
|
158
206
|
state.writeTo(out, modelManager);
|
159
207
|
}
|
@@ -221,12 +221,14 @@ public class MapReduceExecutor
|
|
221
221
|
job.mapProgress() * 100, job.reduceProgress() * 100));
|
222
222
|
Thread.sleep(interval);
|
223
223
|
|
224
|
-
updateProcessState(job, mapTaskCount, stateDir, state, modelManager);
|
224
|
+
updateProcessState(job, mapTaskCount, stateDir, state, modelManager, true);
|
225
225
|
}
|
226
226
|
|
227
|
+
// Pass skipUnavailable=false to updateProcessState here because the race
|
228
|
+
// condition between AttemptState.readFrom and .writeTo cannot happen at this point.
|
227
229
|
log.info(String.format("map %.1f%% reduce %.1f%%",
|
228
230
|
job.mapProgress() * 100, job.reduceProgress() * 100));
|
229
|
-
updateProcessState(job, mapTaskCount, stateDir, state, modelManager);
|
231
|
+
updateProcessState(job, mapTaskCount, stateDir, state, modelManager, false);
|
230
232
|
|
231
233
|
Counters counters = job.getCounters();
|
232
234
|
if (counters != null) {
|
@@ -291,7 +293,7 @@ public class MapReduceExecutor
|
|
291
293
|
}
|
292
294
|
|
293
295
|
private void updateProcessState(Job job, int mapTaskCount, Path stateDir,
|
294
|
-
ProcessState state, ModelManager modelManager) throws IOException
|
296
|
+
ProcessState state, ModelManager modelManager, boolean skipUnavailable) throws IOException
|
295
297
|
{
|
296
298
|
List<AttemptReport> reports = getAttemptReports(job.getConfiguration(), stateDir, modelManager);
|
297
299
|
|
@@ -299,8 +301,12 @@ public class MapReduceExecutor
|
|
299
301
|
if (report == null) {
|
300
302
|
continue;
|
301
303
|
}
|
302
|
-
if (!report.
|
303
|
-
|
304
|
+
if (!report.isAvailable()) {
|
305
|
+
if (skipUnavailable) {
|
306
|
+
continue;
|
307
|
+
} else {
|
308
|
+
throw report.getUnavailableException();
|
309
|
+
}
|
304
310
|
}
|
305
311
|
AttemptState attempt = report.getAttemptState();
|
306
312
|
if (attempt.getInputTaskIndex().isPresent()) {
|
@@ -338,23 +344,32 @@ public class MapReduceExecutor
|
|
338
344
|
{
|
339
345
|
private final TaskAttemptID attemptId;
|
340
346
|
private final AttemptState attemptState;
|
347
|
+
private final IOException unavailableException;
|
341
348
|
|
342
|
-
public AttemptReport(TaskAttemptID attemptId)
|
349
|
+
public AttemptReport(TaskAttemptID attemptId, AttemptState attemptState)
|
343
350
|
{
|
344
|
-
this
|
351
|
+
this.attemptId = attemptId;
|
352
|
+
this.attemptState = attemptState;
|
353
|
+
this.unavailableException = null;
|
345
354
|
}
|
346
355
|
|
347
|
-
public AttemptReport(TaskAttemptID attemptId,
|
356
|
+
public AttemptReport(TaskAttemptID attemptId, IOException unavailableException)
|
348
357
|
{
|
349
358
|
this.attemptId = attemptId;
|
350
|
-
this.attemptState =
|
359
|
+
this.attemptState = null;
|
360
|
+
this.unavailableException = unavailableException;
|
351
361
|
}
|
352
362
|
|
353
|
-
public boolean
|
363
|
+
public boolean isAvailable()
|
354
364
|
{
|
355
365
|
return attemptState != null;
|
356
366
|
}
|
357
367
|
|
368
|
+
public IOException getUnavailableException()
|
369
|
+
{
|
370
|
+
return unavailableException;
|
371
|
+
}
|
372
|
+
|
358
373
|
public boolean isInputCommitted()
|
359
374
|
{
|
360
375
|
return attemptState != null && attemptState.getInputCommitReport().isPresent();
|
@@ -382,8 +397,12 @@ public class MapReduceExecutor
|
|
382
397
|
AttemptState state = EmbulkMapReduce.readAttemptStateFile(config,
|
383
398
|
stateDir, aid, modelManager);
|
384
399
|
builder.add(new AttemptReport(aid, state));
|
385
|
-
} catch (
|
386
|
-
|
400
|
+
} catch (IOException ex) {
|
401
|
+
// Either of:
|
402
|
+
// * race condition between AttemptState.writeTo and .readFrom
|
403
|
+
// * FileSystem is not working
|
404
|
+
// See also comments on EmbulkMapReduce.readAttemptStateFile.isRetryableException.
|
405
|
+
builder.add(new AttemptReport(aid, ex));
|
387
406
|
}
|
388
407
|
}
|
389
408
|
return builder.build();
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-executor-mapreduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-06-22 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Executes tasks on Hadoop.
|
14
14
|
email:
|
@@ -42,6 +42,7 @@ files:
|
|
42
42
|
- classpath/apacheds-kerberos-codec-2.0.0-M15.jar
|
43
43
|
- classpath/api-asn1-api-1.0.0-M20.jar
|
44
44
|
- classpath/api-util-1.0.0-M20.jar
|
45
|
+
- classpath/asm-3.1.jar
|
45
46
|
- classpath/avro-1.7.4.jar
|
46
47
|
- classpath/commons-beanutils-1.7.0.jar
|
47
48
|
- classpath/commons-cli-1.2.jar
|
@@ -59,7 +60,7 @@ files:
|
|
59
60
|
- classpath/curator-client-2.6.0.jar
|
60
61
|
- classpath/curator-framework-2.6.0.jar
|
61
62
|
- classpath/curator-recipes-2.6.0.jar
|
62
|
-
- classpath/embulk-executor-mapreduce-0.1.0.jar
|
63
|
+
- classpath/embulk-executor-mapreduce-0.1.1.jar
|
63
64
|
- classpath/gson-2.2.4.jar
|
64
65
|
- classpath/hadoop-annotations-2.6.0.jar
|
65
66
|
- classpath/hadoop-auth-2.6.0.jar
|