embulk-executor-mapreduce 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/classpath/asm-3.1.jar +0 -0
- data/classpath/{embulk-executor-mapreduce-0.1.0.jar → embulk-executor-mapreduce-0.1.1.jar} +0 -0
- data/src/main/java/org/embulk/executor/mapreduce/AttemptState.java +1 -0
- data/src/main/java/org/embulk/executor/mapreduce/EmbulkMapReduce.java +53 -5
- data/src/main/java/org/embulk/executor/mapreduce/MapReduceExecutor.java +31 -12
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8a1f6914bd6836006726de5d2f8badef02c614b
|
4
|
+
data.tar.gz: 97b14a2720664e78424dd8974865c97bbb4165de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b9c87ea48d10b8cab86e60aa251f0ce4d10720350cbe57c8ad229cb5a5e00d98ba05cb8e68e63218da9c619a34c100ef9da3e71bfd895a97b6d09e2ac816db4
|
7
|
+
data.tar.gz: 996dd45f438a2e420649627130021d20a295ce540611974bb50f79dc961b0f341adcce1ca944bd6facfd751bbfc430e944861e03f91fb57f7b785cf03c098735
|
Binary file
|
Binary file
|
@@ -144,6 +144,7 @@ public class AttemptState
|
|
144
144
|
|
145
145
|
public static AttemptState readFrom(InputStream in, ModelManager modelManager) throws IOException
|
146
146
|
{
|
147
|
+
// If InputStream contains partial JSON (like '{"key":"va'), this method throws EOFException
|
147
148
|
Scanner s = new Scanner(in, "UTF-8").useDelimiter("\\A"); // TODO
|
148
149
|
if (s.hasNext()) {
|
149
150
|
return modelManager.readObject(AttemptState.class, s.next());
|
@@ -1,5 +1,7 @@
|
|
1
1
|
package org.embulk.executor.mapreduce;
|
2
2
|
|
3
|
+
import java.io.EOFException;
|
4
|
+
import java.io.InterruptedIOException;
|
3
5
|
import java.util.List;
|
4
6
|
import java.util.ArrayList;
|
5
7
|
import java.util.concurrent.ExecutionException;
|
@@ -37,7 +39,12 @@ import org.embulk.spi.ExecAction;
|
|
37
39
|
import org.embulk.spi.ExecSession;
|
38
40
|
import org.embulk.spi.ProcessTask;
|
39
41
|
import org.embulk.spi.util.Executors;
|
42
|
+
import org.embulk.spi.util.RetryExecutor.Retryable;
|
43
|
+
import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
|
40
44
|
import org.embulk.EmbulkService;
|
45
|
+
import org.slf4j.Logger;
|
46
|
+
|
47
|
+
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
41
48
|
|
42
49
|
public class EmbulkMapReduce
|
43
50
|
{
|
@@ -141,12 +148,52 @@ public class EmbulkMapReduce
|
|
141
148
|
}
|
142
149
|
}
|
143
150
|
|
144
|
-
public static AttemptState readAttemptStateFile(Configuration config,
|
145
|
-
Path stateDir, TaskAttemptID id, ModelManager modelManager) throws IOException
|
151
|
+
public static AttemptState readAttemptStateFile(final Configuration config,
|
152
|
+
Path stateDir, TaskAttemptID id, final ModelManager modelManager) throws IOException
|
146
153
|
{
|
147
|
-
|
148
|
-
|
149
|
-
|
154
|
+
final Logger log = Exec.getLogger(EmbulkMapReduce.class);
|
155
|
+
final Path path = new Path(stateDir, id.toString());
|
156
|
+
try {
|
157
|
+
return retryExecutor()
|
158
|
+
.withRetryLimit(5)
|
159
|
+
.withInitialRetryWait(2 * 1000)
|
160
|
+
.withMaxRetryWait(20 * 1000)
|
161
|
+
.runInterruptible(new Retryable<AttemptState>() {
|
162
|
+
@Override
|
163
|
+
public AttemptState call() throws IOException {
|
164
|
+
try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
|
165
|
+
return AttemptState.readFrom(in, modelManager);
|
166
|
+
}
|
167
|
+
}
|
168
|
+
|
169
|
+
@Override
|
170
|
+
public boolean isRetryableException(Exception exception) {
|
171
|
+
// AttemptState.readFrom can fail in 3 ways:
|
172
|
+
// a) EOFException: race between readFrom and writeTo. See comments on AttemptState.readFrom.
|
173
|
+
// b) IOException "Cannot obtain block length for LocatedBlock": HDFS-1058. See https://github.com/embulk/embulk-executor-mapreduce/pull/3
|
174
|
+
// c) other IOException: FileSystem is not working
|
175
|
+
//
|
176
|
+
// a) and b) are temporary problems which are not critical. c) could be a temporary problem, but it is critical.
|
177
|
+
// Retry here regardless of the exception type because we can't distinguish b) from c).
|
178
|
+
return true;
|
179
|
+
}
|
180
|
+
|
181
|
+
@Override
|
182
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
183
|
+
throws RetryGiveupException {
|
184
|
+
log.warn("Retrying opening state file " + path.getName() + " error: " + exception);
|
185
|
+
}
|
186
|
+
|
187
|
+
@Override
|
188
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
189
|
+
throws RetryGiveupException {
|
190
|
+
}
|
191
|
+
});
|
192
|
+
} catch (RetryGiveupException e) {
|
193
|
+
Throwables.propagateIfInstanceOf(e.getCause(), IOException.class);
|
194
|
+
throw Throwables.propagate(e.getCause());
|
195
|
+
} catch (InterruptedException e) {
|
196
|
+
throw new InterruptedIOException();
|
150
197
|
}
|
151
198
|
}
|
152
199
|
|
@@ -154,6 +201,7 @@ public class EmbulkMapReduce
|
|
154
201
|
Path stateDir, AttemptState state, ModelManager modelManager) throws IOException
|
155
202
|
{
|
156
203
|
Path path = new Path(stateDir, state.getAttemptId().toString());
|
204
|
+
// TODO retry file create and write
|
157
205
|
try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
|
158
206
|
state.writeTo(out, modelManager);
|
159
207
|
}
|
@@ -221,12 +221,14 @@ public class MapReduceExecutor
|
|
221
221
|
job.mapProgress() * 100, job.reduceProgress() * 100));
|
222
222
|
Thread.sleep(interval);
|
223
223
|
|
224
|
-
updateProcessState(job, mapTaskCount, stateDir, state, modelManager);
|
224
|
+
updateProcessState(job, mapTaskCount, stateDir, state, modelManager, true);
|
225
225
|
}
|
226
226
|
|
227
|
+
// Pass skipUnavailable=false to updateProcessState here because the race
|
228
|
+
// condition between AttemptState.readFrom and .writeTo does not happen here.
|
227
229
|
log.info(String.format("map %.1f%% reduce %.1f%%",
|
228
230
|
job.mapProgress() * 100, job.reduceProgress() * 100));
|
229
|
-
updateProcessState(job, mapTaskCount, stateDir, state, modelManager);
|
231
|
+
updateProcessState(job, mapTaskCount, stateDir, state, modelManager, false);
|
230
232
|
|
231
233
|
Counters counters = job.getCounters();
|
232
234
|
if (counters != null) {
|
@@ -291,7 +293,7 @@ public class MapReduceExecutor
|
|
291
293
|
}
|
292
294
|
|
293
295
|
private void updateProcessState(Job job, int mapTaskCount, Path stateDir,
|
294
|
-
ProcessState state, ModelManager modelManager) throws IOException
|
296
|
+
ProcessState state, ModelManager modelManager, boolean skipUnavailable) throws IOException
|
295
297
|
{
|
296
298
|
List<AttemptReport> reports = getAttemptReports(job.getConfiguration(), stateDir, modelManager);
|
297
299
|
|
@@ -299,8 +301,12 @@ public class MapReduceExecutor
|
|
299
301
|
if (report == null) {
|
300
302
|
continue;
|
301
303
|
}
|
302
|
-
if (!report.
|
303
|
-
|
304
|
+
if (!report.isAvailable()) {
|
305
|
+
if (skipUnavailable) {
|
306
|
+
continue;
|
307
|
+
} else {
|
308
|
+
throw report.getUnavailableException();
|
309
|
+
}
|
304
310
|
}
|
305
311
|
AttemptState attempt = report.getAttemptState();
|
306
312
|
if (attempt.getInputTaskIndex().isPresent()) {
|
@@ -338,23 +344,32 @@ public class MapReduceExecutor
|
|
338
344
|
{
|
339
345
|
private final TaskAttemptID attemptId;
|
340
346
|
private final AttemptState attemptState;
|
347
|
+
private final IOException unavailableException;
|
341
348
|
|
342
|
-
public AttemptReport(TaskAttemptID attemptId)
|
349
|
+
public AttemptReport(TaskAttemptID attemptId, AttemptState attemptState)
|
343
350
|
{
|
344
|
-
this
|
351
|
+
this.attemptId = attemptId;
|
352
|
+
this.attemptState = attemptState;
|
353
|
+
this.unavailableException = null;
|
345
354
|
}
|
346
355
|
|
347
|
-
public AttemptReport(TaskAttemptID attemptId,
|
356
|
+
public AttemptReport(TaskAttemptID attemptId, IOException unavailableException)
|
348
357
|
{
|
349
358
|
this.attemptId = attemptId;
|
350
|
-
this.attemptState =
|
359
|
+
this.attemptState = null;
|
360
|
+
this.unavailableException = unavailableException;
|
351
361
|
}
|
352
362
|
|
353
|
-
public boolean
|
363
|
+
public boolean isAvailable()
|
354
364
|
{
|
355
365
|
return attemptState != null;
|
356
366
|
}
|
357
367
|
|
368
|
+
public IOException getUnavailableException()
|
369
|
+
{
|
370
|
+
return unavailableException;
|
371
|
+
}
|
372
|
+
|
358
373
|
public boolean isInputCommitted()
|
359
374
|
{
|
360
375
|
return attemptState != null && attemptState.getInputCommitReport().isPresent();
|
@@ -382,8 +397,12 @@ public class MapReduceExecutor
|
|
382
397
|
AttemptState state = EmbulkMapReduce.readAttemptStateFile(config,
|
383
398
|
stateDir, aid, modelManager);
|
384
399
|
builder.add(new AttemptReport(aid, state));
|
385
|
-
} catch (
|
386
|
-
|
400
|
+
} catch (IOException ex) {
|
401
|
+
// Either of:
|
402
|
+
// * race condition between AttemptState.writeTo and .readFrom
|
403
|
+
// * FileSystem is not working
|
404
|
+
// See also comments on EmbulkMapReduce.readAttemptStateFile.isRetryableException.
|
405
|
+
builder.add(new AttemptReport(aid, ex));
|
387
406
|
}
|
388
407
|
}
|
389
408
|
return builder.build();
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-executor-mapreduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-06-22 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Executes tasks on Hadoop.
|
14
14
|
email:
|
@@ -42,6 +42,7 @@ files:
|
|
42
42
|
- classpath/apacheds-kerberos-codec-2.0.0-M15.jar
|
43
43
|
- classpath/api-asn1-api-1.0.0-M20.jar
|
44
44
|
- classpath/api-util-1.0.0-M20.jar
|
45
|
+
- classpath/asm-3.1.jar
|
45
46
|
- classpath/avro-1.7.4.jar
|
46
47
|
- classpath/commons-beanutils-1.7.0.jar
|
47
48
|
- classpath/commons-cli-1.2.jar
|
@@ -59,7 +60,7 @@ files:
|
|
59
60
|
- classpath/curator-client-2.6.0.jar
|
60
61
|
- classpath/curator-framework-2.6.0.jar
|
61
62
|
- classpath/curator-recipes-2.6.0.jar
|
62
|
-
- classpath/embulk-executor-mapreduce-0.1.0.jar
|
63
|
+
- classpath/embulk-executor-mapreduce-0.1.1.jar
|
63
64
|
- classpath/gson-2.2.4.jar
|
64
65
|
- classpath/hadoop-annotations-2.6.0.jar
|
65
66
|
- classpath/hadoop-auth-2.6.0.jar
|