embulk-executor-mapreduce 0.1.2 → 0.1.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 05feb97f9e21b2feec1b0e2d1517d17797d68c54
-   data.tar.gz: f7ddeeee84821d58ca377d7d396ea403b2a27b27
+   metadata.gz: dc83806412506bc567037cdf24a1c247a99abf13
+   data.tar.gz: 10a3dd696c3729f58a0c4e2b0ec5b0217cbdfcc1
  SHA512:
-   metadata.gz: e3213d0c7269f68824c94a06cdeca185deeb9ebdc5b518569c99893b75a8ab9b3bb55f4bc93295844891e1f12ecb48fed649f16d8ea2220d65470b3810a3af47
-   data.tar.gz: 9b3104672bc7b400d8096850925e6e843d8c713fbd5b4adc88347102f7dd2256734fc6fd9904c7e6e9d6f76b61db9dd25c8707fe0555fa1318568a94fd72843f
+   metadata.gz: f4ef4b1809a3acf01d0cf449efd4bc026fc77e60bd3aba8708102b0118c06a4b5a7bfd06ce70f497d0121da86ddfe9012dc20ab152c0c192ba0dad1eb80065be
+   data.tar.gz: 0999ab7bc7eb9fa1e71e61212c6be680c9e5fa232ea1cd4d57f45a8e7228e41ea4d4ee8c1e794de64b5caf77232264126979560fe09988bb55d264bfa2839e70
src/main/java/org/embulk/executor/mapreduce/EmbulkMapReduce.java CHANGED
@@ -1,15 +1,18 @@
  package org.embulk.executor.mapreduce;

- import java.io.EOFException;
- import java.io.InterruptedIOException;
  import java.util.List;
  import java.util.ArrayList;
+ import java.util.concurrent.Callable;
  import java.util.concurrent.ExecutionException;
  import java.io.File;
  import java.io.IOException;
+ import java.io.EOFException;
+ import java.io.InterruptedIOException;
+ import java.lang.reflect.InvocationTargetException;
  import com.google.inject.Injector;
  import com.google.common.base.Optional;
  import com.google.common.base.Throwables;
+ import com.google.common.base.Throwables;
  import com.google.common.collect.ImmutableList;
  import com.fasterxml.jackson.core.JsonFactory;
  import com.fasterxml.jackson.databind.ObjectMapper;
@@ -24,6 +27,7 @@ import org.apache.hadoop.io.NullWritable;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.JobContext;
+ import org.apache.hadoop.mapreduce.Counters;
  import org.apache.hadoop.mapreduce.TaskAttemptID;
  import org.apache.hadoop.mapreduce.Mapper;
  import org.apache.hadoop.mapreduce.Reducer;
@@ -48,11 +52,14 @@ import static org.embulk.spi.util.RetryExecutor.retryExecutor;

  public class EmbulkMapReduce
  {
+     private static final String SYSTEM_CONFIG_SERVICE_CLASS = "mapreduce_service_class";
+
      private static final String CK_SYSTEM_CONFIG = "embulk.mapreduce.systemConfig";
      private static final String CK_STATE_DIRECTORY_PATH = "embulk.mapreduce.stateDirectorypath";
      private static final String CK_TASK_COUNT = "embulk.mapreduce.taskCount";
      private static final String CK_TASK = "embulk.mapreduce.task";
      private static final String CK_PLUGIN_ARCHIVE_SPECS = "embulk.mapreduce.pluginArchive.specs";
+
      private static final String PLUGIN_ARCHIVE_FILE_NAME = "gems.zip";

      public static void setSystemConfig(Configuration config, ModelManager modelManager, ConfigSource systemConfig)
@@ -105,51 +112,157 @@ public class EmbulkMapReduce
      public static Injector newEmbulkInstance(Configuration config)
      {
          ConfigSource systemConfig = getSystemConfig(config);
-         return new EmbulkService(systemConfig).getInjector();
+         String serviceClassName = systemConfig.get(String.class, SYSTEM_CONFIG_SERVICE_CLASS, "org.embulk.EmbulkService");
+
+         try {
+             Object obj;
+             if (serviceClassName.equals("org.embulk.EmbulkService")) {
+                 obj = new EmbulkService(systemConfig);
+             } else {
+                 Class<?> serviceClass = Class.forName(serviceClassName);
+                 obj = serviceClass.getConstructor(ConfigSource.class).newInstance(systemConfig);
+             }
+
+             if (obj instanceof EmbulkService) {
+                 return ((EmbulkService) obj).getInjector();
+             } else {
+                 return (Injector) obj.getClass().getMethod("getInjector").invoke(obj);
+             }
+
+         } catch (InvocationTargetException ex) {
+             throw Throwables.propagate(ex.getCause());
+         } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | IllegalArgumentException ex) {
+             throw Throwables.propagate(ex);
+         }
      }
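Note: the new mapreduce_service_class system config lets a deployment swap in its own service bootstrap class in place of org.embulk.EmbulkService. The reflective branch above needs a public constructor taking a ConfigSource and a getInjector() method. A minimal sketch of a compatible service follows; the class name com.example.CustomEmbulkService is a hypothetical placeholder, not part of this gem:

    package com.example;

    import org.embulk.EmbulkService;
    import org.embulk.config.ConfigSource;

    // Extending EmbulkService is the simplest route, because newEmbulkInstance
    // special-cases that type and calls getInjector() on it directly.
    public class CustomEmbulkService extends EmbulkService
    {
        // The reflective branch requires exactly this constructor signature.
        public CustomEmbulkService(ConfigSource systemConfig)
        {
            super(systemConfig);
        }
    }

Selecting it is then a matter of setting mapreduce_service_class: com.example.CustomEmbulkService in the Embulk system config.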

-     public static List<TaskAttemptID> listAttempts(Configuration config,
-             Path stateDir) throws IOException
+     public static class JobStatus
      {
-         FileStatus[] stats = stateDir.getFileSystem(config).listStatus(stateDir);
-         ImmutableList.Builder<TaskAttemptID> builder = ImmutableList.builder();
-         for (FileStatus stat : stats) {
-             if (stat.getPath().getName().startsWith("attempt_") && stat.isFile()) {
-                 String name = stat.getPath().getName();
-                 try {
-                     builder.add(TaskAttemptID.forName(name));
-                 } catch (IllegalArgumentException ex) {
-                     // ignore
+         private final boolean completed;
+         private final float mapProgress;
+         private final float reduceProgress;
+
+         public JobStatus(boolean completed, float mapProgress, float reduceProgress)
+         {
+             this.completed = completed;
+             this.mapProgress = mapProgress;
+             this.reduceProgress = reduceProgress;
+         }
+
+         public boolean isComplete()
+         {
+             return completed;
+         }
+
+         public float getMapProgress()
+         {
+             return mapProgress;
+         }
+
+         public float getReduceProgress()
+         {
+             return reduceProgress;
+         }
+     }
+
+     public static JobStatus getJobStatus(final Job job) throws IOException
+     {
+         return hadoopOperationWithRetry("getting job status", new Callable<JobStatus>() {
+             public JobStatus call() throws IOException
+             {
+                 return new JobStatus(job.isComplete(), job.mapProgress(), job.reduceProgress());
+             }
+         });
+     }
+
+     public static Counters getJobCounters(final Job job) throws IOException
+     {
+         return hadoopOperationWithRetry("getting job counters", new Callable<Counters>() {
+             public Counters call() throws IOException
+             {
+                 return job.getCounters();
+             }
+         });
+     }
+
+     public static List<TaskAttemptID> listAttempts(final Configuration config,
+             final Path stateDir) throws IOException
+     {
+         return hadoopOperationWithRetry("getting list of attempt state files on "+stateDir, new Callable<List<TaskAttemptID>>() {
+             public List<TaskAttemptID> call() throws IOException
+             {
+                 FileStatus[] stats = stateDir.getFileSystem(config).listStatus(stateDir);
+                 ImmutableList.Builder<TaskAttemptID> builder = ImmutableList.builder();
+                 for (FileStatus stat : stats) {
+                     if (stat.getPath().getName().startsWith("attempt_") && stat.isFile()) {
+                         String name = stat.getPath().getName();
+                         TaskAttemptID id;
+                         try {
+                             id = TaskAttemptID.forName(name);
+                         } catch (Exception ex) {
+                             // ignore this file
+                             continue;
+                         }
+                         builder.add(id);
+                     }
                  }
+                 return builder.build();
              }
-         }
-         return builder.build();
+         });
      }

-     public static PluginArchive readPluginArchive(File localDirectory, Configuration config,
-             Path stateDir, ModelManager modelManager) throws IOException
+     public static void writePluginArchive(final Configuration config, final Path stateDir,
+             final PluginArchive archive, final ModelManager modelManager) throws IOException
      {
-         List<PluginArchive.GemSpec> specs = modelManager.readObject(
-                 new ArrayList<PluginArchive.GemSpec>() {}.getClass(),
-                 config.get(CK_PLUGIN_ARCHIVE_SPECS));
-         Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
-         try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
-             return PluginArchive.load(localDirectory, specs, in);
-         }
+         final Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
+         hadoopOperationWithRetry("writing plugin archive to "+path, new Callable<Void>() {
+             public Void call() throws IOException
+             {
+                 stateDir.getFileSystem(config).mkdirs(stateDir);
+                 try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
+                     List<PluginArchive.GemSpec> specs = archive.dump(out);
+                     config.set(CK_PLUGIN_ARCHIVE_SPECS, modelManager.writeObject(specs));
+                 }
+                 return null;
+             }
+         });
      }

-     public static void writePluginArchive(Configuration config, Path stateDir,
-             PluginArchive archive, ModelManager modelManager) throws IOException
+     public static PluginArchive readPluginArchive(final File localDirectory, final Configuration config,
+             Path stateDir, final ModelManager modelManager) throws IOException
      {
-         Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
-         try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
-             List<PluginArchive.GemSpec> specs = archive.dump(out);
-             config.set(CK_PLUGIN_ARCHIVE_SPECS, modelManager.writeObject(specs));
-         }
+         final Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
+         return hadoopOperationWithRetry("reading plugin archive file from "+path, new Callable<PluginArchive>() {
+             public PluginArchive call() throws IOException
+             {
+                 List<PluginArchive.GemSpec> specs = modelManager.readObject(
+                         new ArrayList<PluginArchive.GemSpec>() {}.getClass(),
+                         config.get(CK_PLUGIN_ARCHIVE_SPECS));
+                 try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
+                     return PluginArchive.load(localDirectory, specs, in);
+                 }
+             }
+         });
+     }
+
+     public static void writeAttemptStateFile(final Configuration config,
+             Path stateDir, final AttemptState state, final ModelManager modelManager) throws IOException
+     {
+         final Path path = new Path(stateDir, state.getAttemptId().toString());
+         hadoopOperationWithRetry("writing attempt state file to "+path, new Callable<Void>() {
+             public Void call() throws IOException
+             {
+                 try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
+                     state.writeTo(out, modelManager);
+                 }
+                 return null;
+             }
+         });
      }

      public static AttemptState readAttemptStateFile(final Configuration config,
-             Path stateDir, TaskAttemptID id, final ModelManager modelManager) throws IOException
+             Path stateDir, TaskAttemptID id, final ModelManager modelManager,
+             final boolean concurrentWriteIsPossible) throws IOException
      {
          final Logger log = Exec.getLogger(EmbulkMapReduce.class);
          final Path path = new Path(stateDir, id.toString());
@@ -160,34 +273,46 @@ public class EmbulkMapReduce
                      .withMaxRetryWait(20 * 1000)
                      .runInterruptible(new Retryable<AttemptState>() {
                          @Override
-                         public AttemptState call() throws IOException {
+                         public AttemptState call() throws IOException
+                         {
                              try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
                                  return AttemptState.readFrom(in, modelManager);
                              }
                          }

                          @Override
-                         public boolean isRetryableException(Exception exception) {
-                             // AttemptState.readFrom throws 2 types of exceptions:
-                             // a) EOFException: race between readFrom and writeTo. See comments on AttemptState.readFrom.
-                             // b) IOException "Cannot obtain block length for LocatedBlock": HDFS-1058. See https://github.com/embulk/embulk-executor-mapreduce/pull/3
-                             // c) other IOException: FileSystem is not working
+                         public boolean isRetryableException(Exception exception)
+                         {
+                             // AttemptState.readFrom throws 4 types of exceptions:
+                             //
+                             // concurrentWriteIsPossible == true:
+                             //   a) EOFException: race between readFrom and writeTo. See comments on AttemptState.readFrom.
+                             //   b) EOFException: the file exists but its format is invalid because this task was retried and the last job/attempt left corrupted files (empty, partially written, etc.)
+                             //   c) IOException "Cannot obtain block length for LocatedBlock": HDFS-1058. See https://github.com/embulk/embulk-executor-mapreduce/pull/3
+                             //   d) IOException: FileSystem is not working
+                             // concurrentWriteIsPossible == false:
+                             //   e) EOFException: the file exists but its format is invalid because this task was retried and the last job/attempt left corrupted files (empty, partially written, etc.)
+                             //   f) IOException: FileSystem is not working
                              //
-                             // a) and b) are temporary problem which is not critical. c) could be temporary problem and it is critical.
-                             // Here retries regardless of the exception type because we can't distinguish b) from c).
+                             if (exception instanceof EOFException && !concurrentWriteIsPossible) {
+                                 // e) is not recoverable.
+                                 return false;
+                             }
                              return true;
                          }

                          @Override
                          public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
-                                 throws RetryGiveupException {
-                             log.warn("Retrying opening state file " + path.getName() + " error: " + exception);
+                                 throws RetryGiveupException
+                         {
+                             log.warn("Retrying opening state file {} ({}/{}) error: {}",
+                                     path, retryCount, retryLimit, exception);
                          }

                          @Override
                          public void onGiveup(Exception firstException, Exception lastException)
-                                 throws RetryGiveupException {
-                         }
+                                 throws RetryGiveupException
+                         { }
                      });
          } catch (RetryGiveupException e) {
              Throwables.propagateIfInstanceOf(e.getCause(), IOException.class);
@@ -197,13 +322,45 @@ public class EmbulkMapReduce
          }
      }

-     public static void writeAttemptStateFile(Configuration config,
-             Path stateDir, AttemptState state, ModelManager modelManager) throws IOException
+     private static <T> T hadoopOperationWithRetry(final String message, final Callable<T> callable) throws IOException
      {
-         Path path = new Path(stateDir, state.getAttemptId().toString());
-         // TODO retry file create and write
-         try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
-             state.writeTo(out, modelManager);
+         final Logger log = Exec.getLogger(EmbulkMapReduce.class);
+         try {
+             return retryExecutor()
+                     .withRetryLimit(5)
+                     .withInitialRetryWait(2 * 1000)
+                     .withMaxRetryWait(20 * 1000)
+                     .runInterruptible(new Retryable<T>() {
+                         @Override
+                         public T call() throws Exception
+                         {
+                             return callable.call();
+                         }
+
+                         @Override
+                         public boolean isRetryableException(Exception exception)
+                         {
+                             return true;
+                         }
+
+                         @Override
+                         public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
+                                 throws RetryGiveupException
+                         {
+                             log.warn("Retrying {} ({}/{}) error: {}",
+                                     message, retryCount, retryLimit, exception);
+                         }
+
+                         @Override
+                         public void onGiveup(Exception firstException, Exception lastException)
+                                 throws RetryGiveupException
+                         { }
+                     });
+         } catch (RetryGiveupException e) {
+             Throwables.propagateIfInstanceOf(e.getCause(), IOException.class);
+             throw Throwables.propagate(e.getCause());
+         } catch (InterruptedException e) {
+             throw new InterruptedIOException();
          }
      }

src/main/java/org/embulk/executor/mapreduce/MapReduceExecutor.java CHANGED
@@ -1,9 +1,11 @@
  package org.embulk.executor.mapreduce;

  import java.util.List;
- import java.util.Map;
+ import java.util.Collection;
  import java.util.Set;
+ import java.util.Map;
  import java.util.HashSet;
+ import java.util.HashMap;
  import java.io.File;
  import java.io.IOException;
  import java.io.EOFException;
@@ -29,6 +31,7 @@ import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.mapreduce.JobContext;
  import org.apache.hadoop.mapreduce.Cluster;
  import org.apache.hadoop.mapreduce.Job;
+ import org.apache.hadoop.mapreduce.JobID;
  import org.apache.hadoop.mapreduce.Counters;
  import org.apache.hadoop.mapreduce.TaskType;
  import org.apache.hadoop.mapreduce.TaskAttemptID;
@@ -37,6 +40,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
  import org.embulk.exec.ForSystemConfig;
  import org.embulk.config.ConfigSource;
+ import org.embulk.config.CommitReport;
  import org.embulk.config.ConfigException;
  import org.embulk.config.TaskSource;
  import org.embulk.config.ModelManager;
@@ -116,6 +120,67 @@ public class MapReduceExecutor
          }
      }

+     private static class TaskReportSet
+     {
+         private Map<Integer, AttemptReport> inputTaskReports = new HashMap<>();
+         private Map<Integer, AttemptReport> outputTaskReports = new HashMap<>();
+
+         private final JobID runningJobId;
+
+         public TaskReportSet(JobID runningJobId)
+         {
+             this.runningJobId = runningJobId;
+         }
+
+         public Collection<AttemptReport> getLatestInputAttemptReports()
+         {
+             return inputTaskReports.values();
+         }
+
+         public Collection<AttemptReport> getLatestOutputAttemptReports()
+         {
+             return outputTaskReports.values();
+         }
+
+         public void update(AttemptReport report)
+         {
+             if (report.getInputTaskIndex().isPresent()) {
+                 int taskIndex = report.getInputTaskIndex().get();
+                 AttemptReport past = inputTaskReports.get(taskIndex);
+                 if (past == null || checkOverwrite(past, report)) {
+                     inputTaskReports.put(taskIndex, report);
+                 }
+             }
+             if (report.getOutputTaskIndex().isPresent()) {
+                 int taskIndex = report.getOutputTaskIndex().get();
+                 AttemptReport past = outputTaskReports.get(taskIndex);
+                 if (past == null || checkOverwrite(past, report)) {
+                     outputTaskReports.put(taskIndex, report);
+                 }
+             }
+         }
+
+         private boolean checkOverwrite(AttemptReport past, AttemptReport report)
+         {
+             // if the new report is already committed successfully, use it
+             if (!past.isOutputCommitted() && report.isOutputCommitted()) {
+                 return true;
+             }
+
+             // This code expects TaskAttemptID.compareTo to return <= 0 if the attempt started later.
+             // However, it can return an unexpected result if 2 jobs run on different JobTrackers,
+             // because a JobID includes the start time of the JobTracker and a sequence number within
+             // that JobTracker rather than the start time of the job. To mitigate this problem, this
+             // code assumes that attempts of the currently running job are always newer.
+             boolean pastRunning = past.getTaskAttempId().getJobID().equals(runningJobId);
+             boolean reportRunning = report.getTaskAttempId().getJobID().equals(runningJobId);
+             if (!pastRunning && reportRunning) {
+                 return true;
+             }
+             return past.getTaskAttempId().compareTo(report.getTaskAttempId()) <= 0;
+         }
+     }
+
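Note: TaskReportSet keeps only the newest report per input/output task index, and checkOverwrite leans on the natural ordering of TaskAttemptID within one job. A small self-contained sketch of that ordering assumption (the jtIdentifier "20150702" and job number are arbitrary placeholders):

    import org.apache.hadoop.mapreduce.TaskAttemptID;
    import org.apache.hadoop.mapreduce.TaskType;

    public class AttemptOrdering
    {
        public static void main(String[] args)
        {
            // Two attempts of the same map task in the same job:
            // attempt 0 and its retry, attempt 1.
            TaskAttemptID first = new TaskAttemptID("20150702", 1, TaskType.MAP, 0, 0);
            TaskAttemptID retry = new TaskAttemptID("20150702", 1, TaskType.MAP, 0, 1);
            // first.compareTo(retry) <= 0, so checkOverwrite lets the retry's
            // report replace the earlier one for the same task index.
            System.out.println(first.compareTo(retry) <= 0);  // prints true
        }
    }

Across two different jobs the comparison is unreliable for the reason given in the comment above, which is why reports from the currently running job always win.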
      void run(MapReduceExecutorTask task,
              int mapTaskCount, int reduceTaskCount, ProcessState state)
      {
@@ -163,14 +228,7 @@ public class MapReduceExecutor
          EmbulkMapReduce.setMapTaskCount(job.getConfiguration(), mapTaskCount); // used by EmbulkInputFormat
          EmbulkMapReduce.setStateDirectoryPath(job.getConfiguration(), stateDir);

-         // create state dir
-         try {
-             stateDir.getFileSystem(job.getConfiguration()).mkdirs(stateDir);
-         } catch (IOException ex) {
-             throw new RuntimeException(ex);
-         }
-
-         // archive plugins
+         // archive plugins (this also creates the state dir)
          PluginArchive archive = new PluginArchive.Builder()
                  .addLoadedRubyGems(jruby)
                  .build();
@@ -211,26 +269,33 @@ public class MapReduceExecutor

          try {
              job.submit();
+             TaskReportSet reportSet = new TaskReportSet(job.getJobID());

              int interval = Job.getCompletionPollInterval(job.getConfiguration());
-             while (!job.isComplete()) {
+             while (true) {
+                 EmbulkMapReduce.JobStatus status = EmbulkMapReduce.getJobStatus(job);
+                 if (status.isComplete()) {
+                     break;
+                 }
+                 log.info(String.format("map %.1f%% reduce %.1f%%",
+                         status.getMapProgress() * 100, status.getReduceProgress() * 100));
+
                  //if (job.getState() == JobStatus.State.PREP) {
                  //    continue;
                  //}
-                 log.info(String.format("map %.1f%% reduce %.1f%%",
-                         job.mapProgress() * 100, job.reduceProgress() * 100));
                  Thread.sleep(interval);

-                 updateProcessState(job, mapTaskCount, stateDir, state, modelManager, true);
+                 updateProcessState(job, reportSet, stateDir, state, modelManager, true);
              }

-             // Here sets skipUnavailable=false to updateProcessState method because race
-             // condition of AttemptReport.readFrom and .writeTo does not happen here.
+             EmbulkMapReduce.JobStatus status = EmbulkMapReduce.getJobStatus(job);
              log.info(String.format("map %.1f%% reduce %.1f%%",
-                     job.mapProgress() * 100, job.reduceProgress() * 100));
-             updateProcessState(job, mapTaskCount, stateDir, state, modelManager, false);
+                     status.getMapProgress() * 100, status.getReduceProgress() * 100));
+             // Here, inProgress=false is passed to updateProcessState to tell it that the race
+             // condition between AttemptReport.readFrom and .writeTo can no longer happen.
+             updateProcessState(job, reportSet, stateDir, state, modelManager, false);

-             Counters counters = job.getCounters();
+             Counters counters = EmbulkMapReduce.getJobCounters(job);
              if (counters != null) {
                  log.info(counters.toString());
              }
@@ -292,50 +357,39 @@ public class MapReduceExecutor
                  + String.format("%09d", time.getNano());
      }

-     private void updateProcessState(Job job, int mapTaskCount, Path stateDir,
-             ProcessState state, ModelManager modelManager, boolean skipUnavailable) throws IOException
+     private void updateProcessState(Job job, TaskReportSet reportSet, Path stateDir,
+             ProcessState state, ModelManager modelManager, boolean inProgress) throws IOException
      {
-         List<AttemptReport> reports = getAttemptReports(job.getConfiguration(), stateDir, modelManager);
+         List<AttemptReport> reports = getAttemptReports(job.getConfiguration(), stateDir, modelManager,
+                 inProgress, job.getJobID());

          for (AttemptReport report : reports) {
-             if (report == null) {
-                 continue;
-             }
-             if (!report.isAvailable()) {
-                 if (skipUnavailable) {
-                     continue;
-                 } else {
-                     throw report.getUnavailableException();
-                 }
-             }
-             AttemptState attempt = report.getAttemptState();
-             if (attempt.getInputTaskIndex().isPresent()) {
-                 updateState(state.getInputTaskState(attempt.getInputTaskIndex().get()), attempt, true);
-             }
-             if (attempt.getOutputTaskIndex().isPresent()) {
-                 updateState(state.getOutputTaskState(attempt.getOutputTaskIndex().get()), attempt, false);
+             if (report.isAvailable()) {
+                 reportSet.update(report);
              }
          }
+
+         for (AttemptReport report : reportSet.getLatestInputAttemptReports()) {
+             updateTaskState(state.getInputTaskState(report.getInputTaskIndex().get()), report.getAttemptState(), true);
+         }
+
+         for (AttemptReport report : reportSet.getLatestOutputAttemptReports()) {
+             updateTaskState(state.getOutputTaskState(report.getOutputTaskIndex().get()), report.getAttemptState(), false);
+         }
      }

-     private static void updateState(TaskState state, AttemptState attempt, boolean isInput)
+     private static void updateTaskState(TaskState state, AttemptState attempt, boolean isInput)
      {
          state.start();
+         Optional<CommitReport> commitReport = isInput ? attempt.getInputCommitReport() : attempt.getOutputCommitReport();
+         boolean committed = commitReport.isPresent();
          if (attempt.getException().isPresent()) {
              if (!state.isCommitted()) {
                  state.setException(new RemoteTaskFailedException(attempt.getException().get()));
              }
-         } else if (
-                 (isInput && attempt.getInputCommitReport().isPresent()) ||
-                 (!isInput && attempt.getOutputCommitReport().isPresent())) {
-             state.resetException();
-         }
-         if (isInput && attempt.getInputCommitReport().isPresent()) {
-             state.setCommitReport(attempt.getInputCommitReport().get());
-             state.finish();
          }
-         if (!isInput && attempt.getOutputCommitReport().isPresent()) {
-             state.setCommitReport(attempt.getOutputCommitReport().get());
+         if (commitReport.isPresent()) {
+             state.setCommitReport(commitReport.get());
              state.finish();
          }
      }
@@ -370,6 +424,16 @@ public class MapReduceExecutor
              return unavailableException;
          }

+         public Optional<Integer> getInputTaskIndex()
+         {
+             return attemptState == null ? Optional.<Integer>absent() : attemptState.getInputTaskIndex();
+         }
+
+         public Optional<Integer> getOutputTaskIndex()
+         {
+             return attemptState == null ? Optional.<Integer>absent() : attemptState.getOutputTaskIndex();
+         }
+
          public boolean isInputCommitted()
          {
              return attemptState != null && attemptState.getInputCommitReport().isPresent();
@@ -380,28 +444,39 @@ public class MapReduceExecutor
              return attemptState != null && attemptState.getOutputCommitReport().isPresent();
          }

+         public TaskAttemptID getTaskAttempId()
+         {
+             return attemptId;
+         }
+
          public AttemptState getAttemptState()
          {
              return attemptState;
          }
      }

-     private static final int TASK_EVENT_FETCH_SIZE = 100;
-
      private static List<AttemptReport> getAttemptReports(Configuration config,
-             Path stateDir, ModelManager modelManager) throws IOException
+             Path stateDir, ModelManager modelManager,
+             boolean jobIsRunning, JobID runningJobId) throws IOException
      {
          ImmutableList.Builder<AttemptReport> builder = ImmutableList.builder();
          for (TaskAttemptID aid : EmbulkMapReduce.listAttempts(config, stateDir)) {
+             boolean concurrentWriteIsPossible = aid.getJobID().equals(runningJobId) && jobIsRunning;
              try {
                  AttemptState state = EmbulkMapReduce.readAttemptStateFile(config,
-                         stateDir, aid, modelManager);
+                         stateDir, aid, modelManager, concurrentWriteIsPossible);
                  builder.add(new AttemptReport(aid, state));
              } catch (IOException ex) {
-                 // Either of:
-                 // * race condition of AttemptReport.writeTo and .readFrom
-                 // * FileSystem is not working
-                 // See also comments on MapReduceExecutor.readAttemptStateFile.isRetryableException.
+                 // See the comments on readAttemptStateFile for the possible error causes.
+                 if (!concurrentWriteIsPossible) {
+                     if (!(ex instanceof EOFException)) {
+                         // f) HDFS is broken. This is a critical problem that should throw an exception.
+                         throw new RuntimeException(ex);
+                     }
+                     // HDFS is working but the file is corrupted. It is always possible that the directory
+                     // contains corrupted files created by past attempts of a retried task or job. Ignore it.
+                 }
+                 // If concurrentWriteIsPossible, there is no way to tell the cause. Ignore it.
                  builder.add(new AttemptReport(aid, ex));
              }
          }
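Note: the catch block above encodes a small decision matrix. A compact restatement as a sketch (this predicate mirrors the logic of the catch block; it is not an API of this gem):

    import java.io.EOFException;
    import java.io.IOException;

    public class StateFileErrorRule
    {
        // While a concurrent writer may exist, any IOException is ambiguous, so it
        // is ignorable. Once no concurrent writer can exist, only EOFException (a
        // corrupted leftover from a past attempt) is ignorable; any other
        // IOException means the FileSystem itself is failing and must abort.
        static boolean isIgnorable(IOException ex, boolean concurrentWriteIsPossible)
        {
            return concurrentWriteIsPossible || ex instanceof EOFException;
        }

        public static void main(String[] args)
        {
            System.out.println(isIgnorable(new EOFException(), false));      // true: corrupted leftover
            System.out.println(isIgnorable(new IOException("rpc"), false));  // false: abort the job
            System.out.println(isIgnorable(new IOException("rpc"), true));   // true: cause is ambiguous
        }
    }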
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: embulk-executor-mapreduce
  version: !ruby/object:Gem::Version
-   version: 0.1.2
+   version: 0.1.3
  platform: ruby
  authors:
  - Sadayuki Furuhashi
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-06-27 00:00:00.000000000 Z
+ date: 2015-07-02 00:00:00.000000000 Z
  dependencies: []
  description: Executes tasks on Hadoop.
  email:
@@ -60,7 +60,7 @@ files:
  - classpath/curator-client-2.6.0.jar
  - classpath/curator-framework-2.6.0.jar
  - classpath/curator-recipes-2.6.0.jar
- - classpath/embulk-executor-mapreduce-0.1.2.jar
+ - classpath/embulk-executor-mapreduce-0.1.3.jar
  - classpath/gson-2.2.4.jar
  - classpath/hadoop-annotations-2.6.0.jar
  - classpath/hadoop-auth-2.6.0.jar