embulk-executor-mapreduce 0.1.2 → 0.1.3

This diff shows the contents of two publicly released versions of the package, as published to one of the supported registries. The information is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 05feb97f9e21b2feec1b0e2d1517d17797d68c54
-   data.tar.gz: f7ddeeee84821d58ca377d7d396ea403b2a27b27
+   metadata.gz: dc83806412506bc567037cdf24a1c247a99abf13
+   data.tar.gz: 10a3dd696c3729f58a0c4e2b0ec5b0217cbdfcc1
  SHA512:
-   metadata.gz: e3213d0c7269f68824c94a06cdeca185deeb9ebdc5b518569c99893b75a8ab9b3bb55f4bc93295844891e1f12ecb48fed649f16d8ea2220d65470b3810a3af47
-   data.tar.gz: 9b3104672bc7b400d8096850925e6e843d8c713fbd5b4adc88347102f7dd2256734fc6fd9904c7e6e9d6f76b61db9dd25c8707fe0555fa1318568a94fd72843f
+   metadata.gz: f4ef4b1809a3acf01d0cf449efd4bc026fc77e60bd3aba8708102b0118c06a4b5a7bfd06ce70f497d0121da86ddfe9012dc20ab152c0c192ba0dad1eb80065be
+   data.tar.gz: 0999ab7bc7eb9fa1e71e61212c6be680c9e5fa232ea1cd4d57f45a8e7228e41ea4d4ee8c1e794de64b5caf77232264126979560fe09988bb55d264bfa2839e70
src/main/java/org/embulk/executor/mapreduce/EmbulkMapReduce.java CHANGED
@@ -1,15 +1,18 @@
  package org.embulk.executor.mapreduce;
 
- import java.io.EOFException;
- import java.io.InterruptedIOException;
  import java.util.List;
  import java.util.ArrayList;
+ import java.util.concurrent.Callable;
  import java.util.concurrent.ExecutionException;
  import java.io.File;
  import java.io.IOException;
+ import java.io.EOFException;
+ import java.io.InterruptedIOException;
+ import java.lang.reflect.InvocationTargetException;
  import com.google.inject.Injector;
  import com.google.common.base.Optional;
  import com.google.common.base.Throwables;
+ import com.google.common.base.Throwables;
  import com.google.common.collect.ImmutableList;
  import com.fasterxml.jackson.core.JsonFactory;
  import com.fasterxml.jackson.databind.ObjectMapper;
@@ -24,6 +27,7 @@ import org.apache.hadoop.io.NullWritable;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.JobContext;
+ import org.apache.hadoop.mapreduce.Counters;
  import org.apache.hadoop.mapreduce.TaskAttemptID;
  import org.apache.hadoop.mapreduce.Mapper;
  import org.apache.hadoop.mapreduce.Reducer;
@@ -48,11 +52,14 @@ import static org.embulk.spi.util.RetryExecutor.retryExecutor;
 
  public class EmbulkMapReduce
  {
+     private static final String SYSTEM_CONFIG_SERVICE_CLASS = "mapreduce_service_class";
+
      private static final String CK_SYSTEM_CONFIG = "embulk.mapreduce.systemConfig";
      private static final String CK_STATE_DIRECTORY_PATH = "embulk.mapreduce.stateDirectorypath";
      private static final String CK_TASK_COUNT = "embulk.mapreduce.taskCount";
      private static final String CK_TASK = "embulk.mapreduce.task";
      private static final String CK_PLUGIN_ARCHIVE_SPECS = "embulk.mapreduce.pluginArchive.specs";
+
      private static final String PLUGIN_ARCHIVE_FILE_NAME = "gems.zip";
 
      public static void setSystemConfig(Configuration config, ModelManager modelManager, ConfigSource systemConfig)
@@ -105,51 +112,157 @@ public class EmbulkMapReduce
      public static Injector newEmbulkInstance(Configuration config)
      {
          ConfigSource systemConfig = getSystemConfig(config);
-         return new EmbulkService(systemConfig).getInjector();
+         String serviceClassName = systemConfig.get(String.class, SYSTEM_CONFIG_SERVICE_CLASS, "org.embulk.EmbulkService");
+
+         try {
+             Object obj;
+             if (serviceClassName.equals("org.embulk.EmbulkService")) {
+                 obj = new EmbulkService(systemConfig);
+             } else {
+                 Class<?> serviceClass = Class.forName(serviceClassName);
+                 obj = serviceClass.getConstructor(ConfigSource.class).newInstance(systemConfig);
+             }
+
+             if (obj instanceof EmbulkService) {
+                 return ((EmbulkService) obj).getInjector();
+             } else {
+                 return (Injector) obj.getClass().getMethod("getInjector").invoke(obj);
+             }
+
+         } catch (InvocationTargetException ex) {
+             throw Throwables.propagate(ex.getCause());
+         } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | IllegalArgumentException ex) {
+             throw Throwables.propagate(ex);
+         }
      }
 
-     public static List<TaskAttemptID> listAttempts(Configuration config,
-             Path stateDir) throws IOException
+     public static class JobStatus
      {
-         FileStatus[] stats = stateDir.getFileSystem(config).listStatus(stateDir);
-         ImmutableList.Builder<TaskAttemptID> builder = ImmutableList.builder();
-         for (FileStatus stat : stats) {
-             if (stat.getPath().getName().startsWith("attempt_") && stat.isFile()) {
-                 String name = stat.getPath().getName();
-                 try {
-                     builder.add(TaskAttemptID.forName(name));
-                 } catch (IllegalArgumentException ex) {
-                     // ignore
+         private final boolean completed;
+         private final float mapProgress;
+         private final float reduceProgress;
+
+         public JobStatus(boolean completed, float mapProgress, float reduceProgress)
+         {
+             this.completed = completed;
+             this.mapProgress = mapProgress;
+             this.reduceProgress = reduceProgress;
+         }
+
+         public boolean isComplete()
+         {
+             return completed;
+         }
+
+         public float getMapProgress()
+         {
+             return mapProgress;
+         }
+
+         public float getReduceProgress()
+         {
+             return reduceProgress;
+         }
+     }
+
+     public static JobStatus getJobStatus(final Job job) throws IOException
+     {
+         return hadoopOperationWithRetry("getting job status", new Callable<JobStatus>() {
+             public JobStatus call() throws IOException
+             {
+                 return new JobStatus(job.isComplete(), job.mapProgress(), job.reduceProgress());
+             }
+         });
+     }
+
+     public static Counters getJobCounters(final Job job) throws IOException
+     {
+         return hadoopOperationWithRetry("getting job counters", new Callable<Counters>() {
+             public Counters call() throws IOException
+             {
+                 return job.getCounters();
+             }
+         });
+     }
+
+     public static List<TaskAttemptID> listAttempts(final Configuration config,
+             final Path stateDir) throws IOException
+     {
+         return hadoopOperationWithRetry("getting list of attempt state files on "+stateDir, new Callable<List<TaskAttemptID>>() {
+             public List<TaskAttemptID> call() throws IOException
+             {
+                 FileStatus[] stats = stateDir.getFileSystem(config).listStatus(stateDir);
+                 ImmutableList.Builder<TaskAttemptID> builder = ImmutableList.builder();
+                 for (FileStatus stat : stats) {
+                     if (stat.getPath().getName().startsWith("attempt_") && stat.isFile()) {
+                         String name = stat.getPath().getName();
+                         TaskAttemptID id;
+                         try {
+                             id = TaskAttemptID.forName(name);
+                         } catch (Exception ex) {
+                             // ignore this file
+                             continue;
+                         }
+                         builder.add(id);
+                     }
                  }
+                 return builder.build();
              }
-         }
-         return builder.build();
+         });
      }
 
-     public static PluginArchive readPluginArchive(File localDirectory, Configuration config,
-             Path stateDir, ModelManager modelManager) throws IOException
+     public static void writePluginArchive(final Configuration config, final Path stateDir,
+             final PluginArchive archive, final ModelManager modelManager) throws IOException
      {
-         List<PluginArchive.GemSpec> specs = modelManager.readObject(
-                 new ArrayList<PluginArchive.GemSpec>() {}.getClass(),
-                 config.get(CK_PLUGIN_ARCHIVE_SPECS));
-         Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
-         try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
-             return PluginArchive.load(localDirectory, specs, in);
-         }
+         final Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
+         hadoopOperationWithRetry("writing plugin archive to "+path, new Callable<Void>() {
+             public Void call() throws IOException
+             {
+                 stateDir.getFileSystem(config).mkdirs(stateDir);
+                 try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
+                     List<PluginArchive.GemSpec> specs = archive.dump(out);
+                     config.set(CK_PLUGIN_ARCHIVE_SPECS, modelManager.writeObject(specs));
+                 }
+                 return null;
+             }
+         });
      }
 
-     public static void writePluginArchive(Configuration config, Path stateDir,
-             PluginArchive archive, ModelManager modelManager) throws IOException
+     public static PluginArchive readPluginArchive(final File localDirectory, final Configuration config,
+             Path stateDir, final ModelManager modelManager) throws IOException
      {
-         Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
-         try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
-             List<PluginArchive.GemSpec> specs = archive.dump(out);
-             config.set(CK_PLUGIN_ARCHIVE_SPECS, modelManager.writeObject(specs));
-         }
+         final Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
+         return hadoopOperationWithRetry("reading plugin archive file from "+path, new Callable<PluginArchive>() {
+             public PluginArchive call() throws IOException
+             {
+                 List<PluginArchive.GemSpec> specs = modelManager.readObject(
+                         new ArrayList<PluginArchive.GemSpec>() {}.getClass(),
+                         config.get(CK_PLUGIN_ARCHIVE_SPECS));
+                 try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
+                     return PluginArchive.load(localDirectory, specs, in);
+                 }
+             }
+         });
+     }
+
+     public static void writeAttemptStateFile(final Configuration config,
+             Path stateDir, final AttemptState state, final ModelManager modelManager) throws IOException
+     {
+         final Path path = new Path(stateDir, state.getAttemptId().toString());
+         hadoopOperationWithRetry("writing attempt state file to "+path, new Callable<Void>() {
+             public Void call() throws IOException
+             {
+                 try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
+                     state.writeTo(out, modelManager);
+                 }
+                 return null;
+             }
+         });
      }
 
      public static AttemptState readAttemptStateFile(final Configuration config,
-             Path stateDir, TaskAttemptID id, final ModelManager modelManager) throws IOException
+             Path stateDir, TaskAttemptID id, final ModelManager modelManager,
+             final boolean concurrentWriteIsPossible) throws IOException
      {
          final Logger log = Exec.getLogger(EmbulkMapReduce.class);
          final Path path = new Path(stateDir, id.toString());
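The reflective loading in newEmbulkInstance above turns the bootstrap class into an extension point: any class named by the new mapreduce_service_class system-config key only needs a public constructor taking a ConfigSource and a getInjector() method. A minimal sketch of a conforming class; the com.example name and the println hook are hypothetical, not from this package:

package com.example.embulk;

import org.embulk.EmbulkService;
import org.embulk.config.ConfigSource;

// Hypothetical custom service for the "mapreduce_service_class" hook. Subclassing
// EmbulkService is the simplest way to satisfy the reflection contract, because the
// ConfigSource constructor and getInjector() are inherited.
public class CustomEmbulkService extends EmbulkService
{
    public CustomEmbulkService(ConfigSource systemConfig)
    {
        super(systemConfig);
        // any extra initialization would run here, inside every mapper/reducer JVM
        System.out.println("CustomEmbulkService booted");
    }
}

Setting mapreduce_service_class to com.example.embulk.CustomEmbulkService in the Embulk system config would then make each task boot through this class instead of the stock org.embulk.EmbulkService.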
@@ -160,34 +273,46 @@ public class EmbulkMapReduce
                  .withMaxRetryWait(20 * 1000)
                  .runInterruptible(new Retryable<AttemptState>() {
                      @Override
-                     public AttemptState call() throws IOException {
+                     public AttemptState call() throws IOException
+                     {
                          try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
                              return AttemptState.readFrom(in, modelManager);
                          }
                      }
 
                      @Override
-                     public boolean isRetryableException(Exception exception) {
-                         // AttemptState.readFrom throws 2 types of exceptions:
-                         // a) EOFException: race between readFrom and writeTo. See comments on AttemptState.readFrom.
-                         // b) IOException "Cannot obtain block length for LocatedBlock": HDFS-1058. See https://github.com/embulk/embulk-executor-mapreduce/pull/3
-                         // c) other IOException: FileSystem is not working
+                     public boolean isRetryableException(Exception exception)
+                     {
+                         // AttemptState.readFrom throws 4 types of exceptions:
+                         //
+                         // concurrentWriteIsPossible == true:
+                         // a) EOFException: race between readFrom and writeTo. See comments on AttemptState.readFrom.
+                         // b) EOFException: file exists but its format is invalid because this task is retried and last job/attempt left corrupted files (such as empty, partially written, etc)
+                         // c) IOException "Cannot obtain block length for LocatedBlock": HDFS-1058. See https://github.com/embulk/embulk-executor-mapreduce/pull/3
+                         // d) IOException: FileSystem is not working
+                         // concurrentWriteIsPossible == false:
+                         // e) EOFException: file exists but its format is invalid because this task is retried and last job/attempt left corrupted files (such as empty, partially written, etc)
+                         // f) IOException: FileSystem is not working
                          //
-                         // a) and b) are temporary problem which is not critical. c) could be temporary problem and it is critical.
-                         // Here retries regardless of the exception type because we can't distinguish b) from c).
+                         if (exception instanceof EOFException && !concurrentWriteIsPossible) {
+                             // e) is not recoverable.
+                             return false;
+                         }
                          return true;
                      }
 
                      @Override
                      public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
-                             throws RetryGiveupException {
-                         log.warn("Retrying opening state file " + path.getName() + " error: " + exception);
+                             throws RetryGiveupException
+                     {
+                         log.warn("Retrying opening state file {} ({}/{}) error: {}",
+                                 path, retryCount, retryLimit, exception);
                      }
 
                      @Override
                      public void onGiveup(Exception firstException, Exception lastException)
-                             throws RetryGiveupException {
-                     }
+                             throws RetryGiveupException
+                     { }
                  });
          } catch (RetryGiveupException e) {
              Throwables.propagateIfInstanceOf(e.getCause(), IOException.class);
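The retry predicate above collapses to a single rule. A condensed restatement for reference (illustrative, not part of the diff):

import java.io.EOFException;

// Condensed restatement of isRetryableException above: the only non-retryable
// case is an EOFException seen when no concurrent writer can exist (case e),
// since the state file is then known to be a corrupted leftover rather than a
// file that is still being written.
class RetryRuleSketch
{
    static boolean isRetryable(Exception exception, boolean concurrentWriteIsPossible)
    {
        if (exception instanceof EOFException && !concurrentWriteIsPossible) {
            return false;  // corrupted leftover from an earlier attempt: give up immediately
        }
        return true;       // read/write races, HDFS-1058, transient FS errors: retry
    }
}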
@@ -197,13 +322,45 @@ public class EmbulkMapReduce
          }
      }
 
-     public static void writeAttemptStateFile(Configuration config,
-             Path stateDir, AttemptState state, ModelManager modelManager) throws IOException
+     private static <T> T hadoopOperationWithRetry(final String message, final Callable<T> callable) throws IOException
      {
-         Path path = new Path(stateDir, state.getAttemptId().toString());
-         // TODO retry file create and write
-         try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
-             state.writeTo(out, modelManager);
+         final Logger log = Exec.getLogger(EmbulkMapReduce.class);
+         try {
+             return retryExecutor()
+                 .withRetryLimit(5)
+                 .withInitialRetryWait(2 * 1000)
+                 .withMaxRetryWait(20 * 1000)
+                 .runInterruptible(new Retryable<T>() {
+                     @Override
+                     public T call() throws Exception
+                     {
+                         return callable.call();
+                     }
+
+                     @Override
+                     public boolean isRetryableException(Exception exception)
+                     {
+                         return true;
+                     }
+
+                     @Override
+                     public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
+                             throws RetryGiveupException
+                     {
+                         log.warn("Retrying {} ({}/{}) error: {}",
+                                 message, retryCount, retryLimit, exception);
+                     }
+
+                     @Override
+                     public void onGiveup(Exception firstException, Exception lastException)
+                             throws RetryGiveupException
+                     { }
+                 });
+         } catch (RetryGiveupException e) {
+             Throwables.propagateIfInstanceOf(e.getCause(), IOException.class);
+             throw Throwables.propagate(e.getCause());
+         } catch (InterruptedException e) {
+             throw new InterruptedIOException();
          }
      }
 
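For readers unfamiliar with Embulk's RetryExecutor, the new hadoopOperationWithRetry helper above behaves roughly like this dependency-free sketch (an approximation of the semantics, not the library's implementation):

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.concurrent.Callable;

// Plain-Java approximation of hadoopOperationWithRetry: up to 5 retries with
// exponential backoff growing from 2 to 20 seconds, every exception treated as
// retryable, and interruption surfaced as InterruptedIOException so callers
// only have to handle IOException.
class HadoopRetrySketch
{
    static <T> T withRetry(String message, Callable<T> op) throws IOException
    {
        int wait = 2000;  // initial retry wait in milliseconds
        for (int retryCount = 0; ; retryCount++) {
            try {
                return op.call();
            } catch (Exception ex) {
                if (retryCount >= 5) {  // retry limit reached: propagate the cause
                    if (ex instanceof IOException) {
                        throw (IOException) ex;
                    }
                    throw new RuntimeException(ex);
                }
                System.err.println("Retrying " + message + " (" + retryCount + "/5) error: " + ex);
                try {
                    Thread.sleep(wait);
                } catch (InterruptedException ie) {
                    throw new InterruptedIOException();
                }
                wait = Math.min(wait * 2, 20 * 1000);  // back off, capped at 20 seconds
            }
        }
    }
}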
src/main/java/org/embulk/executor/mapreduce/MapReduceExecutor.java CHANGED
@@ -1,9 +1,11 @@
  package org.embulk.executor.mapreduce;
 
  import java.util.List;
- import java.util.Map;
+ import java.util.Collection;
  import java.util.Set;
+ import java.util.Map;
  import java.util.HashSet;
+ import java.util.HashMap;
  import java.io.File;
  import java.io.IOException;
  import java.io.EOFException;
@@ -29,6 +31,7 @@ import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.mapreduce.JobContext;
  import org.apache.hadoop.mapreduce.Cluster;
  import org.apache.hadoop.mapreduce.Job;
+ import org.apache.hadoop.mapreduce.JobID;
  import org.apache.hadoop.mapreduce.Counters;
  import org.apache.hadoop.mapreduce.TaskType;
  import org.apache.hadoop.mapreduce.TaskAttemptID;
@@ -37,6 +40,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
  import org.embulk.exec.ForSystemConfig;
  import org.embulk.config.ConfigSource;
+ import org.embulk.config.CommitReport;
  import org.embulk.config.ConfigException;
  import org.embulk.config.TaskSource;
  import org.embulk.config.ModelManager;
@@ -116,6 +120,67 @@ public class MapReduceExecutor
          }
      }
 
+     private static class TaskReportSet
+     {
+         private Map<Integer, AttemptReport> inputTaskReports = new HashMap<>();
+         private Map<Integer, AttemptReport> outputTaskReports = new HashMap<>();
+
+         private final JobID runningJobId;
+
+         public TaskReportSet(JobID runningJobId)
+         {
+             this.runningJobId = runningJobId;
+         }
+
+         public Collection<AttemptReport> getLatestInputAttemptReports()
+         {
+             return inputTaskReports.values();
+         }
+
+         public Collection<AttemptReport> getLatestOutputAttemptReports()
+         {
+             return outputTaskReports.values();
+         }
+
+         public void update(AttemptReport report)
+         {
+             if (report.getInputTaskIndex().isPresent()) {
+                 int taskIndex = report.getInputTaskIndex().get();
+                 AttemptReport past = inputTaskReports.get(taskIndex);
+                 if (past == null || checkOverwrite(past, report)) {
+                     inputTaskReports.put(taskIndex, report);
+                 }
+             }
+             if (report.getOutputTaskIndex().isPresent()) {
+                 int taskIndex = report.getOutputTaskIndex().get();
+                 AttemptReport past = outputTaskReports.get(taskIndex);
+                 if (past == null || checkOverwrite(past, report)) {
+                     outputTaskReports.put(taskIndex, report);
+                 }
+             }
+         }
+
+         private boolean checkOverwrite(AttemptReport past, AttemptReport report)
+         {
+             // if already committed successfully, use it
+             if (!past.isOutputCommitted() && report.isOutputCommitted()) {
+                 return true;
+             }
+
+             // Here expects that TaskAttemptID.compareTo returns <= 0 if attempt is started later.
+             // However, it returns unexpected result if 2 jobs run on different JobTrackers because
+             // JobID includes start time of a JobTracker with sequence number in the JobTracker
+             // rather than start time of a job. To mitigate this problem, this code assumes that
+             // attempts of the running job is always newer.
+             boolean pastRunning = past.getTaskAttempId().getJobID().equals(runningJobId);
+             boolean reportRunning = report.getTaskAttempId().getJobID().equals(runningJobId);
+             if (!pastRunning && reportRunning) {
+                 return true;
+             }
+             return past.getTaskAttempId().compareTo(report.getTaskAttempId()) <= 0;
+         }
+     }
+
      void run(MapReduceExecutorTask task,
              int mapTaskCount, int reduceTaskCount, ProcessState state)
      {
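TaskReportSet keeps only the "winning" report per task index, and checkOverwrite's precedence boils down to three ordered rules, restated here as a pure function over the facts it inspects (names are illustrative, not from the diff):

// 1. a committed report always replaces an uncommitted one;
// 2. a report from the currently running job replaces one from an earlier job,
//    regardless of TaskAttemptID ordering (which is unreliable across JobTrackers);
// 3. otherwise the newer-or-equal TaskAttemptID wins.
class OverwriteRuleSketch
{
    static boolean shouldReplace(boolean pastCommitted, boolean newCommitted,
            boolean pastFromRunningJob, boolean newFromRunningJob, int attemptIdComparison)
    {
        if (!pastCommitted && newCommitted) {
            return true;
        }
        if (!pastFromRunningJob && newFromRunningJob) {
            return true;
        }
        return attemptIdComparison <= 0;  // past.compareTo(new) <= 0: new attempt is not older
    }
}

Here attemptIdComparison stands for past.getTaskAttempId().compareTo(report.getTaskAttempId()) in the real class.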
@@ -163,14 +228,7 @@ public class MapReduceExecutor
          EmbulkMapReduce.setMapTaskCount(job.getConfiguration(), mapTaskCount); // used by EmbulkInputFormat
          EmbulkMapReduce.setStateDirectoryPath(job.getConfiguration(), stateDir);
 
-         // create state dir
-         try {
-             stateDir.getFileSystem(job.getConfiguration()).mkdirs(stateDir);
-         } catch (IOException ex) {
-             throw new RuntimeException(ex);
-         }
-
-         // archive plugins
+         // archive plugins (also create state dir)
          PluginArchive archive = new PluginArchive.Builder()
                  .addLoadedRubyGems(jruby)
                  .build();
@@ -211,26 +269,33 @@ public class MapReduceExecutor
 
          try {
              job.submit();
+             TaskReportSet reportSet = new TaskReportSet(job.getJobID());
 
              int interval = Job.getCompletionPollInterval(job.getConfiguration());
-             while (!job.isComplete()) {
+             while (true) {
+                 EmbulkMapReduce.JobStatus status = EmbulkMapReduce.getJobStatus(job);
+                 if (status.isComplete()) {
+                     break;
+                 }
+                 log.info(String.format("map %.1f%% reduce %.1f%%",
+                         status.getMapProgress() * 100, status.getReduceProgress() * 100));
+
                  //if (job.getState() == JobStatus.State.PREP) {
                  //    continue;
                  //}
-                 log.info(String.format("map %.1f%% reduce %.1f%%",
-                         job.mapProgress() * 100, job.reduceProgress() * 100));
                  Thread.sleep(interval);
 
-                 updateProcessState(job, mapTaskCount, stateDir, state, modelManager, true);
+                 updateProcessState(job, reportSet, stateDir, state, modelManager, true);
              }
 
-             // Here sets skipUnavailable=false to updateProcessState method because race
-             // condition of AttemptReport.readFrom and .writeTo does not happen here.
+             EmbulkMapReduce.JobStatus status = EmbulkMapReduce.getJobStatus(job);
              log.info(String.format("map %.1f%% reduce %.1f%%",
-                     job.mapProgress() * 100, job.reduceProgress() * 100));
-             updateProcessState(job, mapTaskCount, stateDir, state, modelManager, false);
+                     status.getMapProgress() * 100, status.getReduceProgress() * 100));
+             // Here sets inProgress=false to updateProcessState method to tell that race
+             // condition of AttemptReport.readFrom and .writeTo does not happen here.
+             updateProcessState(job, reportSet, stateDir, state, modelManager, false);
 
-             Counters counters = job.getCounters();
+             Counters counters = EmbulkMapReduce.getJobCounters(job);
              if (counters != null) {
                  log.info(counters.toString());
              }
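The net effect of the loop rewrite above is that every Hadoop RPC in the monitoring path (isComplete, progress, counters) now goes through the retried helpers, so a transient ResourceManager or HDFS hiccup no longer aborts the whole Embulk transaction. A minimal driver-side sketch of the same flow, assuming it sits in the org.embulk.executor.mapreduce package (not the executor's exact code):

package org.embulk.executor.mapreduce;

import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;

class JobMonitorSketch
{
    // Polls a submitted job through the retried getJobStatus helper until it
    // completes, printing coarse progress on each tick.
    static void waitForCompletion(Job job) throws IOException, InterruptedException
    {
        int interval = Job.getCompletionPollInterval(job.getConfiguration());
        while (true) {
            EmbulkMapReduce.JobStatus status = EmbulkMapReduce.getJobStatus(job);
            if (status.isComplete()) {
                return;
            }
            System.out.format("map %.1f%% reduce %.1f%%%n",
                    status.getMapProgress() * 100, status.getReduceProgress() * 100);
            Thread.sleep(interval);
        }
    }
}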
@@ -292,50 +357,39 @@ public class MapReduceExecutor
                  + String.format("%09d", time.getNano());
      }
 
-     private void updateProcessState(Job job, int mapTaskCount, Path stateDir,
-             ProcessState state, ModelManager modelManager, boolean skipUnavailable) throws IOException
+     private void updateProcessState(Job job, TaskReportSet reportSet, Path stateDir,
+             ProcessState state, ModelManager modelManager, boolean inProgress) throws IOException
      {
-         List<AttemptReport> reports = getAttemptReports(job.getConfiguration(), stateDir, modelManager);
+         List<AttemptReport> reports = getAttemptReports(job.getConfiguration(), stateDir, modelManager,
+                 inProgress, job.getJobID());
 
          for (AttemptReport report : reports) {
-             if (report == null) {
-                 continue;
-             }
-             if (!report.isAvailable()) {
-                 if (skipUnavailable) {
-                     continue;
-                 } else {
-                     throw report.getUnavailableException();
-                 }
-             }
-             AttemptState attempt = report.getAttemptState();
-             if (attempt.getInputTaskIndex().isPresent()) {
-                 updateState(state.getInputTaskState(attempt.getInputTaskIndex().get()), attempt, true);
-             }
-             if (attempt.getOutputTaskIndex().isPresent()) {
-                 updateState(state.getOutputTaskState(attempt.getOutputTaskIndex().get()), attempt, false);
+             if (report.isAvailable()) {
+                 reportSet.update(report);
              }
          }
+
+         for (AttemptReport report : reportSet.getLatestInputAttemptReports()) {
+             updateTaskState(state.getInputTaskState(report.getInputTaskIndex().get()), report.getAttemptState(), true);
+         }
+
+         for (AttemptReport report : reportSet.getLatestOutputAttemptReports()) {
+             updateTaskState(state.getOutputTaskState(report.getOutputTaskIndex().get()), report.getAttemptState(), true);
+         }
      }
 
-     private static void updateState(TaskState state, AttemptState attempt, boolean isInput)
+     private static void updateTaskState(TaskState state, AttemptState attempt, boolean isInput)
      {
          state.start();
+         Optional<CommitReport> commitReport = isInput ? attempt.getInputCommitReport() : attempt.getOutputCommitReport();
+         boolean committed = commitReport.isPresent();
          if (attempt.getException().isPresent()) {
              if (!state.isCommitted()) {
                  state.setException(new RemoteTaskFailedException(attempt.getException().get()));
-             } else if (
-                     (isInput && attempt.getInputCommitReport().isPresent()) ||
-                     (!isInput && attempt.getOutputCommitReport().isPresent())) {
-                 state.resetException();
-             }
-             if (isInput && attempt.getInputCommitReport().isPresent()) {
-                 state.setCommitReport(attempt.getInputCommitReport().get());
-                 state.finish();
              }
-             if (!isInput && attempt.getOutputCommitReport().isPresent()) {
-                 state.setCommitReport(attempt.getOutputCommitReport().get());
+             if (commitReport.isPresent()) {
+                 state.setCommitReport(commitReport.get());
                  state.finish();
              }
          }
      }
@@ -370,6 +424,16 @@ public class MapReduceExecutor
              return unavailableException;
          }
 
+         public Optional<Integer> getInputTaskIndex()
+         {
+             return attemptState == null ? Optional.<Integer>absent() : attemptState.getInputTaskIndex();
+         }
+
+         public Optional<Integer> getOutputTaskIndex()
+         {
+             return attemptState == null ? Optional.<Integer>absent() : attemptState.getOutputTaskIndex();
+         }
+
          public boolean isInputCommitted()
          {
              return attemptState != null && attemptState.getInputCommitReport().isPresent();
@@ -380,28 +444,39 @@ public class MapReduceExecutor
              return attemptState != null && attemptState.getOutputCommitReport().isPresent();
          }
 
+         public TaskAttemptID getTaskAttempId()
+         {
+             return attemptId;
+         }
+
          public AttemptState getAttemptState()
          {
              return attemptState;
          }
      }
 
-     private static final int TASK_EVENT_FETCH_SIZE = 100;
-
      private static List<AttemptReport> getAttemptReports(Configuration config,
-             Path stateDir, ModelManager modelManager) throws IOException
+             Path stateDir, ModelManager modelManager,
+             boolean jobIsRunning, JobID runningJobId) throws IOException
      {
          ImmutableList.Builder<AttemptReport> builder = ImmutableList.builder();
          for (TaskAttemptID aid : EmbulkMapReduce.listAttempts(config, stateDir)) {
+             boolean concurrentWriteIsPossible = aid.getJobID().equals(runningJobId) && jobIsRunning;
              try {
                  AttemptState state = EmbulkMapReduce.readAttemptStateFile(config,
-                         stateDir, aid, modelManager);
+                         stateDir, aid, modelManager, concurrentWriteIsPossible);
                  builder.add(new AttemptReport(aid, state));
              } catch (IOException ex) {
-                 // Either of:
-                 // * race condition of AttemptReport.writeTo and .readFrom
-                 // * FileSystem is not working
-                 // See also comments on MapReduceExecutor.readAttemptStateFile.isRetryableException.
+                 // See comments on readAttemptStateFile for the possible error causes.
+                 if (!concurrentWriteIsPossible) {
+                     if (!(ex instanceof EOFException)) {
+                         // f) HDFS is broken. This is critical problem which should throw an exception
+                         throw new RuntimeException(ex);
+                     }
+                     // HDFS is working but file is corrupted. It is always possible that the directly
+                     // contains corrupted file created by past attempts of retried task or job. Ignore it.
+                 }
+                 // if concurrentWriteIsPossible, there're no ways to tell the cause. Ignore it.
                  builder.add(new AttemptReport(aid, ex));
              }
          }
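The catch block above makes the decision complementary to the retry rule in readAttemptStateFile: once retries are exhausted or skipped, a failure is either fatal or ignorable. Restated (illustrative, not part of the diff):

import java.io.EOFException;
import java.io.IOException;

// With no concurrent writer, only EOFException (a corrupted leftover state file)
// may be ignored; any other IOException means the filesystem itself is failing,
// which must abort the job. With a concurrent writer the cause is ambiguous, so
// every failure is recorded on the report and skipped.
class AttemptReadFailureSketch
{
    static void handle(IOException ex, boolean concurrentWriteIsPossible)
    {
        if (!concurrentWriteIsPossible && !(ex instanceof EOFException)) {
            throw new RuntimeException(ex);  // broken HDFS: fatal
        }
        // otherwise: ignorable; the caller keeps the attempt with its exception attached
    }
}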
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: embulk-executor-mapreduce
  version: !ruby/object:Gem::Version
-   version: 0.1.2
+   version: 0.1.3
  platform: ruby
  authors:
  - Sadayuki Furuhashi
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-06-27 00:00:00.000000000 Z
+ date: 2015-07-02 00:00:00.000000000 Z
  dependencies: []
  description: Executes tasks on Hadoop.
  email:
@@ -60,7 +60,7 @@ files:
  - classpath/curator-client-2.6.0.jar
  - classpath/curator-framework-2.6.0.jar
  - classpath/curator-recipes-2.6.0.jar
- - classpath/embulk-executor-mapreduce-0.1.2.jar
+ - classpath/embulk-executor-mapreduce-0.1.3.jar
  - classpath/gson-2.2.4.jar
  - classpath/hadoop-annotations-2.6.0.jar
  - classpath/hadoop-auth-2.6.0.jar