embulk-executor-mapreduce 0.1.2 → 0.1.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 05feb97f9e21b2feec1b0e2d1517d17797d68c54
-   data.tar.gz: f7ddeeee84821d58ca377d7d396ea403b2a27b27
+   metadata.gz: dc83806412506bc567037cdf24a1c247a99abf13
+   data.tar.gz: 10a3dd696c3729f58a0c4e2b0ec5b0217cbdfcc1
  SHA512:
-   metadata.gz: e3213d0c7269f68824c94a06cdeca185deeb9ebdc5b518569c99893b75a8ab9b3bb55f4bc93295844891e1f12ecb48fed649f16d8ea2220d65470b3810a3af47
-   data.tar.gz: 9b3104672bc7b400d8096850925e6e843d8c713fbd5b4adc88347102f7dd2256734fc6fd9904c7e6e9d6f76b61db9dd25c8707fe0555fa1318568a94fd72843f
+   metadata.gz: f4ef4b1809a3acf01d0cf449efd4bc026fc77e60bd3aba8708102b0118c06a4b5a7bfd06ce70f497d0121da86ddfe9012dc20ab152c0c192ba0dad1eb80065be
+   data.tar.gz: 0999ab7bc7eb9fa1e71e61212c6be680c9e5fa232ea1cd4d57f45a8e7228e41ea4d4ee8c1e794de64b5caf77232264126979560fe09988bb55d264bfa2839e70
src/main/java/org/embulk/executor/mapreduce/EmbulkMapReduce.java CHANGED
@@ -1,15 +1,18 @@
  package org.embulk.executor.mapreduce;

- import java.io.EOFException;
- import java.io.InterruptedIOException;
  import java.util.List;
  import java.util.ArrayList;
+ import java.util.concurrent.Callable;
  import java.util.concurrent.ExecutionException;
  import java.io.File;
  import java.io.IOException;
+ import java.io.EOFException;
+ import java.io.InterruptedIOException;
+ import java.lang.reflect.InvocationTargetException;
  import com.google.inject.Injector;
  import com.google.common.base.Optional;
  import com.google.common.base.Throwables;
+ import com.google.common.base.Throwables;
  import com.google.common.collect.ImmutableList;
  import com.fasterxml.jackson.core.JsonFactory;
  import com.fasterxml.jackson.databind.ObjectMapper;
@@ -24,6 +27,7 @@ import org.apache.hadoop.io.NullWritable;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.JobContext;
+ import org.apache.hadoop.mapreduce.Counters;
  import org.apache.hadoop.mapreduce.TaskAttemptID;
  import org.apache.hadoop.mapreduce.Mapper;
  import org.apache.hadoop.mapreduce.Reducer;
@@ -48,11 +52,14 @@ import static org.embulk.spi.util.RetryExecutor.retryExecutor;

  public class EmbulkMapReduce
  {
+     private static final String SYSTEM_CONFIG_SERVICE_CLASS = "mapreduce_service_class";
+
      private static final String CK_SYSTEM_CONFIG = "embulk.mapreduce.systemConfig";
      private static final String CK_STATE_DIRECTORY_PATH = "embulk.mapreduce.stateDirectorypath";
      private static final String CK_TASK_COUNT = "embulk.mapreduce.taskCount";
      private static final String CK_TASK = "embulk.mapreduce.task";
      private static final String CK_PLUGIN_ARCHIVE_SPECS = "embulk.mapreduce.pluginArchive.specs";
+
      private static final String PLUGIN_ARCHIVE_FILE_NAME = "gems.zip";

      public static void setSystemConfig(Configuration config, ModelManager modelManager, ConfigSource systemConfig)
@@ -105,51 +112,157 @@ public class EmbulkMapReduce
      public static Injector newEmbulkInstance(Configuration config)
      {
          ConfigSource systemConfig = getSystemConfig(config);
-         return new EmbulkService(systemConfig).getInjector();
+         String serviceClassName = systemConfig.get(String.class, SYSTEM_CONFIG_SERVICE_CLASS, "org.embulk.EmbulkService");
+
+         try {
+             Object obj;
+             if (serviceClassName.equals("org.embulk.EmbulkService")) {
+                 obj = new EmbulkService(systemConfig);
+             } else {
+                 Class<?> serviceClass = Class.forName(serviceClassName);
+                 obj = serviceClass.getConstructor(ConfigSource.class).newInstance(systemConfig);
+             }
+
+             if (obj instanceof EmbulkService) {
+                 return ((EmbulkService) obj).getInjector();
+             } else {
+                 return (Injector) obj.getClass().getMethod("getInjector").invoke(obj);
+             }
+
+         } catch (InvocationTargetException ex) {
+             throw Throwables.propagate(ex.getCause());
+         } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | IllegalArgumentException ex) {
+             throw Throwables.propagate(ex);
+         }
      }
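Note: the new mapreduce_service_class system config lets a deployment swap in its own service bootstrap class in place of org.embulk.EmbulkService. The reflective branch above needs a public constructor taking a ConfigSource and a getInjector() method. A minimal sketch of a compatible service follows; the class name com.example.CustomEmbulkService is a hypothetical placeholder, not part of this gem:

    package com.example;

    import org.embulk.EmbulkService;
    import org.embulk.config.ConfigSource;

    // Extending EmbulkService is the simplest route, because newEmbulkInstance
    // special-cases that type and calls getInjector() on it directly.
    public class CustomEmbulkService extends EmbulkService
    {
        // The reflective branch requires exactly this constructor signature.
        public CustomEmbulkService(ConfigSource systemConfig)
        {
            super(systemConfig);
        }
    }

Selecting it is then a matter of setting mapreduce_service_class: com.example.CustomEmbulkService in the Embulk system config.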

-     public static List<TaskAttemptID> listAttempts(Configuration config,
-             Path stateDir) throws IOException
+     public static class JobStatus
      {
-         FileStatus[] stats = stateDir.getFileSystem(config).listStatus(stateDir);
-         ImmutableList.Builder<TaskAttemptID> builder = ImmutableList.builder();
-         for (FileStatus stat : stats) {
-             if (stat.getPath().getName().startsWith("attempt_") && stat.isFile()) {
-                 String name = stat.getPath().getName();
-                 try {
-                     builder.add(TaskAttemptID.forName(name));
-                 } catch (IllegalArgumentException ex) {
-                     // ignore
+         private final boolean completed;
+         private final float mapProgress;
+         private final float reduceProgress;
+
+         public JobStatus(boolean completed, float mapProgress, float reduceProgress)
+         {
+             this.completed = completed;
+             this.mapProgress = mapProgress;
+             this.reduceProgress = reduceProgress;
+         }
+
+         public boolean isComplete()
+         {
+             return completed;
+         }
+
+         public float getMapProgress()
+         {
+             return mapProgress;
+         }
+
+         public float getReduceProgress()
+         {
+             return reduceProgress;
+         }
+     }
+
+     public static JobStatus getJobStatus(final Job job) throws IOException
+     {
+         return hadoopOperationWithRetry("getting job status", new Callable<JobStatus>() {
+             public JobStatus call() throws IOException
+             {
+                 return new JobStatus(job.isComplete(), job.mapProgress(), job.reduceProgress());
+             }
+         });
+     }
+
+     public static Counters getJobCounters(final Job job) throws IOException
+     {
+         return hadoopOperationWithRetry("getting job counters", new Callable<Counters>() {
+             public Counters call() throws IOException
+             {
+                 return job.getCounters();
+             }
+         });
+     }
+
+     public static List<TaskAttemptID> listAttempts(final Configuration config,
+             final Path stateDir) throws IOException
+     {
+         return hadoopOperationWithRetry("getting list of attempt state files on "+stateDir, new Callable<List<TaskAttemptID>>() {
+             public List<TaskAttemptID> call() throws IOException
+             {
+                 FileStatus[] stats = stateDir.getFileSystem(config).listStatus(stateDir);
+                 ImmutableList.Builder<TaskAttemptID> builder = ImmutableList.builder();
+                 for (FileStatus stat : stats) {
+                     if (stat.getPath().getName().startsWith("attempt_") && stat.isFile()) {
+                         String name = stat.getPath().getName();
+                         TaskAttemptID id;
+                         try {
+                             id = TaskAttemptID.forName(name);
+                         } catch (Exception ex) {
+                             // ignore this file
+                             continue;
+                         }
+                         builder.add(id);
+                     }
                  }
+                 return builder.build();
              }
-         }
-         return builder.build();
+         });
      }

-     public static PluginArchive readPluginArchive(File localDirectory, Configuration config,
-             Path stateDir, ModelManager modelManager) throws IOException
+     public static void writePluginArchive(final Configuration config, final Path stateDir,
+             final PluginArchive archive, final ModelManager modelManager) throws IOException
      {
-         List<PluginArchive.GemSpec> specs = modelManager.readObject(
-                 new ArrayList<PluginArchive.GemSpec>() {}.getClass(),
-                 config.get(CK_PLUGIN_ARCHIVE_SPECS));
-         Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
-         try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
-             return PluginArchive.load(localDirectory, specs, in);
-         }
+         final Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
+         hadoopOperationWithRetry("writing plugin archive to "+path, new Callable<Void>() {
+             public Void call() throws IOException
+             {
+                 stateDir.getFileSystem(config).mkdirs(stateDir);
+                 try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
+                     List<PluginArchive.GemSpec> specs = archive.dump(out);
+                     config.set(CK_PLUGIN_ARCHIVE_SPECS, modelManager.writeObject(specs));
+                 }
+                 return null;
+             }
+         });
      }

-     public static void writePluginArchive(Configuration config, Path stateDir,
-             PluginArchive archive, ModelManager modelManager) throws IOException
+     public static PluginArchive readPluginArchive(final File localDirectory, final Configuration config,
+             Path stateDir, final ModelManager modelManager) throws IOException
      {
-         Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
-         try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
-             List<PluginArchive.GemSpec> specs = archive.dump(out);
-             config.set(CK_PLUGIN_ARCHIVE_SPECS, modelManager.writeObject(specs));
-         }
+         final Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
+         return hadoopOperationWithRetry("reading plugin archive file from "+path, new Callable<PluginArchive>() {
+             public PluginArchive call() throws IOException
+             {
+                 List<PluginArchive.GemSpec> specs = modelManager.readObject(
+                         new ArrayList<PluginArchive.GemSpec>() {}.getClass(),
+                         config.get(CK_PLUGIN_ARCHIVE_SPECS));
+                 try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
+                     return PluginArchive.load(localDirectory, specs, in);
+                 }
+             }
+         });
+     }
+
+     public static void writeAttemptStateFile(final Configuration config,
+             Path stateDir, final AttemptState state, final ModelManager modelManager) throws IOException
+     {
+         final Path path = new Path(stateDir, state.getAttemptId().toString());
+         hadoopOperationWithRetry("writing attempt state file to "+path, new Callable<Void>() {
+             public Void call() throws IOException
+             {
+                 try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
+                     state.writeTo(out, modelManager);
+                 }
+                 return null;
+             }
+         });
      }

      public static AttemptState readAttemptStateFile(final Configuration config,
-             Path stateDir, TaskAttemptID id, final ModelManager modelManager) throws IOException
+             Path stateDir, TaskAttemptID id, final ModelManager modelManager,
+             final boolean concurrentWriteIsPossible) throws IOException
      {
          final Logger log = Exec.getLogger(EmbulkMapReduce.class);
          final Path path = new Path(stateDir, id.toString());
@@ -160,34 +273,46 @@ public class EmbulkMapReduce
                      .withMaxRetryWait(20 * 1000)
                      .runInterruptible(new Retryable<AttemptState>() {
                          @Override
-                         public AttemptState call() throws IOException {
+                         public AttemptState call() throws IOException
+                         {
                              try (FSDataInputStream in = path.getFileSystem(config).open(path)) {
                                  return AttemptState.readFrom(in, modelManager);
                              }
                          }

                          @Override
-                         public boolean isRetryableException(Exception exception) {
-                             // AttemptState.readFrom throws 2 types of exceptions:
-                             // a) EOFException: race between readFrom and writeTo. See comments on AttemptState.readFrom.
-                             // b) IOException "Cannot obtain block length for LocatedBlock": HDFS-1058. See https://github.com/embulk/embulk-executor-mapreduce/pull/3
-                             // c) other IOException: FileSystem is not working
+                         public boolean isRetryableException(Exception exception)
+                         {
+                             // AttemptState.readFrom throws 4 types of exceptions:
+                             //
+                             // concurrentWriteIsPossible == true:
+                             //   a) EOFException: race between readFrom and writeTo. See comments on AttemptState.readFrom.
+                             //   b) EOFException: the file exists but its format is invalid because this task was retried and the last job/attempt left corrupted files (empty, partially written, etc.)
+                             //   c) IOException "Cannot obtain block length for LocatedBlock": HDFS-1058. See https://github.com/embulk/embulk-executor-mapreduce/pull/3
+                             //   d) IOException: FileSystem is not working
+                             // concurrentWriteIsPossible == false:
+                             //   e) EOFException: the file exists but its format is invalid because this task was retried and the last job/attempt left corrupted files (empty, partially written, etc.)
+                             //   f) IOException: FileSystem is not working
                              //
-                             // a) and b) are temporary problem which is not critical. c) could be temporary problem and it is critical.
-                             // Here retries regardless of the exception type because we can't distinguish b) from c).
+                             if (exception instanceof EOFException && !concurrentWriteIsPossible) {
+                                 // e) is not recoverable.
+                                 return false;
+                             }
                              return true;
                          }

                          @Override
                          public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
-                                 throws RetryGiveupException {
-                             log.warn("Retrying opening state file " + path.getName() + " error: " + exception);
+                                 throws RetryGiveupException
+                         {
+                             log.warn("Retrying opening state file {} ({}/{}) error: {}",
+                                     path, retryCount, retryLimit, exception);
                          }

                          @Override
                          public void onGiveup(Exception firstException, Exception lastException)
-                                 throws RetryGiveupException {
-                         }
+                                 throws RetryGiveupException
+                         { }
                      });
          } catch (RetryGiveupException e) {
              Throwables.propagateIfInstanceOf(e.getCause(), IOException.class);
@@ -197,13 +322,45 @@ public class EmbulkMapReduce
          }
      }

-     public static void writeAttemptStateFile(Configuration config,
-             Path stateDir, AttemptState state, ModelManager modelManager) throws IOException
+     private static <T> T hadoopOperationWithRetry(final String message, final Callable<T> callable) throws IOException
      {
-         Path path = new Path(stateDir, state.getAttemptId().toString());
-         // TODO retry file create and write
-         try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
-             state.writeTo(out, modelManager);
+         final Logger log = Exec.getLogger(EmbulkMapReduce.class);
+         try {
+             return retryExecutor()
+                     .withRetryLimit(5)
+                     .withInitialRetryWait(2 * 1000)
+                     .withMaxRetryWait(20 * 1000)
+                     .runInterruptible(new Retryable<T>() {
+                         @Override
+                         public T call() throws Exception
+                         {
+                             return callable.call();
+                         }
+
+                         @Override
+                         public boolean isRetryableException(Exception exception)
+                         {
+                             return true;
+                         }
+
+                         @Override
+                         public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
+                                 throws RetryGiveupException
+                         {
+                             log.warn("Retrying {} ({}/{}) error: {}",
+                                     message, retryCount, retryLimit, exception);
+                         }
+
+                         @Override
+                         public void onGiveup(Exception firstException, Exception lastException)
+                                 throws RetryGiveupException
+                         { }
+                     });
+         } catch (RetryGiveupException e) {
+             Throwables.propagateIfInstanceOf(e.getCause(), IOException.class);
+             throw Throwables.propagate(e.getCause());
+         } catch (InterruptedException e) {
+             throw new InterruptedIOException();
          }
      }

src/main/java/org/embulk/executor/mapreduce/MapReduceExecutor.java CHANGED
@@ -1,9 +1,11 @@
  package org.embulk.executor.mapreduce;

  import java.util.List;
- import java.util.Map;
+ import java.util.Collection;
  import java.util.Set;
+ import java.util.Map;
  import java.util.HashSet;
+ import java.util.HashMap;
  import java.io.File;
  import java.io.IOException;
  import java.io.EOFException;
@@ -29,6 +31,7 @@ import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.mapreduce.JobContext;
  import org.apache.hadoop.mapreduce.Cluster;
  import org.apache.hadoop.mapreduce.Job;
+ import org.apache.hadoop.mapreduce.JobID;
  import org.apache.hadoop.mapreduce.Counters;
  import org.apache.hadoop.mapreduce.TaskType;
  import org.apache.hadoop.mapreduce.TaskAttemptID;
@@ -37,6 +40,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
  import org.embulk.exec.ForSystemConfig;
  import org.embulk.config.ConfigSource;
+ import org.embulk.config.CommitReport;
  import org.embulk.config.ConfigException;
  import org.embulk.config.TaskSource;
  import org.embulk.config.ModelManager;
@@ -116,6 +120,67 @@ public class MapReduceExecutor
          }
      }

+     private static class TaskReportSet
+     {
+         private Map<Integer, AttemptReport> inputTaskReports = new HashMap<>();
+         private Map<Integer, AttemptReport> outputTaskReports = new HashMap<>();
+
+         private final JobID runningJobId;
+
+         public TaskReportSet(JobID runningJobId)
+         {
+             this.runningJobId = runningJobId;
+         }
+
+         public Collection<AttemptReport> getLatestInputAttemptReports()
+         {
+             return inputTaskReports.values();
+         }
+
+         public Collection<AttemptReport> getLatestOutputAttemptReports()
+         {
+             return outputTaskReports.values();
+         }
+
+         public void update(AttemptReport report)
+         {
+             if (report.getInputTaskIndex().isPresent()) {
+                 int taskIndex = report.getInputTaskIndex().get();
+                 AttemptReport past = inputTaskReports.get(taskIndex);
+                 if (past == null || checkOverwrite(past, report)) {
+                     inputTaskReports.put(taskIndex, report);
+                 }
+             }
+             if (report.getOutputTaskIndex().isPresent()) {
+                 int taskIndex = report.getOutputTaskIndex().get();
+                 AttemptReport past = outputTaskReports.get(taskIndex);
+                 if (past == null || checkOverwrite(past, report)) {
+                     outputTaskReports.put(taskIndex, report);
+                 }
+             }
+         }
+
+         private boolean checkOverwrite(AttemptReport past, AttemptReport report)
+         {
+             // if the new report is already committed successfully, use it
+             if (!past.isOutputCommitted() && report.isOutputCommitted()) {
+                 return true;
+             }
+
+             // This code expects TaskAttemptID.compareTo to return <= 0 if the attempt started later.
+             // However, it can return an unexpected result if 2 jobs run on different JobTrackers,
+             // because a JobID includes the start time of the JobTracker and a sequence number within
+             // that JobTracker rather than the start time of the job. To mitigate this problem, this
+             // code assumes that attempts of the currently running job are always newer.
+             boolean pastRunning = past.getTaskAttempId().getJobID().equals(runningJobId);
+             boolean reportRunning = report.getTaskAttempId().getJobID().equals(runningJobId);
+             if (!pastRunning && reportRunning) {
+                 return true;
+             }
+             return past.getTaskAttempId().compareTo(report.getTaskAttempId()) <= 0;
+         }
+     }
+
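Note: TaskReportSet keeps only the newest report per input/output task index, and checkOverwrite leans on the natural ordering of TaskAttemptID within one job. A small self-contained sketch of that ordering assumption (the jtIdentifier "20150702" and job number are arbitrary placeholders):

    import org.apache.hadoop.mapreduce.TaskAttemptID;
    import org.apache.hadoop.mapreduce.TaskType;

    public class AttemptOrdering
    {
        public static void main(String[] args)
        {
            // Two attempts of the same map task in the same job:
            // attempt 0 and its retry, attempt 1.
            TaskAttemptID first = new TaskAttemptID("20150702", 1, TaskType.MAP, 0, 0);
            TaskAttemptID retry = new TaskAttemptID("20150702", 1, TaskType.MAP, 0, 1);
            // first.compareTo(retry) <= 0, so checkOverwrite lets the retry's
            // report replace the earlier one for the same task index.
            System.out.println(first.compareTo(retry) <= 0);  // prints true
        }
    }

Across two different jobs the comparison is unreliable for the reason given in the comment above, which is why reports from the currently running job always win.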
      void run(MapReduceExecutorTask task,
              int mapTaskCount, int reduceTaskCount, ProcessState state)
      {
@@ -163,14 +228,7 @@ public class MapReduceExecutor
          EmbulkMapReduce.setMapTaskCount(job.getConfiguration(), mapTaskCount); // used by EmbulkInputFormat
          EmbulkMapReduce.setStateDirectoryPath(job.getConfiguration(), stateDir);

-         // create state dir
-         try {
-             stateDir.getFileSystem(job.getConfiguration()).mkdirs(stateDir);
-         } catch (IOException ex) {
-             throw new RuntimeException(ex);
-         }
-
-         // archive plugins
+         // archive plugins (this also creates the state dir)
          PluginArchive archive = new PluginArchive.Builder()
                  .addLoadedRubyGems(jruby)
                  .build();
@@ -211,26 +269,33 @@ public class MapReduceExecutor

          try {
              job.submit();
+             TaskReportSet reportSet = new TaskReportSet(job.getJobID());

              int interval = Job.getCompletionPollInterval(job.getConfiguration());
-             while (!job.isComplete()) {
+             while (true) {
+                 EmbulkMapReduce.JobStatus status = EmbulkMapReduce.getJobStatus(job);
+                 if (status.isComplete()) {
+                     break;
+                 }
+                 log.info(String.format("map %.1f%% reduce %.1f%%",
+                         status.getMapProgress() * 100, status.getReduceProgress() * 100));
+
                  //if (job.getState() == JobStatus.State.PREP) {
                  //    continue;
                  //}
-                 log.info(String.format("map %.1f%% reduce %.1f%%",
-                         job.mapProgress() * 100, job.reduceProgress() * 100));
                  Thread.sleep(interval);

-                 updateProcessState(job, mapTaskCount, stateDir, state, modelManager, true);
+                 updateProcessState(job, reportSet, stateDir, state, modelManager, true);
              }

-             // Here sets skipUnavailable=false to updateProcessState method because race
-             // condition of AttemptReport.readFrom and .writeTo does not happen here.
+             EmbulkMapReduce.JobStatus status = EmbulkMapReduce.getJobStatus(job);
              log.info(String.format("map %.1f%% reduce %.1f%%",
-                     job.mapProgress() * 100, job.reduceProgress() * 100));
-             updateProcessState(job, mapTaskCount, stateDir, state, modelManager, false);
+                     status.getMapProgress() * 100, status.getReduceProgress() * 100));
+             // Here, inProgress=false is passed to updateProcessState to tell it that the race
+             // condition between AttemptReport.readFrom and .writeTo can no longer happen.
+             updateProcessState(job, reportSet, stateDir, state, modelManager, false);

-             Counters counters = job.getCounters();
+             Counters counters = EmbulkMapReduce.getJobCounters(job);
              if (counters != null) {
                  log.info(counters.toString());
              }
@@ -292,50 +357,39 @@ public class MapReduceExecutor
                  + String.format("%09d", time.getNano());
      }

-     private void updateProcessState(Job job, int mapTaskCount, Path stateDir,
-             ProcessState state, ModelManager modelManager, boolean skipUnavailable) throws IOException
+     private void updateProcessState(Job job, TaskReportSet reportSet, Path stateDir,
+             ProcessState state, ModelManager modelManager, boolean inProgress) throws IOException
      {
-         List<AttemptReport> reports = getAttemptReports(job.getConfiguration(), stateDir, modelManager);
+         List<AttemptReport> reports = getAttemptReports(job.getConfiguration(), stateDir, modelManager,
+                 inProgress, job.getJobID());

          for (AttemptReport report : reports) {
-             if (report == null) {
-                 continue;
-             }
-             if (!report.isAvailable()) {
-                 if (skipUnavailable) {
-                     continue;
-                 } else {
-                     throw report.getUnavailableException();
-                 }
-             }
-             AttemptState attempt = report.getAttemptState();
-             if (attempt.getInputTaskIndex().isPresent()) {
-                 updateState(state.getInputTaskState(attempt.getInputTaskIndex().get()), attempt, true);
-             }
-             if (attempt.getOutputTaskIndex().isPresent()) {
-                 updateState(state.getOutputTaskState(attempt.getOutputTaskIndex().get()), attempt, false);
+             if (report.isAvailable()) {
+                 reportSet.update(report);
              }
          }
+
+         for (AttemptReport report : reportSet.getLatestInputAttemptReports()) {
+             updateTaskState(state.getInputTaskState(report.getInputTaskIndex().get()), report.getAttemptState(), true);
+         }
+
+         for (AttemptReport report : reportSet.getLatestOutputAttemptReports()) {
+             updateTaskState(state.getOutputTaskState(report.getOutputTaskIndex().get()), report.getAttemptState(), false);
+         }
      }

-     private static void updateState(TaskState state, AttemptState attempt, boolean isInput)
+     private static void updateTaskState(TaskState state, AttemptState attempt, boolean isInput)
      {
          state.start();
+         Optional<CommitReport> commitReport = isInput ? attempt.getInputCommitReport() : attempt.getOutputCommitReport();
+         boolean committed = commitReport.isPresent();
          if (attempt.getException().isPresent()) {
              if (!state.isCommitted()) {
                  state.setException(new RemoteTaskFailedException(attempt.getException().get()));
              }
-         } else if (
-                 (isInput && attempt.getInputCommitReport().isPresent()) ||
-                 (!isInput && attempt.getOutputCommitReport().isPresent())) {
-             state.resetException();
-         }
-         if (isInput && attempt.getInputCommitReport().isPresent()) {
-             state.setCommitReport(attempt.getInputCommitReport().get());
-             state.finish();
          }
-         if (!isInput && attempt.getOutputCommitReport().isPresent()) {
-             state.setCommitReport(attempt.getOutputCommitReport().get());
+         if (commitReport.isPresent()) {
+             state.setCommitReport(commitReport.get());
              state.finish();
          }
      }
@@ -370,6 +424,16 @@ public class MapReduceExecutor
              return unavailableException;
          }

+         public Optional<Integer> getInputTaskIndex()
+         {
+             return attemptState == null ? Optional.<Integer>absent() : attemptState.getInputTaskIndex();
+         }
+
+         public Optional<Integer> getOutputTaskIndex()
+         {
+             return attemptState == null ? Optional.<Integer>absent() : attemptState.getOutputTaskIndex();
+         }
+
          public boolean isInputCommitted()
          {
              return attemptState != null && attemptState.getInputCommitReport().isPresent();
@@ -380,28 +444,39 @@ public class MapReduceExecutor
              return attemptState != null && attemptState.getOutputCommitReport().isPresent();
          }

+         public TaskAttemptID getTaskAttempId()
+         {
+             return attemptId;
+         }
+
          public AttemptState getAttemptState()
          {
              return attemptState;
          }
      }

-     private static final int TASK_EVENT_FETCH_SIZE = 100;
-
      private static List<AttemptReport> getAttemptReports(Configuration config,
-             Path stateDir, ModelManager modelManager) throws IOException
+             Path stateDir, ModelManager modelManager,
+             boolean jobIsRunning, JobID runningJobId) throws IOException
      {
          ImmutableList.Builder<AttemptReport> builder = ImmutableList.builder();
          for (TaskAttemptID aid : EmbulkMapReduce.listAttempts(config, stateDir)) {
+             boolean concurrentWriteIsPossible = aid.getJobID().equals(runningJobId) && jobIsRunning;
              try {
                  AttemptState state = EmbulkMapReduce.readAttemptStateFile(config,
-                         stateDir, aid, modelManager);
+                         stateDir, aid, modelManager, concurrentWriteIsPossible);
                  builder.add(new AttemptReport(aid, state));
              } catch (IOException ex) {
-                 // Either of:
-                 // * race condition of AttemptReport.writeTo and .readFrom
-                 // * FileSystem is not working
-                 // See also comments on MapReduceExecutor.readAttemptStateFile.isRetryableException.
+                 // See the comments on readAttemptStateFile for the possible error causes.
+                 if (!concurrentWriteIsPossible) {
+                     if (!(ex instanceof EOFException)) {
+                         // f) HDFS is broken. This is a critical problem that should throw an exception.
+                         throw new RuntimeException(ex);
+                     }
+                     // HDFS is working but the file is corrupted. It is always possible that the directory
+                     // contains corrupted files created by past attempts of a retried task or job. Ignore it.
+                 }
+                 // If concurrentWriteIsPossible, there is no way to tell the cause. Ignore it.
                  builder.add(new AttemptReport(aid, ex));
              }
          }
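Note: the catch block above encodes a small decision matrix. A compact restatement as a sketch (this predicate mirrors the logic of the catch block; it is not an API of this gem):

    import java.io.EOFException;
    import java.io.IOException;

    public class StateFileErrorRule
    {
        // While a concurrent writer may exist, any IOException is ambiguous, so it
        // is ignorable. Once no concurrent writer can exist, only EOFException (a
        // corrupted leftover from a past attempt) is ignorable; any other
        // IOException means the FileSystem itself is failing and must abort.
        static boolean isIgnorable(IOException ex, boolean concurrentWriteIsPossible)
        {
            return concurrentWriteIsPossible || ex instanceof EOFException;
        }

        public static void main(String[] args)
        {
            System.out.println(isIgnorable(new EOFException(), false));      // true: corrupted leftover
            System.out.println(isIgnorable(new IOException("rpc"), false));  // false: abort the job
            System.out.println(isIgnorable(new IOException("rpc"), true));   // true: cause is ambiguous
        }
    }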
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: embulk-executor-mapreduce
  version: !ruby/object:Gem::Version
-   version: 0.1.2
+   version: 0.1.3
  platform: ruby
  authors:
  - Sadayuki Furuhashi
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-06-27 00:00:00.000000000 Z
+ date: 2015-07-02 00:00:00.000000000 Z
  dependencies: []
  description: Executes tasks on Hadoop.
  email:
@@ -60,7 +60,7 @@ files:
  - classpath/curator-client-2.6.0.jar
  - classpath/curator-framework-2.6.0.jar
  - classpath/curator-recipes-2.6.0.jar
- - classpath/embulk-executor-mapreduce-0.1.2.jar
+ - classpath/embulk-executor-mapreduce-0.1.3.jar
  - classpath/gson-2.2.4.jar
  - classpath/hadoop-annotations-2.6.0.jar
  - classpath/hadoop-auth-2.6.0.jar