embulk-executor-mapreduce 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 4461eebeecc53f99b9b9683d7553a585a87e1a1f
-  data.tar.gz: a019cd9224918ae2721482a9cf92c9c8148a05a6
+  metadata.gz: 0f276edacaa4ab8219234ec69e521edaee7c5104
+  data.tar.gz: 1bff944c1f3ab1b3a406fd871ec43888eeff43a1
 SHA512:
-  metadata.gz: 91e107ce10160fc097930b139f07b59dcb80b1201dde0723cc302fd1e142a283ad8a817d7518ac7684f0b066ed55537c20ae0af5446230eb0f63026d9bf7e21d
-  data.tar.gz: bc045316fedf83de62e34bbf9304152680d90e46ec6fa885fc054aca50d9a967b2d8f8eb3fa86e7c685badba7e3f15834f985fee923c5bf54c9f5197f43a68fb
+  metadata.gz: f49485f369fde98696410fc98cd38ff4e27f83a1286f1e892bc2bd06345cfb2cc28049a7ce8751f84af37663c1633345942431555c97ef4190d55f0c376724d8
+  data.tar.gz: 7f4460a9690a2462f87b179398bc2380f699fed06112034bfa10168e1547152fc6afc45816ea69f2261559aba0c9dccc47d633d4cac26a3de571afc52685f039
@@ -196,7 +196,7 @@ public class EmbulkMapReduce
 
     public static JobStatus getJobStatus(final Job job) throws IOException
     {
-        return hadoopOperationWithRetry("getting job status", new Callable<JobStatus>() {
+        return hadoopOperationWithRetry("Getting job status", new Callable<JobStatus>() {
             public JobStatus call() throws IOException
             {
                 return new JobStatus(job.isComplete(), job.mapProgress(), job.reduceProgress());
@@ -206,7 +206,7 @@ public class EmbulkMapReduce
 
     public static Counters getJobCounters(final Job job) throws IOException
     {
-        return hadoopOperationWithRetry("getting job counters", new Callable<Counters>() {
+        return hadoopOperationWithRetry("Getting job counters", new Callable<Counters>() {
             public Counters call() throws IOException
             {
                 return job.getCounters();
@@ -217,7 +217,7 @@ public class EmbulkMapReduce
     public static List<TaskAttemptID> listAttempts(final Configuration config,
             final Path stateDir) throws IOException
     {
-        return hadoopOperationWithRetry("getting list of attempt state files on "+stateDir, new Callable<List<TaskAttemptID>>() {
+        return hadoopOperationWithRetry("Getting list of attempt state files on "+stateDir, new Callable<List<TaskAttemptID>>() {
             public List<TaskAttemptID> call() throws IOException
             {
                 FileStatus[] stats = stateDir.getFileSystem(config).listStatus(stateDir);
@@ -244,7 +244,7 @@ public class EmbulkMapReduce
             final PluginArchive archive, final ModelManager modelManager) throws IOException
     {
         final Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
-        hadoopOperationWithRetry("writing plugin archive to "+path, new Callable<Void>() {
+        hadoopOperationWithRetry("Writing plugin archive to "+path, new Callable<Void>() {
             public Void call() throws IOException
             {
                 stateDir.getFileSystem(config).mkdirs(stateDir);
@@ -264,7 +264,7 @@ public class EmbulkMapReduce
             Path stateDir, final ModelManager modelManager) throws IOException
     {
         final Path path = new Path(stateDir, PLUGIN_ARCHIVE_FILE_NAME);
-        return hadoopOperationWithRetry("reading plugin archive file from "+path, new Callable<PluginArchive>() {
+        return hadoopOperationWithRetry("Reading plugin archive file from "+path, new Callable<PluginArchive>() {
             public PluginArchive call() throws IOException
             {
                 List<PluginArchive.GemSpec> specs = modelManager.readObject(
@@ -281,7 +281,7 @@ public class EmbulkMapReduce
             Path stateDir, final AttemptState state, final ModelManager modelManager) throws IOException
     {
         final Path path = new Path(stateDir, state.getAttemptId().toString());
-        hadoopOperationWithRetry("writing attempt state file to "+path, new Callable<Void>() {
+        hadoopOperationWithRetry("Writing attempt state file to "+path, new Callable<Void>() {
             public Void call() throws IOException
             {
                 try (FSDataOutputStream out = path.getFileSystem(config).create(path, true)) {
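The six hunks above only capitalize the first word of the message passed to hadoopOperationWithRetry, the helper that wraps each HDFS operation in a retry loop and includes that message in its warnings. The helper's body is not part of this diff; the sketch below is a hypothetical stand-in (withRetry, RETRY_LIMIT, and RETRY_WAIT_MS are illustrative names, not from the plugin) showing the general shape of such a wrapper.

import java.io.IOException;
import java.util.concurrent.Callable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class RetrySketch
{
    private static final Logger log = LoggerFactory.getLogger(RetrySketch.class);
    private static final int RETRY_LIMIT = 5;         // illustrative values, not the plugin's
    private static final int RETRY_WAIT_MS = 10_000;

    // Hypothetical equivalent of hadoopOperationWithRetry(message, callable):
    // run the operation, and on failure log the message and retry a fixed number of times.
    static <T> T withRetry(String message, Callable<T> op) throws IOException
    {
        for (int attempt = 1; ; attempt++) {
            try {
                return op.call();
            }
            catch (Exception e) {
                if (attempt > RETRY_LIMIT) {
                    throw new IOException(message + " failed after " + RETRY_LIMIT + " retries", e);
                }
                log.warn("{} failed. Retrying {}/{} after {} seconds. Message: {}",
                        message, attempt, RETRY_LIMIT, RETRY_WAIT_MS / 1000, e.getMessage());
                try {
                    Thread.sleep(RETRY_WAIT_MS);
                }
                catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    throw new IOException(ie);
                }
            }
        }
    }
}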
@@ -326,7 +326,8 @@ public class EmbulkMapReduce
                 // e) EOFException: file exists but its format is invalid because this task is retried and last job/attempt left corrupted files (such as empty, partially written, etc)
                 // f) IOException: FileSystem is not working
                 //
-                if (exception instanceof EOFException && !concurrentWriteIsPossible) {
+                if (exception instanceof EOFException) {
+                    // a) and b) don't need retrying. See MapReduceExecutor.getAttemptReports that ignores EOFException.
                     // e) is not recoverable.
                     return false;
                 }
@@ -337,8 +338,9 @@ public class EmbulkMapReduce
             public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
                     throws RetryGiveupException
             {
-                log.warn("Retrying opening state file {} ({}/{}) error: {}",
-                        path, retryCount, retryLimit, exception);
+                log.warn("Reading a state file failed. Retrying {}/{} after {} seconds. Message: {}",
+                        retryCount, retryLimit, retryWait, exception.getMessage(),
+                        retryCount % 3 == 0 ? exception : null);
             }
 
             @Override
@@ -384,8 +386,9 @@ public class EmbulkMapReduce
             public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
                     throws RetryGiveupException
             {
-                log.warn("Retrying {} ({}/{}) error: {}",
-                        message, retryCount, retryLimit, exception);
+                log.warn("{} failed. Retrying {}/{} after {} seconds. Message: {}",
+                        message, retryCount, retryLimit, retryWait, exception.getMessage(),
+                        retryCount % 3 == 0 ? exception : null);
             }
 
             @Override
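Both onRetry changes above adopt the same logging convention: the warning now reports the wait time and only the exception's message, and the exception object itself (which SLF4J renders with a stack trace when it is the trailing argument) is attached only on every third retry, so repeated transient failures do not flood the log. A standalone illustration of that pattern, with an assumed class name, follows.

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class RetryLoggingSketch
{
    private static final Logger log = LoggerFactory.getLogger(RetryLoggingSketch.class);

    // Illustration of the logging convention introduced in this release:
    // SLF4J treats a trailing Throwable argument as the exception to print,
    // so passing null suppresses the stack trace while keeping the message.
    static void logRetry(String operation, Exception exception,
            int retryCount, int retryLimit, int retryWaitSeconds)
    {
        log.warn("{} failed. Retrying {}/{} after {} seconds. Message: {}",
                operation, retryCount, retryLimit, retryWaitSeconds, exception.getMessage(),
                retryCount % 3 == 0 ? exception : null);  // full stack trace only every 3rd retry
    }
}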
@@ -6,7 +6,7 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Set;
 import java.util.Map;
-import java.util.HashSet;
+import java.util.LinkedHashSet;
 import java.util.HashMap;
 import java.io.File;
 import java.io.IOException;
@@ -333,7 +333,7 @@ public class MapReduceExecutor
 
     private List<Path> collectJars(List<String> extraJars, List<String> excludeJars)
     {
-        Set<Path> set = new HashSet<Path>();
+        Set<Path> set = new LinkedHashSet<Path>();
 
         collectURLClassLoaderJars(set, Exec.class.getClassLoader());
         collectURLClassLoaderJars(set, MapReduceExecutor.class.getClassLoader());
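The MapReduceExecutor change replaces HashSet with LinkedHashSet when collecting jar paths, so collectJars keeps the order in which the class loaders were scanned rather than HashSet's hash-based iteration order. A small demonstration of the difference, using made-up jar names, follows.

import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

public class JarOrderSketch
{
    public static void main(String[] args)
    {
        // Made-up jar names, only to show the ordering difference.
        List<String> discovered = Arrays.asList(
                "embulk-core.jar", "embulk-executor-mapreduce.jar", "hadoop-common.jar");

        // HashSet iteration order depends on hash codes and may differ from insertion order;
        // LinkedHashSet always iterates in the order elements were added.
        Set<String> hashed = new HashSet<String>(discovered);
        Set<String> linked = new LinkedHashSet<String>(discovered);

        System.out.println("HashSet:       " + hashed);
        System.out.println("LinkedHashSet: " + linked);
    }
}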
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-executor-mapreduce
 version: !ruby/object:Gem::Version
-  version: 0.2.5
+  version: 0.2.6
 platform: ruby
 authors:
 - Sadayuki Furuhashi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-02-09 00:00:00.000000000 Z
+date: 2016-04-12 00:00:00.000000000 Z
 dependencies: []
 description: Executes tasks on Hadoop.
 email:
@@ -84,7 +84,7 @@ files:
 - classpath/curator-client-2.6.0.jar
 - classpath/curator-framework-2.6.0.jar
 - classpath/curator-recipes-2.6.0.jar
-- classpath/embulk-executor-mapreduce-0.2.5.jar
+- classpath/embulk-executor-mapreduce-0.2.6.jar
 - classpath/gson-2.2.4.jar
 - classpath/hadoop-annotations-2.6.0.jar
 - classpath/hadoop-auth-2.6.0.jar