embulk-executor-mapreduce 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 995cd8754f48d4d4e0d222bbad7d39dfdad6648f
4
- data.tar.gz: 901b6a9298e76ed45ab7840ca4e4135bb68a9d8d
3
+ metadata.gz: 5c531e6955469a01f0e2ed716a65fdf228ae95ba
4
+ data.tar.gz: d8724a7abcaedd7549a397d2b14df6edb2832d52
5
5
  SHA512:
6
- metadata.gz: 02cbb013f7b83f51c787e649d1efd0e69b726af56636b2487b5d1b277bb285521db44a5b4b1ee45c59d7e5c0a71515bc9b590cdb5a3f90f5fd871581e4d23286
7
- data.tar.gz: f66984fd6801ee826b6c4a46db1c326aa699584497224daed575e11c6f83348f5cbf2ee469c26a856bf4d9479d57e609613abdc7fec69f3f2ccbde8ac612340c
6
+ metadata.gz: 9983809cd453596cf3fc683f75a22b43dbcca0abf202c7ad7bdc9cbb640c673e95be706f6d49ad04bb3881f8668bc9b8e3946115d54e2a14e90dfb163fbd9b0f
7
+ data.tar.gz: 7b272911c78f9bac87fa867f0e3978a96f31c8fb348b150410a6991e1f5f425782450c9422ca4022e2cb15abc61037caaf080fb196f48bd0a441e3e81cc31b15
@@ -16,7 +16,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore;
16
16
  import com.fasterxml.jackson.annotation.JsonValue;
17
17
  import org.apache.hadoop.mapreduce.TaskAttemptID;
18
18
  import org.embulk.config.ModelManager;
19
- import org.embulk.config.CommitReport;
19
+ import org.embulk.config.TaskReport;
20
20
 
21
21
  public class AttemptState
22
22
  {
@@ -24,8 +24,8 @@ public class AttemptState
24
24
  private final Optional<Integer> inputTaskIndex;
25
25
  private final Optional<Integer> outputTaskIndex;
26
26
  private Optional<String> exception;
27
- private Optional<CommitReport> inputCommitReport;
28
- private Optional<CommitReport> outputCommitReport;
27
+ private Optional<TaskReport> inputTaskReport;
28
+ private Optional<TaskReport> outputTaskReport;
29
29
 
30
30
  public AttemptState(TaskAttemptID attemptId, Optional<Integer> inputTaskIndex, Optional<Integer> outputTaskIndex)
31
31
  {
@@ -40,12 +40,12 @@ public class AttemptState
40
40
  @JsonProperty("inputTaskIndex") Optional<Integer> inputTaskIndex,
41
41
  @JsonProperty("outputTaskIndex") Optional<Integer> outputTaskIndex,
42
42
  @JsonProperty("exception") Optional<String> exception,
43
- @JsonProperty("inputCommitReport") Optional<CommitReport> inputCommitReport,
44
- @JsonProperty("outputCommitReport") Optional<CommitReport> outputCommitReport)
43
+ @JsonProperty("inputTaskReport") Optional<TaskReport> inputTaskReport,
44
+ @JsonProperty("outputTaskReport") Optional<TaskReport> outputTaskReport)
45
45
  {
46
46
  this(TaskAttemptID.forName(attemptId),
47
47
  inputTaskIndex, outputTaskIndex, exception,
48
- inputCommitReport, outputCommitReport);
48
+ inputTaskReport, outputTaskReport);
49
49
  }
50
50
 
51
51
  public AttemptState(
@@ -53,15 +53,15 @@ public class AttemptState
53
53
  Optional<Integer> inputTaskIndex,
54
54
  Optional<Integer> outputTaskIndex,
55
55
  Optional<String> exception,
56
- Optional<CommitReport> inputCommitReport,
57
- Optional<CommitReport> outputCommitReport)
56
+ Optional<TaskReport> inputTaskReport,
57
+ Optional<TaskReport> outputTaskReport)
58
58
  {
59
59
  this.attemptId = attemptId;
60
60
  this.inputTaskIndex = inputTaskIndex;
61
61
  this.outputTaskIndex = outputTaskIndex;
62
62
  this.exception = exception;
63
- this.inputCommitReport = inputCommitReport;
64
- this.outputCommitReport = outputCommitReport;
63
+ this.inputTaskReport = inputTaskReport;
64
+ this.outputTaskReport = outputTaskReport;
65
65
  }
66
66
 
67
67
  @JsonIgnore
@@ -112,28 +112,28 @@ public class AttemptState
112
112
  return exception;
113
113
  }
114
114
 
115
- @JsonProperty("inputCommitReport")
116
- public Optional<CommitReport> getInputCommitReport()
115
+ @JsonProperty("inputTaskReport")
116
+ public Optional<TaskReport> getInputTaskReport()
117
117
  {
118
- return inputCommitReport;
118
+ return inputTaskReport;
119
119
  }
120
120
 
121
- @JsonProperty("outputCommitReport")
122
- public Optional<CommitReport> getOutputCommitReport()
121
+ @JsonProperty("outputTaskReport")
122
+ public Optional<TaskReport> getOutputTaskReport()
123
123
  {
124
- return outputCommitReport;
124
+ return outputTaskReport;
125
125
  }
126
126
 
127
127
  @JsonIgnore
128
- public void setInputCommitReport(CommitReport inputCommitReport)
128
+ public void setInputTaskReport(TaskReport inputTaskReport)
129
129
  {
130
- this.inputCommitReport = Optional.of(inputCommitReport);
130
+ this.inputTaskReport = Optional.of(inputTaskReport);
131
131
  }
132
132
 
133
133
  @JsonIgnore
134
- public void setOutputCommitReport(CommitReport outputCommitReport)
134
+ public void setOutputTaskReport(TaskReport outputTaskReport)
135
135
  {
136
- this.outputCommitReport = Optional.of(outputCommitReport);
136
+ this.outputTaskReport = Optional.of(outputTaskReport);
137
137
  }
138
138
 
139
139
  public void writeTo(OutputStream out, ModelManager modelManager) throws IOException
@@ -0,0 +1,13 @@
1
+ package org.embulk.executor.mapreduce;
2
+
3
+ import org.embulk.config.ConfigSource;
4
+ import org.embulk.EmbulkEmbed;
5
+
6
+ public class DefaultEmbulkFactory
7
+ {
8
+ public EmbulkEmbed.Bootstrap bootstrap(ConfigSource systemConfig)
9
+ {
10
+ return new EmbulkEmbed.Bootstrap()
11
+ .setSystemConfig(systemConfig);
12
+ }
13
+ }
@@ -2,6 +2,7 @@ package org.embulk.executor.mapreduce;
2
2
 
3
3
  import java.util.List;
4
4
  import java.util.ArrayList;
5
+ import java.util.Map;
5
6
  import java.util.concurrent.Callable;
6
7
  import java.util.concurrent.ExecutionException;
7
8
  import java.io.File;
@@ -10,6 +11,7 @@ import java.io.ByteArrayInputStream;
10
11
  import java.io.IOException;
11
12
  import java.io.EOFException;
12
13
  import java.io.InterruptedIOException;
14
+ import java.lang.reflect.Method;
13
15
  import java.lang.reflect.InvocationTargetException;
14
16
  import com.google.inject.Injector;
15
17
  import com.google.common.base.Optional;
@@ -35,7 +37,8 @@ import org.apache.hadoop.mapreduce.MRConfig;
35
37
  import org.embulk.config.ModelManager;
36
38
  import org.embulk.config.ConfigSource;
37
39
  import org.embulk.config.ConfigLoader;
38
- import org.embulk.config.CommitReport;
40
+ import org.embulk.config.DataSourceImpl;
41
+ import org.embulk.config.TaskReport;
39
42
  import org.embulk.spi.BufferAllocator;
40
43
  import org.embulk.spi.Exec;
41
44
  import org.embulk.spi.ExecAction;
@@ -44,7 +47,7 @@ import org.embulk.spi.ProcessTask;
44
47
  import org.embulk.spi.util.Executors;
45
48
  import org.embulk.spi.util.RetryExecutor.Retryable;
46
49
  import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
47
- import org.embulk.EmbulkService;
50
+ import org.embulk.EmbulkEmbed;
48
51
  import org.slf4j.Logger;
49
52
 
50
53
  import static java.nio.charset.StandardCharsets.UTF_8;
@@ -52,6 +55,7 @@ import static org.embulk.spi.util.RetryExecutor.retryExecutor;
52
55
 
53
56
  public class EmbulkMapReduce
54
57
  {
58
+ private static final String EMBULK_FACTORY_CLASS = "embulk_factory_class";
55
59
  private static final String SYSTEM_CONFIG_SERVICE_CLASS = "mapreduce_service_class";
56
60
 
57
61
  private static final String CK_SYSTEM_CONFIG = "embulk.mapreduce.systemConfig";
@@ -111,29 +115,41 @@ public class EmbulkMapReduce
111
115
  config.get(CK_TASK));
112
116
  }
113
117
 
114
- public static Injector newEmbulkInstance(Configuration config)
118
+ public static EmbulkEmbed.Bootstrap newEmbulkBootstrap(Configuration config)
115
119
  {
116
120
  ConfigSource systemConfig = getSystemConfig(config);
117
- String serviceClassName = systemConfig.get(String.class, SYSTEM_CONFIG_SERVICE_CLASS, "org.embulk.EmbulkService");
121
+
122
+ // for warnings of old versions
123
+ if (!systemConfig.get(String.class, SYSTEM_CONFIG_SERVICE_CLASS, "org.embulk.EmbulkService").equals("org.embulk.EmbulkService")) {
124
+ throw new RuntimeException("System config 'mapreduce_service_class' is not supported any more. Please use 'embulk_factory_class' instead");
125
+ }
126
+
127
+ String factoryClassName = systemConfig.get(String.class, EMBULK_FACTORY_CLASS, DefaultEmbulkFactory.class.getName());
118
128
 
119
129
  try {
120
- Object obj;
121
- if (serviceClassName.equals("org.embulk.EmbulkService")) {
122
- obj = new EmbulkService(systemConfig);
123
- } else {
124
- Class<?> serviceClass = Class.forName(serviceClassName);
125
- obj = serviceClass.getConstructor(ConfigSource.class).newInstance(systemConfig);
126
- }
130
+ Class<?> factoryClass = Class.forName(factoryClassName);
131
+ Object factory = factoryClass.newInstance();
127
132
 
128
- if (obj instanceof EmbulkService) {
129
- return ((EmbulkService) obj).getInjector();
130
- } else {
131
- return (Injector) obj.getClass().getMethod("getInjector").invoke(obj);
133
+ Object bootstrap;
134
+ try {
135
+ // factory.bootstrap(ConfigSource masterSystemConfig, ConfigSource executorParams)
136
+ Method method = factoryClass.getMethod("bootstrap", ConfigSource.class, ConfigSource.class);
137
+ Map<String, String> hadoopConfig = config.getValByRegex("");
138
+ ConfigSource executorParams = new DataSourceImpl(new ModelManager(null, new ObjectMapper())).set("hadoopConfig", hadoopConfig).getNested("hadoopConfig"); // TODO add a method to embulk that creates an empty DataSource instance
139
+ bootstrap = method.invoke(factory, systemConfig, executorParams);
132
140
  }
141
+ catch (NoSuchMethodException ex) {
142
+ // factory.bootstrap(ConfigSource masterSystemConfig)
143
+ bootstrap = factoryClass.getMethod("bootstrap", ConfigSource.class).invoke(factory, systemConfig);
144
+ }
145
+
146
+ return (EmbulkEmbed.Bootstrap) bootstrap;
133
147
 
134
- } catch (InvocationTargetException ex) {
148
+ }
149
+ catch (InvocationTargetException ex) {
135
150
  throw Throwables.propagate(ex.getCause());
136
- } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | IllegalArgumentException ex) {
151
+ }
152
+ catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | IllegalArgumentException ex) {
137
153
  throw Throwables.propagate(ex);
138
154
  }
139
155
  }
@@ -377,8 +393,7 @@ public class EmbulkMapReduce
377
393
  public static class SessionRunner
378
394
  {
379
395
  private final Configuration config;
380
- private final Injector injector;
381
- private final ModelManager modelManager;
396
+ private final EmbulkEmbed embed;
382
397
  private final MapReduceExecutorTask task;
383
398
  private final ExecSession session;
384
399
  private final File localGemPath;
@@ -386,10 +401,9 @@ public class EmbulkMapReduce
386
401
  public SessionRunner(TaskAttemptContext context)
387
402
  {
388
403
  this.config = context.getConfiguration();
389
- this.injector = newEmbulkInstance(context.getConfiguration());
390
- this.modelManager = injector.getInstance(ModelManager.class);
391
- this.task = getExecutorTask(injector, context.getConfiguration());
392
- this.session = ExecSession.builder(injector).fromExecConfig(task.getExecConfig()).build();
404
+ this.embed = newEmbulkBootstrap(context.getConfiguration()).initialize(); // TODO use initializeCloseable?
405
+ this.task = getExecutorTask(embed.getInjector(), context.getConfiguration());
406
+ this.session = ExecSession.builder(embed.getInjector()).fromExecConfig(task.getExecConfig()).build();
393
407
 
394
408
  try {
395
409
  LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
@@ -403,7 +417,7 @@ public class EmbulkMapReduce
403
417
  public PluginArchive readPluginArchive() throws IOException
404
418
  {
405
419
  localGemPath.mkdirs();
406
- return EmbulkMapReduce.readPluginArchive(localGemPath, config, getStateDirectoryPath(config), modelManager);
420
+ return EmbulkMapReduce.readPluginArchive(localGemPath, config, getStateDirectoryPath(config), embed.getModelManager());
407
421
  }
408
422
 
409
423
  public Configuration getConfiguration()
@@ -413,17 +427,17 @@ public class EmbulkMapReduce
413
427
 
414
428
  public ModelManager getModelManager()
415
429
  {
416
- return modelManager;
430
+ return embed.getModelManager();
417
431
  }
418
432
 
419
433
  public BufferAllocator getBufferAllocator()
420
434
  {
421
- return injector.getInstance(BufferAllocator.class);
435
+ return embed.getBufferAllocator();
422
436
  }
423
437
 
424
438
  public ScriptingContainer getScriptingContainer()
425
439
  {
426
- return injector.getInstance(ScriptingContainer.class);
440
+ return embed.getInjector().getInstance(ScriptingContainer.class);
427
441
  }
428
442
 
429
443
  public MapReduceExecutorTask getMapReduceExecutorTask()
@@ -480,9 +494,9 @@ public class EmbulkMapReduce
480
494
  }
481
495
 
482
496
  @Override
483
- public void inputCommitted(CommitReport report)
497
+ public void inputCommitted(TaskReport report)
484
498
  {
485
- state.setInputCommitReport(report);
499
+ state.setInputTaskReport(report);
486
500
  try {
487
501
  writeAttemptStateFile(config, stateDir, state, modelManager);
488
502
  } catch (IOException e) {
@@ -491,9 +505,9 @@ public class EmbulkMapReduce
491
505
  }
492
506
 
493
507
  @Override
494
- public void outputCommitted(CommitReport report)
508
+ public void outputCommitted(TaskReport report)
495
509
  {
496
- state.setOutputCommitReport(report);
510
+ state.setOutputTaskReport(report);
497
511
  try {
498
512
  writeAttemptStateFile(config, stateDir, state, modelManager);
499
513
  } catch (IOException e) {
@@ -12,7 +12,7 @@ import org.apache.hadoop.conf.Configuration;
12
12
  import org.apache.hadoop.mapreduce.Mapper;
13
13
  import org.apache.hadoop.mapreduce.Reducer;
14
14
  import org.embulk.config.ModelManager;
15
- import org.embulk.config.CommitReport;
15
+ import org.embulk.config.TaskReport;
16
16
  import org.embulk.config.ConfigDiff;
17
17
  import org.embulk.config.TaskSource;
18
18
  import org.embulk.config.ConfigSource;
@@ -218,7 +218,7 @@ public class EmbulkPartitioningMapReduce
218
218
  try {
219
219
  if (!failed) {
220
220
  output.finish();
221
- CommitReport report = output.commit();
221
+ TaskReport report = output.commit();
222
222
  handler.outputCommitted(report);
223
223
  }
224
224
  } finally {
@@ -266,7 +266,7 @@ public class EmbulkPartitioningMapReduce
266
266
 
267
267
  public void cleanup(TaskSource taskSource,
268
268
  Schema schema, int taskCount,
269
- List<CommitReport> successCommitReports)
269
+ List<TaskReport> successTaskReports)
270
270
  {
271
271
  // won't be called
272
272
  throw new RuntimeException("");
@@ -301,9 +301,9 @@ public class EmbulkPartitioningMapReduce
301
301
  public void abort()
302
302
  { }
303
303
 
304
- public CommitReport commit()
304
+ public TaskReport commit()
305
305
  {
306
- return Exec.newCommitReport();
306
+ return Exec.newTaskReport();
307
307
  }
308
308
  };
309
309
  }
@@ -40,7 +40,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
40
40
  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
41
41
  import org.embulk.exec.ForSystemConfig;
42
42
  import org.embulk.config.ConfigSource;
43
- import org.embulk.config.CommitReport;
43
+ import org.embulk.config.TaskReport;
44
44
  import org.embulk.config.ConfigException;
45
45
  import org.embulk.config.TaskSource;
46
46
  import org.embulk.config.ModelManager;
@@ -78,7 +78,7 @@ public class MapReduceExecutor
78
78
  final int outputTaskCount;
79
79
  final int reduceTaskCount;
80
80
 
81
- if (task.getPartitioning().isPresent()) {
81
+ if (task.getPartitioning().isPresent() && inputTaskCount > 0) { // here can disable partitioning and force set reduceTaskCount and outputTaskCount to 0 if inputTaskCount is 0
82
82
  reduceTaskCount = task.getReducers().or(inputTaskCount);
83
83
  if (reduceTaskCount <= 0) {
84
84
  throw new ConfigException("Reducers must be larger than 1 if partition: is set");
@@ -381,15 +381,15 @@ public class MapReduceExecutor
381
381
  private static void updateTaskState(TaskState state, AttemptState attempt, boolean isInput)
382
382
  {
383
383
  state.start();
384
- Optional<CommitReport> commitReport = isInput ? attempt.getInputCommitReport() : attempt.getOutputCommitReport();
385
- boolean committed = commitReport.isPresent();
384
+ Optional<TaskReport> taskReport = isInput ? attempt.getInputTaskReport() : attempt.getOutputTaskReport();
385
+ boolean committed = taskReport.isPresent();
386
386
  if (attempt.getException().isPresent()) {
387
387
  if (!state.isCommitted()) {
388
388
  state.setException(new RemoteTaskFailedException(attempt.getException().get()));
389
389
  }
390
390
  }
391
- if (commitReport.isPresent()) {
392
- state.setCommitReport(commitReport.get());
391
+ if (taskReport.isPresent()) {
392
+ state.setTaskReport(taskReport.get());
393
393
  state.finish();
394
394
  }
395
395
  }
@@ -436,12 +436,12 @@ public class MapReduceExecutor
436
436
 
437
437
  public boolean isInputCommitted()
438
438
  {
439
- return attemptState != null && attemptState.getInputCommitReport().isPresent();
439
+ return attemptState != null && attemptState.getInputTaskReport().isPresent();
440
440
  }
441
441
 
442
442
  public boolean isOutputCommitted()
443
443
  {
444
- return attemptState != null && attemptState.getOutputCommitReport().isPresent();
444
+ return attemptState != null && attemptState.getOutputTaskReport().isPresent();
445
445
  }
446
446
 
447
447
  public TaskAttemptID getTaskAttempId()
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-executor-mapreduce
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-28 00:00:00.000000000 Z
11
+ date: 2015-08-19 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Executes tasks on Hadoop.
14
14
  email:
@@ -22,6 +22,7 @@ files:
22
22
  - src/main/java/org/embulk/executor/mapreduce/AttemptState.java
23
23
  - src/main/java/org/embulk/executor/mapreduce/BufferWritable.java
24
24
  - src/main/java/org/embulk/executor/mapreduce/BufferedPagePartitioner.java
25
+ - src/main/java/org/embulk/executor/mapreduce/DefaultEmbulkFactory.java
25
26
  - src/main/java/org/embulk/executor/mapreduce/EmbulkInputFormat.java
26
27
  - src/main/java/org/embulk/executor/mapreduce/EmbulkInputSplit.java
27
28
  - src/main/java/org/embulk/executor/mapreduce/EmbulkMapReduce.java
@@ -60,7 +61,7 @@ files:
60
61
  - classpath/curator-client-2.6.0.jar
61
62
  - classpath/curator-framework-2.6.0.jar
62
63
  - classpath/curator-recipes-2.6.0.jar
63
- - classpath/embulk-executor-mapreduce-0.1.5.jar
64
+ - classpath/embulk-executor-mapreduce-0.2.0.jar
64
65
  - classpath/gson-2.2.4.jar
65
66
  - classpath/hadoop-annotations-2.6.0.jar
66
67
  - classpath/hadoop-auth-2.6.0.jar