embulk 0.5.5 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/build.gradle +1 -1
  4. data/embulk-core/src/main/java/org/embulk/command/Runner.java +7 -7
  5. data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +664 -0
  6. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +5 -0
  7. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +130 -0
  8. data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +34 -0
  9. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +3 -3
  10. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +1 -1
  11. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +7 -6
  12. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -0
  13. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +35 -3
  14. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +4 -1
  15. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +1 -1
  16. data/embulk-core/src/main/java/org/embulk/spi/ExecutorPlugin.java +19 -0
  17. data/embulk-core/src/main/java/org/embulk/spi/Page.java +6 -0
  18. data/embulk-core/src/main/java/org/embulk/spi/PluginClassLoader.java +73 -1
  19. data/embulk-core/src/main/java/org/embulk/spi/ProcessState.java +10 -0
  20. data/embulk-core/src/main/java/org/embulk/spi/ProcessTask.java +118 -0
  21. data/embulk-core/src/main/java/org/embulk/spi/TaskState.java +70 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/util/Executors.java +92 -0
  23. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +17 -3
  24. data/embulk-core/src/test/java/org/embulk/spi/TestBuffer.java +24 -0
  25. data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +1 -1
  26. data/embulk-docs/src/release.rst +1 -0
  27. data/embulk-docs/src/release/release-0.6.0.rst +34 -0
  28. data/lib/embulk/executor_plugin.rb +23 -0
  29. data/lib/embulk/java_plugin.rb +5 -0
  30. data/lib/embulk/plugin.rb +13 -2
  31. data/lib/embulk/version.rb +1 -1
  32. metadata +15 -5
  33. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +0 -660
@@ -13,6 +13,7 @@ import org.embulk.config.ModelManager;
13
13
  import org.embulk.spi.time.DateTimeZoneSerDe;
14
14
  import org.embulk.spi.time.TimestampSerDe;
15
15
  import org.embulk.spi.ParserPlugin;
16
+ import org.embulk.spi.ExecutorPlugin;
16
17
  import org.embulk.spi.BufferAllocator;
17
18
  import org.embulk.spi.util.CharsetSerDe;
18
19
  import static org.embulk.plugin.InjectedPluginSource.registerPluginTo;
@@ -33,6 +34,10 @@ public class ExecModule
33
34
  registerPluginTo(binder, ParserPlugin.class, "system_guess", GuessExecutor.GuessParserPlugin.class);
34
35
  registerPluginTo(binder, ParserPlugin.class, "system_sampling", SamplingParserPlugin.class);
35
36
 
37
+ // LocalExecutorPlugin
38
+ binder.bind(LocalThreadExecutor.class).in(Scopes.SINGLETON);
39
+ registerPluginTo(binder, ExecutorPlugin.class, "local", LocalExecutorPlugin.class);
40
+
36
41
  // serde
37
42
  ObjectMapperModule mapper = new ObjectMapperModule();
38
43
  DateTimeZoneSerDe.configure(mapper);
@@ -0,0 +1,130 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import java.util.concurrent.Callable;
6
+ import java.util.concurrent.Future;
7
+ import java.util.concurrent.ExecutorService;
8
+ import java.util.concurrent.ExecutionException;
9
+ import org.slf4j.Logger;
10
+ import com.google.inject.Inject;
11
+ import org.embulk.config.ConfigSource;
12
+ import org.embulk.config.CommitReport;
13
+ import org.embulk.spi.Exec;
14
+ import org.embulk.spi.ExecutorPlugin;
15
+ import org.embulk.spi.ProcessTask;
16
+ import org.embulk.spi.ProcessState;
17
+ import org.embulk.spi.TaskState;
18
+ import org.embulk.spi.Schema;
19
+ import org.embulk.spi.util.Executors;
20
+ import org.embulk.spi.util.Executors.ProcessStateCallback;
21
+
22
+ public class LocalExecutorPlugin
23
+ implements ExecutorPlugin
24
+ {
25
+ private final ExecutorService executor;
26
+
27
+ @Inject
28
+ public LocalExecutorPlugin(LocalThreadExecutor executor)
29
+ {
30
+ this.executor = executor.getExecutorService();
31
+ }
32
+
33
+ @Override
34
+ public void transaction(ConfigSource config, Schema outputSchema, final int inputTaskCount,
35
+ ExecutorPlugin.Control control)
36
+ {
37
+ control.transaction(outputSchema, inputTaskCount, new Executor() {
38
+ public void execute(ProcessTask task, ProcessState state)
39
+ {
40
+ localExecute(task, inputTaskCount, state);
41
+ }
42
+ });
43
+ }
44
+
45
+ private void localExecute(ProcessTask task, int taskCount, ProcessState state)
46
+ {
47
+ Logger log = Exec.getLogger(LocalExecutorPlugin.class);
48
+
49
+ state.initialize(taskCount, taskCount);
50
+
51
+ List<Future<Throwable>> futures = new ArrayList<>(taskCount);
52
+ try {
53
+ for (int i=0; i < taskCount; i++) {
54
+ if (state.getOutputTaskState(i).isCommitted()) {
55
+ log.warn("Skipped resumed task {}", i);
56
+ futures.add(null); // resumed
57
+ } else {
58
+ futures.add(startProcessor(task, i, state));
59
+ }
60
+ }
61
+ showProgress(log, state, taskCount);
62
+
63
+ for (int i=0; i < taskCount; i++) {
64
+ if (futures.get(i) == null) {
65
+ continue;
66
+ }
67
+ try {
68
+ state.getInputTaskState(i).setException(futures.get(i).get());
69
+ } catch (ExecutionException ex) {
70
+ state.getInputTaskState(i).setException(ex.getCause());
71
+ //Throwables.propagate(ex.getCause());
72
+ } catch (InterruptedException ex) {
73
+ state.getInputTaskState(i).setException(new ExecutionInterruptedException(ex));
74
+ }
75
+ showProgress(log, state, taskCount);
76
+ }
77
+ } finally {
78
+ for (Future<Throwable> future : futures) {
79
+ if (future != null && !future.isDone()) {
80
+ future.cancel(true);
81
+ // TODO join?
82
+ }
83
+ }
84
+ }
85
+ }
86
+
87
+ private void showProgress(Logger log, ProcessState state, int taskCount)
88
+ {
89
+ int started = 0;
90
+ int finished = 0;
91
+ for (int i=0; i < taskCount; i++) {
92
+ if (state.getInputTaskState(i).isStarted()) { started++; }
93
+ if (state.getOutputTaskState(i).isFinished()) { finished++; }
94
+ }
95
+
96
+ log.info(String.format("{done:%3d / %d, running: %d}", finished, taskCount, started - finished));
97
+ }
98
+
99
+ private Future<Throwable> startProcessor(final ProcessTask task, final int taskIndex, final ProcessState state)
100
+ {
101
+ return executor.submit(new Callable<Throwable>() {
102
+ public Throwable call()
103
+ {
104
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName(String.format("task-%04d", taskIndex))) {
105
+ Executors.process(Exec.session(), task, taskIndex, new ProcessStateCallback() {
106
+ public void started()
107
+ {
108
+ state.getInputTaskState(taskIndex).start();
109
+ state.getOutputTaskState(taskIndex).start();
110
+ }
111
+
112
+ public void inputCommitted(CommitReport report)
113
+ {
114
+ state.getInputTaskState(taskIndex).setCommitReport(report);
115
+ }
116
+
117
+ public void outputCommitted(CommitReport report)
118
+ {
119
+ state.getOutputTaskState(taskIndex).setCommitReport(report);
120
+ }
121
+ });
122
+ return null;
123
+ } finally {
124
+ state.getInputTaskState(taskIndex).finish();
125
+ state.getOutputTaskState(taskIndex).finish();
126
+ }
127
+ }
128
+ });
129
+ }
130
+ }
@@ -0,0 +1,34 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.concurrent.Executors;
4
+ import java.util.concurrent.ExecutorService;
5
+ import com.google.common.util.concurrent.ThreadFactoryBuilder;
6
+ import com.google.inject.Inject;
7
+ import org.embulk.config.ConfigSource;
8
+
9
+ /*
10
+ * Injected in SINGLETON scope at ExecModule
11
+ */
12
+ public class LocalThreadExecutor
13
+ {
14
+ private final ExecutorService executor;
15
+
16
+ @Inject
17
+ public LocalThreadExecutor(@ForSystemConfig ConfigSource systemConfig)
18
+ {
19
+ int defaultMaxThreads = Runtime.getRuntime().availableProcessors() * 2;
20
+ int maxThreads = systemConfig.get(Integer.class, "max_threads", defaultMaxThreads);
21
+ this.executor = Executors.newFixedThreadPool(maxThreads,
22
+ new ThreadFactoryBuilder()
23
+ .setNameFormat("embulk-executor-%d")
24
+ .setDaemon(true)
25
+ .build());
26
+ }
27
+
28
+ public ExecutorService getExecutorService()
29
+ {
30
+ return executor;
31
+ }
32
+
33
+ // TODO shutdown
34
+ }
@@ -9,10 +9,10 @@ import org.embulk.spi.BufferAllocator;
9
9
  public class PooledBufferAllocator
10
10
  implements BufferAllocator
11
11
  {
12
- private PooledByteBufAllocator nettyBuffer;
12
+ private static final int DEFAULT_BUFFER_SIZE = 32*1024;
13
+ private static final int MINIMUM_BUFFER_SIZE = 8*1024;
13
14
 
14
- private int DEFAULT_BUFFER_SIZE = 32*1024;
15
- private int MINIMUM_BUFFER_SIZE = 8*1024;
15
+ private final PooledByteBufAllocator nettyBuffer;
16
16
 
17
17
  public PooledBufferAllocator()
18
18
  {
@@ -80,7 +80,7 @@ public class PreviewExecutor
80
80
 
81
81
  protected List<FilterPlugin> newFilterPlugins(PreviewTask task)
82
82
  {
83
- return Filters.newFilterPlugins(Exec.session(), task.getFilterConfigs());
83
+ return Filters.newFilterPluginsFromConfigSources(Exec.session(), task.getFilterConfigs());
84
84
  }
85
85
 
86
86
  private PreviewResult doPreview(ConfigSource config)
@@ -1,6 +1,7 @@
1
1
  package org.embulk.exec;
2
2
 
3
3
  import java.util.List;
4
+ import com.google.common.base.Optional;
4
5
  import com.fasterxml.jackson.annotation.JsonCreator;
5
6
  import com.fasterxml.jackson.annotation.JsonProperty;
6
7
  import org.embulk.config.TaskSource;
@@ -16,8 +17,8 @@ public class ResumeState
16
17
  private final TaskSource outputTaskSource;
17
18
  private final Schema inputSchema;
18
19
  private final Schema outputSchema;
19
- private final List<CommitReport> inputCommitReports;
20
- private final List<CommitReport> outputCommitReports;
20
+ private final List<Optional<CommitReport>> inputCommitReports;
21
+ private final List<Optional<CommitReport>> outputCommitReports;
21
22
 
22
23
  @JsonCreator
23
24
  public ResumeState(
@@ -26,8 +27,8 @@ public class ResumeState
26
27
  @JsonProperty("out_task") TaskSource outputTaskSource,
27
28
  @JsonProperty("in_schema") Schema inputSchema,
28
29
  @JsonProperty("out_schema") Schema outputSchema,
29
- @JsonProperty("in_reports") List<CommitReport> inputCommitReports,
30
- @JsonProperty("out_reports") List<CommitReport> outputCommitReports)
30
+ @JsonProperty("in_reports") List<Optional<CommitReport>> inputCommitReports,
31
+ @JsonProperty("out_reports") List<Optional<CommitReport>> outputCommitReports)
31
32
  {
32
33
  this.execSessionConfigSource = execSessionConfigSource;
33
34
  this.inputTaskSource = inputTaskSource;
@@ -69,13 +70,13 @@ public class ResumeState
69
70
  }
70
71
 
71
72
  @JsonProperty("in_reports")
72
- public List<CommitReport> getInputCommitReports()
73
+ public List<Optional<CommitReport>> getInputCommitReports()
73
74
  {
74
75
  return inputCommitReports;
75
76
  }
76
77
 
77
78
  @JsonProperty("out_reports")
78
- public List<CommitReport> getOutputCommitReports()
79
+ public List<Optional<CommitReport>> getOutputCommitReports()
79
80
  {
80
81
  return outputCommitReports;
81
82
  }
@@ -14,6 +14,7 @@ import org.embulk.spi.DecoderPlugin;
14
14
  import org.embulk.spi.EncoderPlugin;
15
15
  import org.embulk.spi.FilterPlugin;
16
16
  import org.embulk.spi.GuessPlugin;
17
+ import org.embulk.spi.ExecutorPlugin;
17
18
 
18
19
  public class JRubyPluginSource
19
20
  implements PluginSource
@@ -54,6 +55,8 @@ public class JRubyPluginSource
54
55
  category = "filter";
55
56
  } else if (GuessPlugin.class.isAssignableFrom(iface)) {
56
57
  category = "guess";
58
+ } else if (ExecutorPlugin.class.isAssignableFrom(iface)) {
59
+ category = "executor";
57
60
  } else {
58
61
  // unsupported plugin category
59
62
  throw new PluginSourceNotMatchException("Plugin interface "+iface+" is not supported in JRuby");
@@ -8,7 +8,7 @@ public class Buffer
8
8
  {
9
9
  public static final Buffer EMPTY = Buffer.allocate(0);
10
10
 
11
- private byte[] array;
11
+ private final byte[] array;
12
12
  private int offset;
13
13
  private int filled;
14
14
  private final int capacity;
@@ -111,6 +111,38 @@ public class Buffer
111
111
  {
112
112
  }
113
113
 
114
- // TODO equals
115
- // TODO hashCode
114
+ @Override
115
+ public boolean equals(Object other)
116
+ {
117
+ if (!(other instanceof Buffer)) {
118
+ return false;
119
+ }
120
+ Buffer o = (Buffer) other;
121
+
122
+ // TODO optimize
123
+ if (limit() != o.limit()) {
124
+ return false;
125
+ }
126
+ int i = offset;
127
+ int io = o.offset;
128
+ while (i < filled) {
129
+ if (array[i] != o.array[io]) {
130
+ return false;
131
+ }
132
+ i++;
133
+ io++;
134
+ }
135
+ return true;
136
+ }
137
+
138
+ @Override
139
+ public int hashCode()
140
+ {
141
+ // TODO optimize
142
+ int result = 1;
143
+ for (int i = offset; i < filled; i++) {
144
+ result = 31 * result + array[i];
145
+ }
146
+ return result;
147
+ }
116
148
  }
@@ -1,5 +1,6 @@
1
1
  package org.embulk.spi;
2
2
 
3
+ import java.util.concurrent.ExecutionException;
3
4
  import org.slf4j.Logger;
4
5
  import org.embulk.config.Task;
5
6
  import org.embulk.config.ModelManager;
@@ -15,11 +16,13 @@ public class Exec
15
16
 
16
17
  private Exec() { }
17
18
 
18
- public static <T> T doWith(ExecSession session, ExecAction<T> action) throws Exception
19
+ public static <T> T doWith(ExecSession session, ExecAction<T> action) throws ExecutionException
19
20
  {
20
21
  Exec.session.set(session);
21
22
  try {
22
23
  return action.run();
24
+ } catch (Exception ex) {
25
+ throw new ExecutionException(ex);
23
26
  } finally {
24
27
  Exec.session.set(null);
25
28
  }
@@ -2,5 +2,5 @@ package org.embulk.spi;
2
2
 
3
3
  public interface ExecAction <T>
4
4
  {
5
- public T run();
5
+ public T run() throws Exception;
6
6
  }
@@ -0,0 +1,19 @@
1
+ package org.embulk.spi;
2
+
3
+ import org.embulk.config.ConfigSource;
4
+
5
+ public interface ExecutorPlugin
6
+ {
7
+ public interface Executor
8
+ {
9
+ public void execute(ProcessTask task, ProcessState state);
10
+ }
11
+
12
+ public interface Control
13
+ {
14
+ public void transaction(Schema executorSchema, int outputTaskCount, Executor executor);
15
+ }
16
+
17
+ public void transaction(ConfigSource config, Schema outputSchema, int inputTaskCount,
18
+ ExecutorPlugin.Control control);
19
+ }
@@ -28,6 +28,12 @@ public class Page
28
28
  return this;
29
29
  }
30
30
 
31
+ public List<String> getStringReferences()
32
+ {
33
+ // TODO used by mapreduce executor
34
+ return stringReferences;
35
+ }
36
+
31
37
  public String getStringReference(int index)
32
38
  {
33
39
  return stringReferences.get(index);
@@ -1,11 +1,16 @@
1
1
  package org.embulk.spi;
2
2
 
3
+ import java.util.List;
4
+ import java.util.Iterator;
5
+ import java.util.ArrayList;
6
+ import java.util.Enumeration;
7
+ import java.io.IOException;
3
8
  import java.nio.file.Path;
4
9
  import java.net.URL;
5
10
  import java.net.URLClassLoader;
6
11
  import java.net.MalformedURLException;
7
- import java.util.List;
8
12
  import com.google.common.collect.ImmutableList;
13
+ import com.google.common.collect.Iterators;
9
14
  import org.jruby.Ruby;
10
15
 
11
16
  public class PluginClassLoader
@@ -17,6 +22,12 @@ public class PluginClassLoader
17
22
  "com.ibm.icu.",
18
23
  };
19
24
 
25
+ private static final String[] CHILD_FIRST_PATHS = new String[] {
26
+ "io/netty/",
27
+ "org/yaml/",
28
+ "com/ibm/icu/",
29
+ };
30
+
20
31
  public PluginClassLoader(Ruby pluginJRubyRuntime, List<URL> urls)
21
32
  {
22
33
  this(urls, pluginJRubyRuntime.getJRubyClassLoader());
@@ -80,6 +91,57 @@ public class PluginClassLoader
80
91
  return clazz;
81
92
  }
82
93
 
94
+ @Override
95
+ public URL getResource(String name)
96
+ {
97
+ boolean childFirst = isInChildFirstPath(name);
98
+
99
+ if (childFirst) {
100
+ URL childUrl = findResource(name);
101
+ if (childUrl != null) {
102
+ return childUrl;
103
+ }
104
+ }
105
+
106
+ URL parentUrl = getParent().getResource(name);
107
+ if (parentUrl != null) {
108
+ return parentUrl;
109
+ }
110
+
111
+ if (!childFirst) {
112
+ URL childUrl = findResource(name);
113
+ if (childUrl != null) {
114
+ return childUrl;
115
+ }
116
+ }
117
+
118
+ return null;
119
+ }
120
+
121
+ @Override
122
+ public Enumeration<URL> getResources(String name)
123
+ throws IOException
124
+ {
125
+ List<Iterator<URL>> resources = new ArrayList<>();
126
+
127
+ boolean childFirst = isInChildFirstPath(name);
128
+
129
+ if (childFirst) {
130
+ Iterator<URL> childResources = Iterators.forEnumeration(findResources(name));
131
+ resources.add(childResources);
132
+ }
133
+
134
+ Iterator<URL> parentResources = Iterators.forEnumeration(getParent().getResources(name));
135
+ resources.add(parentResources);
136
+
137
+ if (!childFirst) {
138
+ Iterator<URL> childResources = Iterators.forEnumeration(findResources(name));
139
+ resources.add(childResources);
140
+ }
141
+
142
+ return Iterators.asEnumeration(Iterators.concat(resources.iterator()));
143
+ }
144
+
83
145
  private boolean isInChildFirstPackage(String name)
84
146
  {
85
147
  for (String pkg : CHILD_FIRST_PACKAGES) {
@@ -89,4 +151,14 @@ public class PluginClassLoader
89
151
  }
90
152
  return false;
91
153
  }
154
+
155
+ private boolean isInChildFirstPath(String name)
156
+ {
157
+ for (String path : CHILD_FIRST_PATHS) {
158
+ if (name.startsWith(path)) {
159
+ return true;
160
+ }
161
+ }
162
+ return false;
163
+ }
92
164
  }