embulk 0.5.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/build.gradle +1 -1
  4. data/embulk-core/src/main/java/org/embulk/command/Runner.java +7 -7
  5. data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +664 -0
  6. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +5 -0
  7. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +130 -0
  8. data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +34 -0
  9. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +3 -3
  10. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +1 -1
  11. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +7 -6
  12. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -0
  13. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +35 -3
  14. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +4 -1
  15. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +1 -1
  16. data/embulk-core/src/main/java/org/embulk/spi/ExecutorPlugin.java +19 -0
  17. data/embulk-core/src/main/java/org/embulk/spi/Page.java +6 -0
  18. data/embulk-core/src/main/java/org/embulk/spi/PluginClassLoader.java +73 -1
  19. data/embulk-core/src/main/java/org/embulk/spi/ProcessState.java +10 -0
  20. data/embulk-core/src/main/java/org/embulk/spi/ProcessTask.java +118 -0
  21. data/embulk-core/src/main/java/org/embulk/spi/TaskState.java +70 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/util/Executors.java +92 -0
  23. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +17 -3
  24. data/embulk-core/src/test/java/org/embulk/spi/TestBuffer.java +24 -0
  25. data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +1 -1
  26. data/embulk-docs/src/release.rst +1 -0
  27. data/embulk-docs/src/release/release-0.6.0.rst +34 -0
  28. data/lib/embulk/executor_plugin.rb +23 -0
  29. data/lib/embulk/java_plugin.rb +5 -0
  30. data/lib/embulk/plugin.rb +13 -2
  31. data/lib/embulk/version.rb +1 -1
  32. metadata +15 -5
  33. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +0 -660
@@ -13,6 +13,7 @@ import org.embulk.config.ModelManager;
13
13
  import org.embulk.spi.time.DateTimeZoneSerDe;
14
14
  import org.embulk.spi.time.TimestampSerDe;
15
15
  import org.embulk.spi.ParserPlugin;
16
+ import org.embulk.spi.ExecutorPlugin;
16
17
  import org.embulk.spi.BufferAllocator;
17
18
  import org.embulk.spi.util.CharsetSerDe;
18
19
  import static org.embulk.plugin.InjectedPluginSource.registerPluginTo;
@@ -33,6 +34,10 @@ public class ExecModule
33
34
  registerPluginTo(binder, ParserPlugin.class, "system_guess", GuessExecutor.GuessParserPlugin.class);
34
35
  registerPluginTo(binder, ParserPlugin.class, "system_sampling", SamplingParserPlugin.class);
35
36
 
37
+ // LocalExecutorPlugin
38
+ binder.bind(LocalThreadExecutor.class).in(Scopes.SINGLETON);
39
+ registerPluginTo(binder, ExecutorPlugin.class, "local", LocalExecutorPlugin.class);
40
+
36
41
  // serde
37
42
  ObjectMapperModule mapper = new ObjectMapperModule();
38
43
  DateTimeZoneSerDe.configure(mapper);
@@ -0,0 +1,130 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import java.util.concurrent.Callable;
6
+ import java.util.concurrent.Future;
7
+ import java.util.concurrent.ExecutorService;
8
+ import java.util.concurrent.ExecutionException;
9
+ import org.slf4j.Logger;
10
+ import com.google.inject.Inject;
11
+ import org.embulk.config.ConfigSource;
12
+ import org.embulk.config.CommitReport;
13
+ import org.embulk.spi.Exec;
14
+ import org.embulk.spi.ExecutorPlugin;
15
+ import org.embulk.spi.ProcessTask;
16
+ import org.embulk.spi.ProcessState;
17
+ import org.embulk.spi.TaskState;
18
+ import org.embulk.spi.Schema;
19
+ import org.embulk.spi.util.Executors;
20
+ import org.embulk.spi.util.Executors.ProcessStateCallback;
21
+
22
+ public class LocalExecutorPlugin
23
+ implements ExecutorPlugin
24
+ {
25
+ private final ExecutorService executor;
26
+
27
+ @Inject
28
+ public LocalExecutorPlugin(LocalThreadExecutor executor)
29
+ {
30
+ this.executor = executor.getExecutorService();
31
+ }
32
+
33
+ @Override
34
+ public void transaction(ConfigSource config, Schema outputSchema, final int inputTaskCount,
35
+ ExecutorPlugin.Control control)
36
+ {
37
+ control.transaction(outputSchema, inputTaskCount, new Executor() {
38
+ public void execute(ProcessTask task, ProcessState state)
39
+ {
40
+ localExecute(task, inputTaskCount, state);
41
+ }
42
+ });
43
+ }
44
+
45
+ private void localExecute(ProcessTask task, int taskCount, ProcessState state)
46
+ {
47
+ Logger log = Exec.getLogger(LocalExecutorPlugin.class);
48
+
49
+ state.initialize(taskCount, taskCount);
50
+
51
+ List<Future<Throwable>> futures = new ArrayList<>(taskCount);
52
+ try {
53
+ for (int i=0; i < taskCount; i++) {
54
+ if (state.getOutputTaskState(i).isCommitted()) {
55
+ log.warn("Skipped resumed task {}", i);
56
+ futures.add(null); // resumed
57
+ } else {
58
+ futures.add(startProcessor(task, i, state));
59
+ }
60
+ }
61
+ showProgress(log, state, taskCount);
62
+
63
+ for (int i=0; i < taskCount; i++) {
64
+ if (futures.get(i) == null) {
65
+ continue;
66
+ }
67
+ try {
68
+ state.getInputTaskState(i).setException(futures.get(i).get());
69
+ } catch (ExecutionException ex) {
70
+ state.getInputTaskState(i).setException(ex.getCause());
71
+ //Throwables.propagate(ex.getCause());
72
+ } catch (InterruptedException ex) {
73
+ state.getInputTaskState(i).setException(new ExecutionInterruptedException(ex));
74
+ }
75
+ showProgress(log, state, taskCount);
76
+ }
77
+ } finally {
78
+ for (Future<Throwable> future : futures) {
79
+ if (future != null && !future.isDone()) {
80
+ future.cancel(true);
81
+ // TODO join?
82
+ }
83
+ }
84
+ }
85
+ }
86
+
87
+ private void showProgress(Logger log, ProcessState state, int taskCount)
88
+ {
89
+ int started = 0;
90
+ int finished = 0;
91
+ for (int i=0; i < taskCount; i++) {
92
+ if (state.getInputTaskState(i).isStarted()) { started++; }
93
+ if (state.getOutputTaskState(i).isFinished()) { finished++; }
94
+ }
95
+
96
+ log.info(String.format("{done:%3d / %d, running: %d}", finished, taskCount, started - finished));
97
+ }
98
+
99
+ private Future<Throwable> startProcessor(final ProcessTask task, final int taskIndex, final ProcessState state)
100
+ {
101
+ return executor.submit(new Callable<Throwable>() {
102
+ public Throwable call()
103
+ {
104
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName(String.format("task-%04d", taskIndex))) {
105
+ Executors.process(Exec.session(), task, taskIndex, new ProcessStateCallback() {
106
+ public void started()
107
+ {
108
+ state.getInputTaskState(taskIndex).start();
109
+ state.getOutputTaskState(taskIndex).start();
110
+ }
111
+
112
+ public void inputCommitted(CommitReport report)
113
+ {
114
+ state.getInputTaskState(taskIndex).setCommitReport(report);
115
+ }
116
+
117
+ public void outputCommitted(CommitReport report)
118
+ {
119
+ state.getOutputTaskState(taskIndex).setCommitReport(report);
120
+ }
121
+ });
122
+ return null;
123
+ } finally {
124
+ state.getInputTaskState(taskIndex).finish();
125
+ state.getOutputTaskState(taskIndex).finish();
126
+ }
127
+ }
128
+ });
129
+ }
130
+ }
@@ -0,0 +1,34 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.concurrent.Executors;
4
+ import java.util.concurrent.ExecutorService;
5
+ import com.google.common.util.concurrent.ThreadFactoryBuilder;
6
+ import com.google.inject.Inject;
7
+ import org.embulk.config.ConfigSource;
8
+
9
+ /*
10
+ * Injected in SINGLETON scope at ExecModule
11
+ */
12
+ public class LocalThreadExecutor
13
+ {
14
+ private final ExecutorService executor;
15
+
16
+ @Inject
17
+ public LocalThreadExecutor(@ForSystemConfig ConfigSource systemConfig)
18
+ {
19
+ int defaultMaxThreads = Runtime.getRuntime().availableProcessors() * 2;
20
+ int maxThreads = systemConfig.get(Integer.class, "max_threads", defaultMaxThreads);
21
+ this.executor = Executors.newFixedThreadPool(maxThreads,
22
+ new ThreadFactoryBuilder()
23
+ .setNameFormat("embulk-executor-%d")
24
+ .setDaemon(true)
25
+ .build());
26
+ }
27
+
28
+ public ExecutorService getExecutorService()
29
+ {
30
+ return executor;
31
+ }
32
+
33
+ // TODO shutdown
34
+ }
@@ -9,10 +9,10 @@ import org.embulk.spi.BufferAllocator;
9
9
  public class PooledBufferAllocator
10
10
  implements BufferAllocator
11
11
  {
12
- private PooledByteBufAllocator nettyBuffer;
12
+ private static final int DEFAULT_BUFFER_SIZE = 32*1024;
13
+ private static final int MINIMUM_BUFFER_SIZE = 8*1024;
13
14
 
14
- private int DEFAULT_BUFFER_SIZE = 32*1024;
15
- private int MINIMUM_BUFFER_SIZE = 8*1024;
15
+ private final PooledByteBufAllocator nettyBuffer;
16
16
 
17
17
  public PooledBufferAllocator()
18
18
  {
@@ -80,7 +80,7 @@ public class PreviewExecutor
80
80
 
81
81
  protected List<FilterPlugin> newFilterPlugins(PreviewTask task)
82
82
  {
83
- return Filters.newFilterPlugins(Exec.session(), task.getFilterConfigs());
83
+ return Filters.newFilterPluginsFromConfigSources(Exec.session(), task.getFilterConfigs());
84
84
  }
85
85
 
86
86
  private PreviewResult doPreview(ConfigSource config)
@@ -1,6 +1,7 @@
1
1
  package org.embulk.exec;
2
2
 
3
3
  import java.util.List;
4
+ import com.google.common.base.Optional;
4
5
  import com.fasterxml.jackson.annotation.JsonCreator;
5
6
  import com.fasterxml.jackson.annotation.JsonProperty;
6
7
  import org.embulk.config.TaskSource;
@@ -16,8 +17,8 @@ public class ResumeState
16
17
  private final TaskSource outputTaskSource;
17
18
  private final Schema inputSchema;
18
19
  private final Schema outputSchema;
19
- private final List<CommitReport> inputCommitReports;
20
- private final List<CommitReport> outputCommitReports;
20
+ private final List<Optional<CommitReport>> inputCommitReports;
21
+ private final List<Optional<CommitReport>> outputCommitReports;
21
22
 
22
23
  @JsonCreator
23
24
  public ResumeState(
@@ -26,8 +27,8 @@ public class ResumeState
26
27
  @JsonProperty("out_task") TaskSource outputTaskSource,
27
28
  @JsonProperty("in_schema") Schema inputSchema,
28
29
  @JsonProperty("out_schema") Schema outputSchema,
29
- @JsonProperty("in_reports") List<CommitReport> inputCommitReports,
30
- @JsonProperty("out_reports") List<CommitReport> outputCommitReports)
30
+ @JsonProperty("in_reports") List<Optional<CommitReport>> inputCommitReports,
31
+ @JsonProperty("out_reports") List<Optional<CommitReport>> outputCommitReports)
31
32
  {
32
33
  this.execSessionConfigSource = execSessionConfigSource;
33
34
  this.inputTaskSource = inputTaskSource;
@@ -69,13 +70,13 @@ public class ResumeState
69
70
  }
70
71
 
71
72
  @JsonProperty("in_reports")
72
- public List<CommitReport> getInputCommitReports()
73
+ public List<Optional<CommitReport>> getInputCommitReports()
73
74
  {
74
75
  return inputCommitReports;
75
76
  }
76
77
 
77
78
  @JsonProperty("out_reports")
78
- public List<CommitReport> getOutputCommitReports()
79
+ public List<Optional<CommitReport>> getOutputCommitReports()
79
80
  {
80
81
  return outputCommitReports;
81
82
  }
@@ -14,6 +14,7 @@ import org.embulk.spi.DecoderPlugin;
14
14
  import org.embulk.spi.EncoderPlugin;
15
15
  import org.embulk.spi.FilterPlugin;
16
16
  import org.embulk.spi.GuessPlugin;
17
+ import org.embulk.spi.ExecutorPlugin;
17
18
 
18
19
  public class JRubyPluginSource
19
20
  implements PluginSource
@@ -54,6 +55,8 @@ public class JRubyPluginSource
54
55
  category = "filter";
55
56
  } else if (GuessPlugin.class.isAssignableFrom(iface)) {
56
57
  category = "guess";
58
+ } else if (ExecutorPlugin.class.isAssignableFrom(iface)) {
59
+ category = "executor";
57
60
  } else {
58
61
  // unsupported plugin category
59
62
  throw new PluginSourceNotMatchException("Plugin interface "+iface+" is not supported in JRuby");
@@ -8,7 +8,7 @@ public class Buffer
8
8
  {
9
9
  public static final Buffer EMPTY = Buffer.allocate(0);
10
10
 
11
- private byte[] array;
11
+ private final byte[] array;
12
12
  private int offset;
13
13
  private int filled;
14
14
  private final int capacity;
@@ -111,6 +111,38 @@ public class Buffer
111
111
  {
112
112
  }
113
113
 
114
- // TODO equals
115
- // TODO hashCode
114
+ @Override
115
+ public boolean equals(Object other)
116
+ {
117
+ if (!(other instanceof Buffer)) {
118
+ return false;
119
+ }
120
+ Buffer o = (Buffer) other;
121
+
122
+ // TODO optimize
123
+ if (limit() != o.limit()) {
124
+ return false;
125
+ }
126
+ int i = offset;
127
+ int io = o.offset;
128
+ while (i < filled) {
129
+ if (array[i] != o.array[io]) {
130
+ return false;
131
+ }
132
+ i++;
133
+ io++;
134
+ }
135
+ return true;
136
+ }
137
+
138
+ @Override
139
+ public int hashCode()
140
+ {
141
+ // TODO optimize
142
+ int result = 1;
143
+ for (int i = offset; i < filled; i++) {
144
+ result = 31 * result + array[i];
145
+ }
146
+ return result;
147
+ }
116
148
  }
@@ -1,5 +1,6 @@
1
1
  package org.embulk.spi;
2
2
 
3
+ import java.util.concurrent.ExecutionException;
3
4
  import org.slf4j.Logger;
4
5
  import org.embulk.config.Task;
5
6
  import org.embulk.config.ModelManager;
@@ -15,11 +16,13 @@ public class Exec
15
16
 
16
17
  private Exec() { }
17
18
 
18
- public static <T> T doWith(ExecSession session, ExecAction<T> action) throws Exception
19
+ public static <T> T doWith(ExecSession session, ExecAction<T> action) throws ExecutionException
19
20
  {
20
21
  Exec.session.set(session);
21
22
  try {
22
23
  return action.run();
24
+ } catch (Exception ex) {
25
+ throw new ExecutionException(ex);
23
26
  } finally {
24
27
  Exec.session.set(null);
25
28
  }
@@ -2,5 +2,5 @@ package org.embulk.spi;
2
2
 
3
3
  public interface ExecAction <T>
4
4
  {
5
- public T run();
5
+ public T run() throws Exception;
6
6
  }
@@ -0,0 +1,19 @@
1
+ package org.embulk.spi;
2
+
3
+ import org.embulk.config.ConfigSource;
4
+
5
+ public interface ExecutorPlugin
6
+ {
7
+ public interface Executor
8
+ {
9
+ public void execute(ProcessTask task, ProcessState state);
10
+ }
11
+
12
+ public interface Control
13
+ {
14
+ public void transaction(Schema executorSchema, int outputTaskCount, Executor executor);
15
+ }
16
+
17
+ public void transaction(ConfigSource config, Schema outputSchema, int inputTaskCount,
18
+ ExecutorPlugin.Control control);
19
+ }
@@ -28,6 +28,12 @@ public class Page
28
28
  return this;
29
29
  }
30
30
 
31
+ public List<String> getStringReferences()
32
+ {
33
+ // TODO used by mapreduce executor
34
+ return stringReferences;
35
+ }
36
+
31
37
  public String getStringReference(int index)
32
38
  {
33
39
  return stringReferences.get(index);
@@ -1,11 +1,16 @@
1
1
  package org.embulk.spi;
2
2
 
3
+ import java.util.List;
4
+ import java.util.Iterator;
5
+ import java.util.ArrayList;
6
+ import java.util.Enumeration;
7
+ import java.io.IOException;
3
8
  import java.nio.file.Path;
4
9
  import java.net.URL;
5
10
  import java.net.URLClassLoader;
6
11
  import java.net.MalformedURLException;
7
- import java.util.List;
8
12
  import com.google.common.collect.ImmutableList;
13
+ import com.google.common.collect.Iterators;
9
14
  import org.jruby.Ruby;
10
15
 
11
16
  public class PluginClassLoader
@@ -17,6 +22,12 @@ public class PluginClassLoader
17
22
  "com.ibm.icu.",
18
23
  };
19
24
 
25
+ private static final String[] CHILD_FIRST_PATHS = new String[] {
26
+ "io/netty/",
27
+ "org/yaml/",
28
+ "com/ibm/icu/",
29
+ };
30
+
20
31
  public PluginClassLoader(Ruby pluginJRubyRuntime, List<URL> urls)
21
32
  {
22
33
  this(urls, pluginJRubyRuntime.getJRubyClassLoader());
@@ -80,6 +91,57 @@ public class PluginClassLoader
80
91
  return clazz;
81
92
  }
82
93
 
94
+ @Override
95
+ public URL getResource(String name)
96
+ {
97
+ boolean childFirst = isInChildFirstPath(name);
98
+
99
+ if (childFirst) {
100
+ URL childUrl = findResource(name);
101
+ if (childUrl != null) {
102
+ return childUrl;
103
+ }
104
+ }
105
+
106
+ URL parentUrl = getParent().getResource(name);
107
+ if (parentUrl != null) {
108
+ return parentUrl;
109
+ }
110
+
111
+ if (!childFirst) {
112
+ URL childUrl = findResource(name);
113
+ if (childUrl != null) {
114
+ return childUrl;
115
+ }
116
+ }
117
+
118
+ return null;
119
+ }
120
+
121
+ @Override
122
+ public Enumeration<URL> getResources(String name)
123
+ throws IOException
124
+ {
125
+ List<Iterator<URL>> resources = new ArrayList<>();
126
+
127
+ boolean childFirst = isInChildFirstPath(name);
128
+
129
+ if (childFirst) {
130
+ Iterator<URL> childResources = Iterators.forEnumeration(findResources(name));
131
+ resources.add(childResources);
132
+ }
133
+
134
+ Iterator<URL> parentResources = Iterators.forEnumeration(getParent().getResources(name));
135
+ resources.add(parentResources);
136
+
137
+ if (!childFirst) {
138
+ Iterator<URL> childResources = Iterators.forEnumeration(findResources(name));
139
+ resources.add(childResources);
140
+ }
141
+
142
+ return Iterators.asEnumeration(Iterators.concat(resources.iterator()));
143
+ }
144
+
83
145
  private boolean isInChildFirstPackage(String name)
84
146
  {
85
147
  for (String pkg : CHILD_FIRST_PACKAGES) {
@@ -89,4 +151,14 @@ public class PluginClassLoader
89
151
  }
90
152
  return false;
91
153
  }
154
+
155
+ private boolean isInChildFirstPath(String name)
156
+ {
157
+ for (String path : CHILD_FIRST_PATHS) {
158
+ if (name.startsWith(path)) {
159
+ return true;
160
+ }
161
+ }
162
+ return false;
163
+ }
92
164
  }