embulk 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +8 -8
  2. data/ChangeLog +12 -0
  3. data/README.md +38 -13
  4. data/build.gradle +6 -1
  5. data/embulk-cli/pom.xml +1 -1
  6. data/embulk-core/pom.xml +1 -1
  7. data/embulk-core/src/main/java/org/embulk/command/Runner.java +87 -8
  8. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +1 -1
  9. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +16 -3
  10. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +1 -1
  11. data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
  12. data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
  13. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +37 -1
  14. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +461 -110
  15. data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
  16. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +82 -0
  17. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -3
  18. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +35 -4
  19. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +14 -3
  20. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +55 -24
  21. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +8 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +57 -24
  23. data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +21 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +14 -3
  25. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +8 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +87 -0
  27. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +4 -2
  28. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +16 -0
  29. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +15 -0
  30. data/embulk-standards/pom.xml +1 -1
  31. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +16 -2
  32. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +14 -1
  33. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +14 -1
  34. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +15 -3
  35. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +15 -1
  36. data/lib/embulk/command/embulk_run.rb +16 -1
  37. data/lib/embulk/data/bundle/embulk/filter_example.rb +42 -0
  38. data/lib/embulk/data/bundle/embulk/input_example.rb +43 -33
  39. data/lib/embulk/data/bundle/embulk/output_example.rb +43 -36
  40. data/lib/embulk/filter_plugin.rb +86 -0
  41. data/lib/embulk/input_plugin.rb +37 -2
  42. data/lib/embulk/java/imports.rb +1 -0
  43. data/lib/embulk/output_plugin.rb +30 -0
  44. data/lib/embulk/plugin.rb +32 -19
  45. data/lib/embulk/schema.rb +16 -9
  46. data/lib/embulk/version.rb +1 -1
  47. data/pom.xml +1 -1
  48. metadata +13 -7
  49. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +0 -10
  50. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +0 -19
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NGUwOTc0ZDE1MWZlZjJhYjdhNmJmMjQwZjliOWU3MmEyYmM5ZTczNQ==
4
+ N2ZlNTcxZjM0ZjA4MDE5YmM0NmU2ZTQ3ZmZkM2ZkZDM5OWFkNjRiNA==
5
5
  data.tar.gz: !binary |-
6
- ZjA1YTE5NDlhZGViMTU1NjVmOTBhZDVlZDY5NGZjODI0NGU5OGViZA==
6
+ NzU2NjYzOTQzM2ExYzdiNzc0YzUzYWZiNzcwZTU2ZmFkZmUxYjMzOQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NDY3NzQ1NTkxNTk5MzAzMGQ2ZmIzYjM0YjMyMTczOGM1YjhmYzFkYTg0YTY3
10
- ZDBlNzdiYWIwZmVkMWU5YzA3NTEyYzA2ZGI3YjQyMTQ5ZDI2MWI3ZWEwZTM0
11
- YjQ3MTllMTZkYzdlYTM2YjNlZDVjNGIwYjEwNDVhNjlmN2IxYTk=
9
+ NWM5ZjJiNjhkNDQwZDgyZGU4MzhiMmNmYjI5OGZhMjk2OTAyM2FlOWM4YzYy
10
+ ZTczZTgzMDBkYjY3ZmZmODM0NGU0NmM2MTBjYmRiNDY1ZDliY2QzZmM2OTZi
11
+ ZmIzZjZmYzc4MTJkOWE2ODM5ODAxYTZhMzgzNzE0NzM5YzgyODU=
12
12
  data.tar.gz: !binary |-
13
- MGZjMzM1NmVhNzdhZDhjODg3ZWZiNGRmOWQwMTU5MzUwZmEwYTBkMDY1MTgz
14
- NDBhOTAwM2Y3NDNjM2VlZTE1YjRkZjA4MWNiZjZjN2QzOTBjYTliMzJlYTgw
15
- OGY2ZGZmMDJmMTI4ZWU1YjNmMTMxNTc5NDdjN2NiODkxYzQ4MmI=
13
+ NTg3MjdiZWNjNGYzN2MzZTM0YzY0OTRmNGJlYmI5ODY3ZmY5ZWFiY2RkN2Yy
14
+ NjI0ZmIzOWJjNDU0NzM4ZDhlYTJjZTBkODc3MmRjYjI5Mzc5OWUyZDQwYzA5
15
+ MTRiMGE1NmU4YmI1MWU1Yjk1ZmJlZTU3Y2Y0NDY5YTFjN2EzOTQ=
data/ChangeLog CHANGED
@@ -1,4 +1,16 @@
1
1
 
2
+ 2015-02-03 version 0.3.0:
3
+
4
+ * Added resume functionality. InputPlugin and OutputPlugin need to implement
5
+ resume and cleanup methods.
6
+ * cli: embulk-run supports -r, --resume-state PATH option.
7
+ * Added FilterInputPlugin Java API.
8
+ * Added FilterInputPlugin JRuby API.
9
+ * Configuration file accepts filters: array entry.
10
+ * Added gradle-versions-plugin to build.gradle (@seratch++)
11
+ * Fixed broken dependencies at build.gradle (@thagikura++)
12
+
13
+
2
14
  2015-01-29 version 0.2.1:
3
15
 
4
16
  * Fixed LineEncoder#finish to flush all remaining buffer (reported by @aibou)
data/README.md CHANGED
@@ -1,27 +1,30 @@
1
- # Embulk
2
-
3
- A plugin-based parallel bulk data loader that makes painful data integration works relaxed.
4
-
5
- ## What's Embulk?
1
+ # What's Embulk?
6
2
 
7
3
  Embulk is a plugin-based parallel bulk data loader that helps **data transfer** between various **storages**, **databases**, **NoSQL** and **cloud services**.
8
4
 
9
- You can install input and output plugins to integrate many other file formats and storages.
10
-
11
- You also can release plugins to share your efforts of data cleaning, error handling, transaction control, and retrying.
12
- Packaging effrots into plugins **brings OSS-style development to the data scripts** which **was tend to be one-time adhoc scripts**.
5
+ You can release plugins to share your efforts of data cleaning, error handling, transaction control, and retrying. Packaging efforts into plugins **brings OSS-style development to the data scripts** which **tend to be one-time ad hoc scripts**.
13
6
 
14
- [Embuk, an open-source plugin-based parallel bulk data loader](http://www.slideshare.net/frsyuki/embuk-making-data-integration-works-relaxed) at Slideshare
7
+ [Embulk, an open-source plugin-based parallel bulk data loader](http://www.slideshare.net/frsyuki/embuk-making-data-integration-works-relaxed) at Slideshare
15
8
 
16
9
  [![Embulk](https://gist.githubusercontent.com/frsyuki/f322a77ee2766a508ba9/raw/e8539b6b4fda1b3357e8c79d3966aa8148dbdbd3/embulk-overview.png)](http://www.slideshare.net/frsyuki/embuk-making-data-integration-works-relaxed/12)
17
10
 
11
+ # Document
12
+
13
+ * [Quick Start](#quick-start)
14
+ * [Using plugins](#using-plugins)
15
+ * [Using plugin bundle](#using-plugin-bundle)
16
+ * [Releasing plugins to RubyGems](#releasing-plugins-to-rubygems)
17
+ * [Resuming a failed transaction](#resuming-a-failed-transaction)
18
+ * [Embulk Development](#embulk-development)
19
+ * [Build](#build)
20
+ * [Release](#release)
18
21
 
19
22
  ## Quick Start
20
23
 
21
24
  The single-file package is the simplest way to try Embulk. You can download the latest embulk-VERSION.jar from [the releases page](https://bintray.com/embulk/maven/embulk/view#files) and run it with java:
22
25
 
23
26
  ```
24
- wget https://bintray.com/artifact/download/embulk/maven/embulk-0.2.1.jar -O embulk.jar
27
+ wget https://bintray.com/artifact/download/embulk/maven/embulk-0.3.0.jar -O embulk.jar
25
28
  java -jar embulk.jar --help
26
29
  ```
27
30
 
@@ -37,10 +40,10 @@ java -jar embulk.jar run config.yml
37
40
  ### Using plugins
38
41
 
39
42
  You can use plugins to load data from/to various systems and file formats.
40
- An example is [embulk-output-postgres-json](https://github.com/frsyuki/embulk-plugin-postgres-json) plugin. It outputs data into PostgreSQL server using "json" column type.
43
+ An example is [embulk-plugin-postgres-json](https://github.com/frsyuki/embulk-plugin-postgres-json) plugin. It outputs data into PostgreSQL server using "json" column type.
41
44
 
42
45
  ```
43
- java -jar embulk.jar gem install embulk-output-postgres-json
46
+ java -jar embulk.jar gem install embulk-plugin-postgres-json
44
47
  java -jar embulk.jar gem list
45
48
  ```
46
49
 
@@ -67,6 +70,28 @@ TODO: documents
67
70
  embulk-plugin-xyz
68
71
  ```
69
72
 
73
+ ### Resuming a failed transaction
74
+
75
+ Embulk supports resuming failed transactions.
76
+ To enable resuming, you need to start the transaction with the `-r PATH` option:
77
+
78
+ ```
79
+ java -jar embulk.jar run config.yml -r resume-state.yml
80
+ ```
81
+
82
+ If the transaction fails, Embulk stores its state in the YAML file. You can retry the transaction using exactly the same command:
83
+
84
+ ```
85
+ java -jar embulk.jar run config.yml -r resume-state.yml
86
+ ```
87
+
88
+ If you give up on resuming the transaction, you can use the `embulk cleanup` subcommand to delete intermediate data:
89
+
90
+ ```
91
+ java -jar embulk.jar cleanup config.yml -r resume-state.yml
92
+ ```
93
+
94
+
70
95
  ## Embulk Development
71
96
 
72
97
  ### Build
@@ -9,6 +9,7 @@ buildscript {
9
9
  }
10
10
  dependencies {
11
11
  classpath 'com.jfrog.bintray.gradle:gradle-bintray-plugin:1.0'
12
+ classpath 'com.github.ben-manes:gradle-versions-plugin:0.7'
12
13
  }
13
14
  }
14
15
 
@@ -22,8 +23,12 @@ allprojects {
22
23
  apply plugin: 'maven-publish'
23
24
  apply plugin: 'com.jfrog.bintray'
24
25
 
26
+ // determine which dependencies have updates
27
+ // $ gradle dependencyUpdates
28
+ apply plugin: 'com.github.ben-manes.versions'
29
+
25
30
  group = 'org.embulk'
26
- version = '0.2.1'
31
+ version = '0.3.0'
27
32
 
28
33
  // to upload artifacts to Bintray by gradle-bintray-plugin
29
34
  // $ gradle bintrayUpload
@@ -5,7 +5,7 @@
5
5
  <parent>
6
6
  <groupId>org.embulk</groupId>
7
7
  <artifactId>embulk-parent</artifactId>
8
- <version>0.2.1-SNAPSHOT</version>
8
+ <version>0.3.0-SNAPSHOT</version>
9
9
  </parent>
10
10
 
11
11
  <artifactId>embulk-cli</artifactId>
@@ -5,7 +5,7 @@
5
5
  <parent>
6
6
  <groupId>org.embulk</groupId>
7
7
  <artifactId>embulk-parent</artifactId>
8
- <version>0.2.1-SNAPSHOT</version>
8
+ <version>0.3.0-SNAPSHOT</version>
9
9
  </parent>
10
10
 
11
11
  <artifactId>embulk-core</artifactId>
@@ -21,10 +21,12 @@ import org.embulk.config.NextConfig;
21
21
  import org.embulk.config.ModelManager;
22
22
  import org.embulk.config.ConfigException;
23
23
  import org.embulk.exec.LocalExecutor;
24
- import org.embulk.exec.ExecuteResult;
24
+ import org.embulk.exec.ExecutionResult;
25
25
  import org.embulk.exec.GuessExecutor;
26
26
  import org.embulk.exec.PreviewExecutor;
27
27
  import org.embulk.exec.PreviewResult;
28
+ import org.embulk.exec.ResumeState;
29
+ import org.embulk.exec.PartialExecutionException;
28
30
  import org.embulk.spi.time.Timestamp;
29
31
  import org.embulk.spi.ExecSession;
30
32
  import org.embulk.spi.util.Pages;
@@ -36,6 +38,9 @@ public class Runner
36
38
  {
37
39
  private String nextConfigOutputPath;
38
40
  public String getNextConfigOutputPath() { return nextConfigOutputPath; }
41
+
42
+ private String resumeStatePath;
43
+ public String getResumeStatePath() { return resumeStatePath; }
39
44
  }
40
45
 
41
46
  private final Options options;
@@ -58,6 +63,9 @@ public class Runner
58
63
  case "run":
59
64
  run(args[0]);
60
65
  break;
66
+ case "cleanup":
67
+ cleanup(args[0]);
68
+ break;
61
69
  case "guess":
62
70
  guess(args[0]);
63
71
  break;
@@ -72,21 +80,87 @@ public class Runner
72
80
  public void run(String configPath)
73
81
  {
74
82
  ConfigSource config = loadYamlConfig(configPath);
75
- checkNextConfigOutputPath(options.getNextConfigOutputPath());
83
+ checkFileWritable(options.getNextConfigOutputPath());
84
+ checkFileWritable(options.getResumeStatePath());
85
+
86
+ // load resume state file
87
+ ResumeState resume = null;
88
+ String resumePath = options.getResumeStatePath();
89
+ if (resumePath != null) {
90
+ ConfigSource resumeConfig = null;
91
+ try {
92
+ resumeConfig = loadYamlConfig(resumePath);
93
+ if (resumeConfig.isEmpty()) {
94
+ resumeConfig = null;
95
+ }
96
+ } catch (RuntimeException ex) {
97
+ // leave resumeConfig == null
98
+ }
99
+ if (resumeConfig != null) {
100
+ resume = resumeConfig.loadConfig(ResumeState.class);
101
+ }
102
+ }
76
103
 
77
104
  ExecSession exec = newExecSession(config);
78
105
  LocalExecutor local = injector.getInstance(LocalExecutor.class);
79
- ExecuteResult result = local.run(exec, config);
80
- NextConfig nextConfig = result.getNextConfig();
106
+ ExecutionResult result;
107
+ try {
108
+ if (resume != null) {
109
+ result = local.resume(config, resume);
110
+ } else {
111
+ result = local.run(exec, config);
112
+ }
113
+ } catch (PartialExecutionException partial) {
114
+ if (options.getResumeStatePath() == null) {
115
+ // resume state path is not set. cleanup the transaction
116
+ exec.getLogger(Runner.class).info("Transaction partially failed. Cleaning up the intermediate data. Use -r option to make it resumable.");
117
+ try {
118
+ local.cleanup(config, partial.getResumeState());
119
+ } catch (Throwable ex) {
120
+ partial.addSuppressed(ex);
121
+ }
122
+ throw partial;
123
+ }
124
+ // save the resume state
125
+ exec.getLogger(Runner.class).info("Writing resume state to '{}'", options.getResumeStatePath());
126
+ writeYaml(options.getResumeStatePath(), partial.getResumeState());
127
+ exec.getLogger(Runner.class).info("Resume state is written. Run the transaction again with -r option to resume or use \"cleanup\" subcommand to delete intermediate data.");
128
+ throw partial;
129
+ }
81
130
 
131
+ // delete resume file
132
+ if (options.getResumeStatePath() != null) {
133
+ new File(options.getResumeStatePath()).delete();
134
+ }
135
+
136
+ // write next config
137
+ NextConfig nextConfig = result.getNextConfig();
82
138
  exec.getLogger(Runner.class).info("next config: {}", nextConfig.toString());
83
139
  writeNextConfig(options.getNextConfigOutputPath(), config, nextConfig);
84
140
  }
85
141
 
142
+ public void cleanup(String configPath)
143
+ {
144
+ String resumePath = options.getResumeStatePath();
145
+ if (resumePath == null) {
146
+ throw new IllegalArgumentException("Resume path is required for cleanup");
147
+ }
148
+ ConfigSource config = loadYamlConfig(configPath);
149
+ ConfigSource resumeConfig = loadYamlConfig(resumePath);
150
+ ResumeState resume = resumeConfig.loadConfig(ResumeState.class);
151
+
152
+ ExecSession exec = newExecSession(config);
153
+ LocalExecutor local = injector.getInstance(LocalExecutor.class);
154
+ local.cleanup(config, resume);
155
+
156
+ // delete resume file
157
+ new File(options.getResumeStatePath()).delete();
158
+ }
159
+
86
160
  public void guess(String partialConfigPath)
87
161
  {
88
162
  ConfigSource config = loadYamlConfig(partialConfigPath);
89
- checkNextConfigOutputPath(options.getNextConfigOutputPath());
163
+ checkFileWritable(options.getNextConfigOutputPath());
90
164
 
91
165
  ExecSession exec = newExecSession(config);
92
166
  GuessExecutor guess = injector.getInstance(GuessExecutor.class);
@@ -96,7 +170,7 @@ public class Runner
96
170
  System.err.println(yml);
97
171
  }
98
172
 
99
- private void checkNextConfigOutputPath(String path)
173
+ private void checkFileWritable(String path)
100
174
  {
101
175
  if (path != null) {
102
176
  try (FileOutputStream in = new FileOutputStream(path, true)) {
@@ -109,7 +183,12 @@ public class Runner
109
183
 
110
184
  private String writeNextConfig(String path, ConfigSource originalConfig, NextConfig nextConfigDiff)
111
185
  {
112
- String yml = dumpConfigInYaml(originalConfig.merge(nextConfigDiff));
186
+ return writeYaml(path, originalConfig.merge(nextConfigDiff));
187
+ }
188
+
189
+ private String writeYaml(String path, Object obj)
190
+ {
191
+ String yml = dumpYaml(obj);
113
192
  if (path != null) {
114
193
  if (path.equals("-")) {
115
194
  System.out.print(yml);
@@ -181,7 +260,7 @@ public class Runner
181
260
  }
182
261
  }
183
262
 
184
- private String dumpConfigInYaml(DataSource config)
263
+ private String dumpYaml(Object config)
185
264
  {
186
265
  ModelManager model = injector.getInstance(ModelManager.class);
187
266
  Map<String, Object> map = model.readObject(MapType.class, model.writeObject(config));
@@ -3,7 +3,7 @@ package org.embulk.config;
3
3
  public interface ConfigSource
4
4
  extends DataSource
5
5
  {
6
- public <T extends Task> T loadConfig(Class<T> taskType);
6
+ public <T> T loadConfig(Class<T> taskType);
7
7
 
8
8
  @Override
9
9
  public ConfigSource getNested(String attrName);
@@ -170,17 +170,30 @@ public class DataSourceImpl
170
170
 
171
171
  private static void mergeJsonArray(ArrayNode src, ArrayNode other)
172
172
  {
173
- src.addAll(other);
173
+ for (int i=0; i < other.size(); i++) {
174
+ JsonNode s = src.get(i);
175
+ JsonNode v = other.get(i);
176
+ if (s == null) {
177
+ src.add(v);
178
+ } else if (v.isObject() && s.isObject()) {
179
+ mergeJsonObject((ObjectNode) s, (ObjectNode) v);
180
+ } else if (v.isArray() && s.isArray()) {
181
+ mergeJsonArray((ArrayNode) s, (ArrayNode) v);
182
+ } else {
183
+ src.remove(i);
184
+ src.insert(i, v);
185
+ }
186
+ }
174
187
  }
175
188
 
176
189
  @Override
177
- public <T extends Task> T loadTask(Class<T> taskType)
190
+ public <T> T loadTask(Class<T> taskType)
178
191
  {
179
192
  return model.readObject(taskType, data.traverse());
180
193
  }
181
194
 
182
195
  @Override
183
- public <T extends Task> T loadConfig(Class<T> taskType)
196
+ public <T> T loadConfig(Class<T> taskType)
184
197
  {
185
198
  return model.readObjectWithConfigSerDe(taskType, data.traverse());
186
199
  }
@@ -3,7 +3,7 @@ package org.embulk.config;
3
3
  public interface TaskSource
4
4
  extends DataSource
5
5
  {
6
- public <T extends Task> T loadTask(Class<T> taskType);
6
+ public <T> T loadTask(Class<T> taskType);
7
7
 
8
8
  @Override
9
9
  public TaskSource getNested(String attrName);
@@ -0,0 +1,10 @@
1
+ package org.embulk.exec;
2
+
3
+ public class ExecutionInterruptedException
4
+ extends RuntimeException
5
+ {
6
+ public ExecutionInterruptedException(Exception cause)
7
+ {
8
+ super(cause);
9
+ }
10
+ }
@@ -0,0 +1,26 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import org.embulk.config.NextConfig;
5
+
6
+ public class ExecutionResult
7
+ {
8
+ private final NextConfig nextConfig;
9
+ private final List<Throwable> ignoredExceptions;
10
+
11
+ public ExecutionResult(NextConfig nextConfig, List<Throwable> ignoredExceptions)
12
+ {
13
+ this.nextConfig = nextConfig;
14
+ this.ignoredExceptions = ignoredExceptions;
15
+ }
16
+
17
+ public NextConfig getNextConfig()
18
+ {
19
+ return nextConfig;
20
+ }
21
+
22
+ public List<Throwable> getIgnoredExceptions()
23
+ {
24
+ return ignoredExceptions;
25
+ }
26
+ }
@@ -158,7 +158,7 @@ public class GuessExecutor
158
158
  private static class BufferFileInputPlugin
159
159
  implements FileInputPlugin
160
160
  {
161
- private final Buffer buffer;
161
+ private Buffer buffer;
162
162
 
163
163
  public BufferFileInputPlugin(Buffer buffer)
164
164
  {
@@ -171,6 +171,23 @@ public class GuessExecutor
171
171
  return Exec.newNextConfig();
172
172
  }
173
173
 
174
+ public NextConfig resume(TaskSource taskSource,
175
+ int processorCount,
176
+ FileInputPlugin.Control control)
177
+ {
178
+ throw new UnsupportedOperationException();
179
+ }
180
+
181
+ public void cleanup(TaskSource taskSource,
182
+ int processorCount,
183
+ List<CommitReport> successCommitReports)
184
+ {
185
+ if (buffer != null) {
186
+ buffer.release();
187
+ buffer = null;
188
+ }
189
+ }
190
+
174
191
  public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
175
192
  {
176
193
  return new BufferTransactionalFileInput(buffer);
@@ -257,6 +274,7 @@ public class GuessExecutor
257
274
  NextConfig mergedGuessed = Exec.newNextConfig();
258
275
  for (int i=0; i < guesses.size(); i++) {
259
276
  NextConfig guessed = guesses.get(i).guess(originalConfig, sample);
277
+ guessed = addAssumedDecoderConfigs(originalConfig, guessed);
260
278
  mergedGuessed.merge(guessed);
261
279
  mergedConfig.merge(mergedGuessed);
262
280
  if (!mergedConfig.equals(originalConfig)) {
@@ -287,6 +305,24 @@ public class GuessExecutor
287
305
  }
288
306
  throw new NoSampleException("No input buffer to guess");
289
307
  }
308
+
309
+ private static class ConfigSourceList extends ArrayList<ConfigSource> { };
310
+
311
+ private static NextConfig addAssumedDecoderConfigs(ConfigSource originalConfig, NextConfig guessed)
312
+ {
313
+ List<ConfigSource> guessedDecoders = guessed.get(ConfigSourceList.class, "decoders", null);
314
+ if (guessedDecoders == null) {
315
+ return guessed;
316
+ } else {
317
+ List<ConfigSource> assumedDecoders = originalConfig.get(ConfigSourceList.class, "decoders", new ConfigSourceList());
318
+ ImmutableList.Builder<ConfigSource> added = ImmutableList.builder();
319
+ for (ConfigSource assuemed : assumedDecoders) {
320
+ added.add(Exec.newConfigSource());
321
+ }
322
+ added.addAll(guessedDecoders);
323
+ return guessed.set("decoders", added.build());
324
+ }
325
+ }
290
326
  }
291
327
 
292
328
  public static class GuessedNoticeError