embulk 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +8 -8
  2. data/ChangeLog +12 -0
  3. data/README.md +38 -13
  4. data/build.gradle +6 -1
  5. data/embulk-cli/pom.xml +1 -1
  6. data/embulk-core/pom.xml +1 -1
  7. data/embulk-core/src/main/java/org/embulk/command/Runner.java +87 -8
  8. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +1 -1
  9. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +16 -3
  10. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +1 -1
  11. data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
  12. data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
  13. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +37 -1
  14. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +461 -110
  15. data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
  16. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +82 -0
  17. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -3
  18. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +35 -4
  19. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +14 -3
  20. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +55 -24
  21. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +8 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +57 -24
  23. data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +21 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +14 -3
  25. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +8 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +87 -0
  27. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +4 -2
  28. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +16 -0
  29. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +15 -0
  30. data/embulk-standards/pom.xml +1 -1
  31. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +16 -2
  32. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +14 -1
  33. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +14 -1
  34. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +15 -3
  35. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +15 -1
  36. data/lib/embulk/command/embulk_run.rb +16 -1
  37. data/lib/embulk/data/bundle/embulk/filter_example.rb +42 -0
  38. data/lib/embulk/data/bundle/embulk/input_example.rb +43 -33
  39. data/lib/embulk/data/bundle/embulk/output_example.rb +43 -36
  40. data/lib/embulk/filter_plugin.rb +86 -0
  41. data/lib/embulk/input_plugin.rb +37 -2
  42. data/lib/embulk/java/imports.rb +1 -0
  43. data/lib/embulk/output_plugin.rb +30 -0
  44. data/lib/embulk/plugin.rb +32 -19
  45. data/lib/embulk/schema.rb +16 -9
  46. data/lib/embulk/version.rb +1 -1
  47. data/pom.xml +1 -1
  48. metadata +13 -7
  49. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +0 -10
  50. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +0 -19
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NGUwOTc0ZDE1MWZlZjJhYjdhNmJmMjQwZjliOWU3MmEyYmM5ZTczNQ==
4
+ N2ZlNTcxZjM0ZjA4MDE5YmM0NmU2ZTQ3ZmZkM2ZkZDM5OWFkNjRiNA==
5
5
  data.tar.gz: !binary |-
6
- ZjA1YTE5NDlhZGViMTU1NjVmOTBhZDVlZDY5NGZjODI0NGU5OGViZA==
6
+ NzU2NjYzOTQzM2ExYzdiNzc0YzUzYWZiNzcwZTU2ZmFkZmUxYjMzOQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NDY3NzQ1NTkxNTk5MzAzMGQ2ZmIzYjM0YjMyMTczOGM1YjhmYzFkYTg0YTY3
10
- ZDBlNzdiYWIwZmVkMWU5YzA3NTEyYzA2ZGI3YjQyMTQ5ZDI2MWI3ZWEwZTM0
11
- YjQ3MTllMTZkYzdlYTM2YjNlZDVjNGIwYjEwNDVhNjlmN2IxYTk=
9
+ NWM5ZjJiNjhkNDQwZDgyZGU4MzhiMmNmYjI5OGZhMjk2OTAyM2FlOWM4YzYy
10
+ ZTczZTgzMDBkYjY3ZmZmODM0NGU0NmM2MTBjYmRiNDY1ZDliY2QzZmM2OTZi
11
+ ZmIzZjZmYzc4MTJkOWE2ODM5ODAxYTZhMzgzNzE0NzM5YzgyODU=
12
12
  data.tar.gz: !binary |-
13
- MGZjMzM1NmVhNzdhZDhjODg3ZWZiNGRmOWQwMTU5MzUwZmEwYTBkMDY1MTgz
14
- NDBhOTAwM2Y3NDNjM2VlZTE1YjRkZjA4MWNiZjZjN2QzOTBjYTliMzJlYTgw
15
- OGY2ZGZmMDJmMTI4ZWU1YjNmMTMxNTc5NDdjN2NiODkxYzQ4MmI=
13
+ NTg3MjdiZWNjNGYzN2MzZTM0YzY0OTRmNGJlYmI5ODY3ZmY5ZWFiY2RkN2Yy
14
+ NjI0ZmIzOWJjNDU0NzM4ZDhlYTJjZTBkODc3MmRjYjI5Mzc5OWUyZDQwYzA5
15
+ MTRiMGE1NmU4YmI1MWU1Yjk1ZmJlZTU3Y2Y0NDY5YTFjN2EzOTQ=
data/ChangeLog CHANGED
@@ -1,4 +1,16 @@
1
1
 
2
+ 2015-02-03 version 0.3.0:
3
+
4
+ * Added resume functionality. InputPlugin and OutputPlugin need to implement
5
+ resume and cleanup methods.
6
+ * cli: embulk-run supports -r, --resume-state PATH option.
7
+ * Added FilterInputPlugin Java API.
8
+ * Added FilterInputPlugin JRuby API.
9
+ * Configuration file accepts filters: array entry.
10
+ * Added gradle-versions-plugin to build.gradle (@seratch++)
11
+ * Fixed broken dependencies at build.gradle (@thagikura++)
12
+
13
+
2
14
  2015-01-29 version 0.2.1:
3
15
 
4
16
  * Fixed LineEncoder#finish to flush all remaining buffer (reported by @aibou)
data/README.md CHANGED
@@ -1,27 +1,30 @@
1
- # Embulk
2
-
3
- A plugin-based parallel bulk data loader that makes painful data integration works relaxed.
4
-
5
- ## What's Embulk?
1
+ # What's Embulk?
6
2
 
7
3
  Embulk is a plugin-based parallel bulk data loader that helps **data transfer** between various **storages**, **databases**, **NoSQL** and **cloud services**.
8
4
 
9
- You can install input and output plugins to integrate many other file formats and storages.
10
-
11
- You also can release plugins to share your efforts of data cleaning, error handling, transaction control, and retrying.
12
- Packaging effrots into plugins **brings OSS-style development to the data scripts** which **was tend to be one-time adhoc scripts**.
5
+ You can release plugins to share your efforts of data cleaning, error handling, transaction control, and retrying. Packaging efforts into plugins **brings OSS-style development to the data scripts** which **tended to be one-time ad hoc scripts**.
13
6
 
14
- [Embuk, an open-source plugin-based parallel bulk data loader](http://www.slideshare.net/frsyuki/embuk-making-data-integration-works-relaxed) at Slideshare
7
+ [Embulk, an open-source plugin-based parallel bulk data loader](http://www.slideshare.net/frsyuki/embuk-making-data-integration-works-relaxed) at Slideshare
15
8
 
16
9
  [![Embulk](https://gist.githubusercontent.com/frsyuki/f322a77ee2766a508ba9/raw/e8539b6b4fda1b3357e8c79d3966aa8148dbdbd3/embulk-overview.png)](http://www.slideshare.net/frsyuki/embuk-making-data-integration-works-relaxed/12)
17
10
 
11
+ # Document
12
+
13
+ * [Quick Start](#quick-start)
14
+ * [Using plugins](#using-plugins)
15
+ * [Using plugin bundle](#using-plugin-bundle)
16
+ * [Releasing plugins to RubyGems](#releasing-plugins-to-rubygems)
17
+ * [Resuming a failed transaction](#resuming-a-failed-transaction)
18
+ * [Embulk Development](#embulk-development)
19
+ * [Build](#build)
20
+ * [Release](#release)
18
21
 
19
22
  ## Quick Start
20
23
 
21
24
  The single-file package is the simplest way to try Embulk. You can download the latest embulk-VERSION.jar from [the releases page](https://bintray.com/embulk/maven/embulk/view#files) and run it with java:
22
25
 
23
26
  ```
24
- wget https://bintray.com/artifact/download/embulk/maven/embulk-0.2.1.jar -O embulk.jar
27
+ wget https://bintray.com/artifact/download/embulk/maven/embulk-0.3.0.jar -O embulk.jar
25
28
  java -jar embulk.jar --help
26
29
  ```
27
30
 
@@ -37,10 +40,10 @@ java -jar embulk.jar run config.yml
37
40
  ### Using plugins
38
41
 
39
42
  You can use plugins to load data from/to various systems and file formats.
40
- An example is [embulk-output-postgres-json](https://github.com/frsyuki/embulk-plugin-postgres-json) plugin. It outputs data into PostgreSQL server using "json" column type.
43
+ An example is [embulk-plugin-postgres-json](https://github.com/frsyuki/embulk-plugin-postgres-json) plugin. It outputs data into PostgreSQL server using "json" column type.
41
44
 
42
45
  ```
43
- java -jar embulk.jar gem install embulk-output-postgres-json
46
+ java -jar embulk.jar gem install embulk-plugin-postgres-json
44
47
  java -jar embulk.jar gem list
45
48
  ```
46
49
 
@@ -67,6 +70,28 @@ TODO: documents
67
70
  embulk-plugin-xyz
68
71
  ```
69
72
 
73
+ ### Resuming a failed transaction
74
+
75
+ Embulk supports resuming failed transactions.
76
+ To enable resuming, you need to start the transaction with the `-r PATH` option:
77
+
78
+ ```
79
+ java -jar embulk.jar run config.yml -r resume-state.yml
80
+ ```
81
+
82
+ If the transaction fails, embulk stores some state to the yaml file. You can retry the transaction using exactly the same command:
83
+
84
+ ```
85
+ java -jar embulk.jar run config.yml -r resume-state.yml
86
+ ```
87
+
88
+ If you give up on resuming the transaction, you can use the `embulk cleanup` subcommand to delete intermediate data:
89
+
90
+ ```
91
+ java -jar embulk.jar cleanup config.yml -r resume-state.yml
92
+ ```
93
+
94
+
70
95
  ## Embulk Development
71
96
 
72
97
  ### Build
@@ -9,6 +9,7 @@ buildscript {
9
9
  }
10
10
  dependencies {
11
11
  classpath 'com.jfrog.bintray.gradle:gradle-bintray-plugin:1.0'
12
+ classpath 'com.github.ben-manes:gradle-versions-plugin:0.7'
12
13
  }
13
14
  }
14
15
 
@@ -22,8 +23,12 @@ allprojects {
22
23
  apply plugin: 'maven-publish'
23
24
  apply plugin: 'com.jfrog.bintray'
24
25
 
26
+ // determine which dependencies have updates
27
+ // $ gradle dependencyUpdates
28
+ apply plugin: 'com.github.ben-manes.versions'
29
+
25
30
  group = 'org.embulk'
26
- version = '0.2.1'
31
+ version = '0.3.0'
27
32
 
28
33
  // to upload artifacts to Bintray by gradle-bintray-plugin
29
34
  // $ gradle bintrayUpload
@@ -5,7 +5,7 @@
5
5
  <parent>
6
6
  <groupId>org.embulk</groupId>
7
7
  <artifactId>embulk-parent</artifactId>
8
- <version>0.2.1-SNAPSHOT</version>
8
+ <version>0.3.0-SNAPSHOT</version>
9
9
  </parent>
10
10
 
11
11
  <artifactId>embulk-cli</artifactId>
@@ -5,7 +5,7 @@
5
5
  <parent>
6
6
  <groupId>org.embulk</groupId>
7
7
  <artifactId>embulk-parent</artifactId>
8
- <version>0.2.1-SNAPSHOT</version>
8
+ <version>0.3.0-SNAPSHOT</version>
9
9
  </parent>
10
10
 
11
11
  <artifactId>embulk-core</artifactId>
@@ -21,10 +21,12 @@ import org.embulk.config.NextConfig;
21
21
  import org.embulk.config.ModelManager;
22
22
  import org.embulk.config.ConfigException;
23
23
  import org.embulk.exec.LocalExecutor;
24
- import org.embulk.exec.ExecuteResult;
24
+ import org.embulk.exec.ExecutionResult;
25
25
  import org.embulk.exec.GuessExecutor;
26
26
  import org.embulk.exec.PreviewExecutor;
27
27
  import org.embulk.exec.PreviewResult;
28
+ import org.embulk.exec.ResumeState;
29
+ import org.embulk.exec.PartialExecutionException;
28
30
  import org.embulk.spi.time.Timestamp;
29
31
  import org.embulk.spi.ExecSession;
30
32
  import org.embulk.spi.util.Pages;
@@ -36,6 +38,9 @@ public class Runner
36
38
  {
37
39
  private String nextConfigOutputPath;
38
40
  public String getNextConfigOutputPath() { return nextConfigOutputPath; }
41
+
42
+ private String resumeStatePath;
43
+ public String getResumeStatePath() { return resumeStatePath; }
39
44
  }
40
45
 
41
46
  private final Options options;
@@ -58,6 +63,9 @@ public class Runner
58
63
  case "run":
59
64
  run(args[0]);
60
65
  break;
66
+ case "cleanup":
67
+ cleanup(args[0]);
68
+ break;
61
69
  case "guess":
62
70
  guess(args[0]);
63
71
  break;
@@ -72,21 +80,87 @@ public class Runner
72
80
  public void run(String configPath)
73
81
  {
74
82
  ConfigSource config = loadYamlConfig(configPath);
75
- checkNextConfigOutputPath(options.getNextConfigOutputPath());
83
+ checkFileWritable(options.getNextConfigOutputPath());
84
+ checkFileWritable(options.getResumeStatePath());
85
+
86
+ // load resume state file
87
+ ResumeState resume = null;
88
+ String resumePath = options.getResumeStatePath();
89
+ if (resumePath != null) {
90
+ ConfigSource resumeConfig = null;
91
+ try {
92
+ resumeConfig = loadYamlConfig(resumePath);
93
+ if (resumeConfig.isEmpty()) {
94
+ resumeConfig = null;
95
+ }
96
+ } catch (RuntimeException ex) {
97
+ // leave resumeConfig == null
98
+ }
99
+ if (resumeConfig != null) {
100
+ resume = resumeConfig.loadConfig(ResumeState.class);
101
+ }
102
+ }
76
103
 
77
104
  ExecSession exec = newExecSession(config);
78
105
  LocalExecutor local = injector.getInstance(LocalExecutor.class);
79
- ExecuteResult result = local.run(exec, config);
80
- NextConfig nextConfig = result.getNextConfig();
106
+ ExecutionResult result;
107
+ try {
108
+ if (resume != null) {
109
+ result = local.resume(config, resume);
110
+ } else {
111
+ result = local.run(exec, config);
112
+ }
113
+ } catch (PartialExecutionException partial) {
114
+ if (options.getResumeStatePath() == null) {
115
+ // resume state path is not set. cleanup the transaction
116
+ exec.getLogger(Runner.class).info("Transaction partially failed. Cleaning up the intermediate data. Use -r option to make it resumable.");
117
+ try {
118
+ local.cleanup(config, partial.getResumeState());
119
+ } catch (Throwable ex) {
120
+ partial.addSuppressed(ex);
121
+ }
122
+ throw partial;
123
+ }
124
+ // save the resume state
125
+ exec.getLogger(Runner.class).info("Writing resume state to '{}'", options.getResumeStatePath());
126
+ writeYaml(options.getResumeStatePath(), partial.getResumeState());
127
+ exec.getLogger(Runner.class).info("Resume state is written. Run the transaction again with -r option to resume or use \"cleanup\" subcommand to delete intermediate data.");
128
+ throw partial;
129
+ }
81
130
 
131
+ // delete resume file
132
+ if (options.getResumeStatePath() != null) {
133
+ new File(options.getResumeStatePath()).delete();
134
+ }
135
+
136
+ // write next config
137
+ NextConfig nextConfig = result.getNextConfig();
82
138
  exec.getLogger(Runner.class).info("next config: {}", nextConfig.toString());
83
139
  writeNextConfig(options.getNextConfigOutputPath(), config, nextConfig);
84
140
  }
85
141
 
142
+ public void cleanup(String configPath)
143
+ {
144
+ String resumePath = options.getResumeStatePath();
145
+ if (resumePath == null) {
146
+ throw new IllegalArgumentException("Resume path is required for cleanup");
147
+ }
148
+ ConfigSource config = loadYamlConfig(configPath);
149
+ ConfigSource resumeConfig = loadYamlConfig(resumePath);
150
+ ResumeState resume = resumeConfig.loadConfig(ResumeState.class);
151
+
152
+ ExecSession exec = newExecSession(config);
153
+ LocalExecutor local = injector.getInstance(LocalExecutor.class);
154
+ local.cleanup(config, resume);
155
+
156
+ // delete resume file
157
+ new File(options.getResumeStatePath()).delete();
158
+ }
159
+
86
160
  public void guess(String partialConfigPath)
87
161
  {
88
162
  ConfigSource config = loadYamlConfig(partialConfigPath);
89
- checkNextConfigOutputPath(options.getNextConfigOutputPath());
163
+ checkFileWritable(options.getNextConfigOutputPath());
90
164
 
91
165
  ExecSession exec = newExecSession(config);
92
166
  GuessExecutor guess = injector.getInstance(GuessExecutor.class);
@@ -96,7 +170,7 @@ public class Runner
96
170
  System.err.println(yml);
97
171
  }
98
172
 
99
- private void checkNextConfigOutputPath(String path)
173
+ private void checkFileWritable(String path)
100
174
  {
101
175
  if (path != null) {
102
176
  try (FileOutputStream in = new FileOutputStream(path, true)) {
@@ -109,7 +183,12 @@ public class Runner
109
183
 
110
184
  private String writeNextConfig(String path, ConfigSource originalConfig, NextConfig nextConfigDiff)
111
185
  {
112
- String yml = dumpConfigInYaml(originalConfig.merge(nextConfigDiff));
186
+ return writeYaml(path, originalConfig.merge(nextConfigDiff));
187
+ }
188
+
189
+ private String writeYaml(String path, Object obj)
190
+ {
191
+ String yml = dumpYaml(obj);
113
192
  if (path != null) {
114
193
  if (path.equals("-")) {
115
194
  System.out.print(yml);
@@ -181,7 +260,7 @@ public class Runner
181
260
  }
182
261
  }
183
262
 
184
- private String dumpConfigInYaml(DataSource config)
263
+ private String dumpYaml(Object config)
185
264
  {
186
265
  ModelManager model = injector.getInstance(ModelManager.class);
187
266
  Map<String, Object> map = model.readObject(MapType.class, model.writeObject(config));
@@ -3,7 +3,7 @@ package org.embulk.config;
3
3
  public interface ConfigSource
4
4
  extends DataSource
5
5
  {
6
- public <T extends Task> T loadConfig(Class<T> taskType);
6
+ public <T> T loadConfig(Class<T> taskType);
7
7
 
8
8
  @Override
9
9
  public ConfigSource getNested(String attrName);
@@ -170,17 +170,30 @@ public class DataSourceImpl
170
170
 
171
171
  private static void mergeJsonArray(ArrayNode src, ArrayNode other)
172
172
  {
173
- src.addAll(other);
173
+ for (int i=0; i < other.size(); i++) {
174
+ JsonNode s = src.get(i);
175
+ JsonNode v = other.get(i);
176
+ if (s == null) {
177
+ src.add(v);
178
+ } else if (v.isObject() && s.isObject()) {
179
+ mergeJsonObject((ObjectNode) s, (ObjectNode) v);
180
+ } else if (v.isArray() && s.isArray()) {
181
+ mergeJsonArray((ArrayNode) s, (ArrayNode) v);
182
+ } else {
183
+ src.remove(i);
184
+ src.insert(i, v);
185
+ }
186
+ }
174
187
  }
175
188
 
176
189
  @Override
177
- public <T extends Task> T loadTask(Class<T> taskType)
190
+ public <T> T loadTask(Class<T> taskType)
178
191
  {
179
192
  return model.readObject(taskType, data.traverse());
180
193
  }
181
194
 
182
195
  @Override
183
- public <T extends Task> T loadConfig(Class<T> taskType)
196
+ public <T> T loadConfig(Class<T> taskType)
184
197
  {
185
198
  return model.readObjectWithConfigSerDe(taskType, data.traverse());
186
199
  }
@@ -3,7 +3,7 @@ package org.embulk.config;
3
3
  public interface TaskSource
4
4
  extends DataSource
5
5
  {
6
- public <T extends Task> T loadTask(Class<T> taskType);
6
+ public <T> T loadTask(Class<T> taskType);
7
7
 
8
8
  @Override
9
9
  public TaskSource getNested(String attrName);
@@ -0,0 +1,10 @@
1
+ package org.embulk.exec;
2
+
3
+ public class ExecutionInterruptedException
4
+ extends RuntimeException
5
+ {
6
+ public ExecutionInterruptedException(Exception cause)
7
+ {
8
+ super(cause);
9
+ }
10
+ }
@@ -0,0 +1,26 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import org.embulk.config.NextConfig;
5
+
6
+ public class ExecutionResult
7
+ {
8
+ private final NextConfig nextConfig;
9
+ private final List<Throwable> ignoredExceptions;
10
+
11
+ public ExecutionResult(NextConfig nextConfig, List<Throwable> ignoredExceptions)
12
+ {
13
+ this.nextConfig = nextConfig;
14
+ this.ignoredExceptions = ignoredExceptions;
15
+ }
16
+
17
+ public NextConfig getNextConfig()
18
+ {
19
+ return nextConfig;
20
+ }
21
+
22
+ public List<Throwable> getIgnoredExceptions()
23
+ {
24
+ return ignoredExceptions;
25
+ }
26
+ }
@@ -158,7 +158,7 @@ public class GuessExecutor
158
158
  private static class BufferFileInputPlugin
159
159
  implements FileInputPlugin
160
160
  {
161
- private final Buffer buffer;
161
+ private Buffer buffer;
162
162
 
163
163
  public BufferFileInputPlugin(Buffer buffer)
164
164
  {
@@ -171,6 +171,23 @@ public class GuessExecutor
171
171
  return Exec.newNextConfig();
172
172
  }
173
173
 
174
+ public NextConfig resume(TaskSource taskSource,
175
+ int processorCount,
176
+ FileInputPlugin.Control control)
177
+ {
178
+ throw new UnsupportedOperationException();
179
+ }
180
+
181
+ public void cleanup(TaskSource taskSource,
182
+ int processorCount,
183
+ List<CommitReport> successCommitReports)
184
+ {
185
+ if (buffer != null) {
186
+ buffer.release();
187
+ buffer = null;
188
+ }
189
+ }
190
+
174
191
  public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
175
192
  {
176
193
  return new BufferTransactionalFileInput(buffer);
@@ -257,6 +274,7 @@ public class GuessExecutor
257
274
  NextConfig mergedGuessed = Exec.newNextConfig();
258
275
  for (int i=0; i < guesses.size(); i++) {
259
276
  NextConfig guessed = guesses.get(i).guess(originalConfig, sample);
277
+ guessed = addAssumedDecoderConfigs(originalConfig, guessed);
260
278
  mergedGuessed.merge(guessed);
261
279
  mergedConfig.merge(mergedGuessed);
262
280
  if (!mergedConfig.equals(originalConfig)) {
@@ -287,6 +305,24 @@ public class GuessExecutor
287
305
  }
288
306
  throw new NoSampleException("No input buffer to guess");
289
307
  }
308
+
309
+ private static class ConfigSourceList extends ArrayList<ConfigSource> { };
310
+
311
+ private static NextConfig addAssumedDecoderConfigs(ConfigSource originalConfig, NextConfig guessed)
312
+ {
313
+ List<ConfigSource> guessedDecoders = guessed.get(ConfigSourceList.class, "decoders", null);
314
+ if (guessedDecoders == null) {
315
+ return guessed;
316
+ } else {
317
+ List<ConfigSource> assumedDecoders = originalConfig.get(ConfigSourceList.class, "decoders", new ConfigSourceList());
318
+ ImmutableList.Builder<ConfigSource> added = ImmutableList.builder();
319
+ for (ConfigSource assuemed : assumedDecoders) {
320
+ added.add(Exec.newConfigSource());
321
+ }
322
+ added.addAll(guessedDecoders);
323
+ return guessed.set("decoders", added.build());
324
+ }
325
+ }
290
326
  }
291
327
 
292
328
  public static class GuessedNoticeError