embulk 0.7.9 → 0.7.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2a344520dc3aded72b2bf53a735a4a46f6d8668a
4
- data.tar.gz: 6211c301cce4f7d7a4beb9e7a011d6b7971965e2
3
+ metadata.gz: f28491871fb762807b53e4eb27d97b15800be9f9
4
+ data.tar.gz: 6e38eb058c63710bd9e43a5bd2c0218a8b797bb1
5
5
  SHA512:
6
- metadata.gz: 12dca07d9d57ae0f6d36466c0675a66e2755f09cc25d252eb5bcb14a86cfa6fd27110782f9ce1af0b0b31620da45a3666aae90c5e14d3d16f91ddab5227f805b
7
- data.tar.gz: 022627716d3db31dee527a5848f0d66766593e9c4a65dadd7c093cbc50bbbfdc81a34aa5aef6b3ca7a829db1f0a4c817c71b8d0beb26f028f0e7efdd65318fcc
6
+ metadata.gz: 5e2d90085bb8fddf565f4a936ec6ddced865fd3f71ea566bcc347904962564a67af243a41dff8d5875447a329f9f99a8e64f771e3dd7dc5d68ee86a7798fed9a
7
+ data.tar.gz: 2b6b729becf7333c1af3ae1fc44cfbb643709382aaf59369c53f37a2f2326f99fae17a48fc9e5b23fb5545f101e38f8e3b17e3aa0eaecd703f63f167a1e8536b
data/README.md CHANGED
@@ -135,6 +135,8 @@ Following command updates embulk itself to the specific released version.
135
135
  embulk selfupdate x.y.z
136
136
  ```
137
137
 
138
+ Older versions are available at [dl.embulk.org](http://dl.embulk.org).
139
+
138
140
 
139
141
  ## Embulk Development
140
142
 
data/build.gradle CHANGED
@@ -16,7 +16,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
16
16
 
17
17
  allprojects {
18
18
  group = 'org.embulk'
19
- version = '0.7.9'
19
+ version = '0.7.10'
20
20
 
21
21
  ext {
22
22
  jrubyVersion = '9.0.0.0'
@@ -21,6 +21,7 @@ import org.embulk.exec.PreviewResult;
21
21
  import org.embulk.exec.ExecutionResult;
22
22
  import org.embulk.exec.PartialExecutionException;
23
23
  import org.embulk.exec.ResumeState;
24
+ import org.embulk.exec.TransactionStage;
24
25
  import org.embulk.spi.BufferAllocator;
25
26
  import org.embulk.spi.ExecSession;
26
27
  import org.embulk.guice.Bootstrap;
@@ -273,6 +274,11 @@ public class EmbulkEmbed
273
274
  checkState(partialExecutionException != null);
274
275
  return partialExecutionException.getResumeState();
275
276
  }
277
+
278
+ public TransactionStage getTransactionStage()
279
+ {
280
+ return partialExecutionException.getTransactionStage();
281
+ }
276
282
  }
277
283
 
278
284
  public class ResumeStateAction
@@ -74,6 +74,7 @@ public class BulkLoader
74
74
  private volatile List<TaskSource> filterTaskSources;
75
75
  private volatile List<Schema> schemas;
76
76
  private volatile Schema executorSchema;
77
+ private volatile TransactionStage transactionStage;
77
78
 
78
79
  private volatile ConfigDiff inputConfigDiff;
79
80
  private volatile ConfigDiff outputConfigDiff;
@@ -102,6 +103,11 @@ public class BulkLoader
102
103
  this.executorSchema = executorSchema;
103
104
  }
104
105
 
106
+ public void setTransactionStage(TransactionStage transactionStage)
107
+ {
108
+ this.transactionStage = transactionStage;
109
+ }
110
+
105
111
  public void setInputTaskSource(TaskSource inputTaskSource)
106
112
  {
107
113
  this.inputTaskSource = inputTaskSource;
@@ -220,20 +226,6 @@ public class BulkLoader
220
226
  return inputConfigDiff != null && outputConfigDiff != null;
221
227
  }
222
228
 
223
- public boolean isAnyStarted()
224
- {
225
- if (inputTaskStates == null) {
226
- // not initialized
227
- return false;
228
- }
229
- for (TaskState inputTaskState : inputTaskStates) {
230
- if (inputTaskState.isStarted()) {
231
- return true;
232
- }
233
- }
234
- return false;
235
- }
236
-
237
229
  public void setOutputConfigDiff(ConfigDiff outputConfigDiff)
238
230
  {
239
231
  if (outputConfigDiff == null) {
@@ -356,16 +348,19 @@ public class BulkLoader
356
348
 
357
349
  public ResumeState buildResumeState(ExecSession exec)
358
350
  {
351
+ Schema inputSchema = (schemas == null) ? null : schemas.get(0);
352
+ List<Optional<TaskReport>> inputTaskReports = (inputTaskStates == null) ? null : getInputTaskReports();
353
+ List<Optional<TaskReport>> outputTaskReports = (outputTaskStates == null) ? null : getOutputTaskReports();
359
354
  return new ResumeState(
360
355
  exec.getSessionExecConfig(),
361
356
  inputTaskSource, outputTaskSource,
362
- first(schemas), executorSchema,
363
- getInputTaskReports(), getOutputTaskReports());
357
+ inputSchema, executorSchema,
358
+ inputTaskReports, outputTaskReports);
364
359
  }
365
360
 
366
361
  public PartialExecutionException buildPartialExecuteException(Throwable cause, ExecSession exec)
367
362
  {
368
- return new PartialExecutionException(cause, buildResumeState(exec));
363
+ return new PartialExecutionException(cause, buildResumeState(exec), transactionStage);
369
364
  }
370
365
  }
371
366
 
@@ -513,26 +508,30 @@ public class BulkLoader
513
508
  final ProcessPluginSet plugins = new ProcessPluginSet(task);
514
509
 
515
510
  final LoaderState state = new LoaderState(Exec.getLogger(BulkLoader.class), plugins);
511
+ state.setTransactionStage(TransactionStage.INPUT_BEGIN);
516
512
  try {
517
513
  ConfigDiff inputConfigDiff = plugins.getInputPlugin().transaction(task.getInputConfig(), new InputPlugin.Control() {
518
514
  public List<TaskReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount)
519
515
  {
520
516
  state.setInputTaskSource(inputTask);
517
+ state.setTransactionStage(TransactionStage.FILTER_BEGIN);
521
518
  Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {
522
519
  public void run(final List<TaskSource> filterTasks, final List<Schema> schemas)
523
520
  {
524
521
  state.setSchemas(schemas);
525
522
  state.setFilterTaskSources(filterTasks);
523
+ state.setTransactionStage(TransactionStage.EXECUTOR_BEGIN);
526
524
  exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {
527
525
  public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor)
528
526
  {
529
527
  state.setExecutorSchema(executorSchema);
528
+ state.setTransactionStage(TransactionStage.OUTPUT_BEGIN);
530
529
  ConfigDiff outputConfigDiff = plugins.getOutputPlugin().transaction(task.getOutputConfig(), executorSchema, outputTaskCount, new OutputPlugin.Control() {
531
530
  public List<TaskReport> run(final TaskSource outputTask)
532
531
  {
533
532
  state.setOutputTaskSource(outputTask);
534
-
535
533
  state.initialize(inputTaskCount, outputTaskCount);
534
+ state.setTransactionStage(TransactionStage.RUN);
536
535
 
537
536
  if (!state.isAllTasksCommitted()) { // inputTaskCount == 0
538
537
  execute(task, executor, state);
@@ -543,18 +542,23 @@ public class BulkLoader
543
542
  state.countUncommittedInputTasks(), state.countUncommittedOutputTasks()));
544
543
  }
545
544
 
545
+ state.setTransactionStage(TransactionStage.OUTPUT_COMMIT);
546
546
  return state.getAllOutputTaskReports();
547
547
  }
548
548
  });
549
549
  state.setOutputConfigDiff(outputConfigDiff);
550
+ state.setTransactionStage(TransactionStage.EXECUTOR_COMMIT);
550
551
  }
551
552
  });
553
+ state.setTransactionStage(TransactionStage.FILTER_COMMIT);
552
554
  }
553
555
  });
556
+ state.setTransactionStage(TransactionStage.INPUT_COMMIT);
554
557
  return state.getAllInputTaskReports();
555
558
  }
556
559
  });
557
560
  state.setInputConfigDiff(inputConfigDiff);
561
+ state.setTransactionStage(TransactionStage.CLEANUP);
558
562
 
559
563
  cleanupCommittedTransaction(config, state);
560
564
 
@@ -565,9 +569,6 @@ public class BulkLoader
565
569
  // ignore the exception
566
570
  return state.buildExecuteResultWithWarningException(ex);
567
571
  }
568
- if (!state.isAnyStarted()) {
569
- throw ex;
570
- }
571
572
  throw state.buildPartialExecuteException(ex, Exec.session());
572
573
  }
573
574
  }
@@ -580,6 +581,7 @@ public class BulkLoader
580
581
  final ProcessPluginSet plugins = new ProcessPluginSet(task);
581
582
 
582
583
  final LoaderState state = new LoaderState(Exec.getLogger(BulkLoader.class), plugins);
584
+ state.setTransactionStage(TransactionStage.INPUT_BEGIN);
583
585
  try {
584
586
  ConfigDiff inputConfigDiff = plugins.getInputPlugin().resume(resume.getInputTaskSource(), resume.getInputSchema(), resume.getInputTaskReports().size(), new InputPlugin.Control() {
585
587
  public List<TaskReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount)
@@ -587,23 +589,27 @@ public class BulkLoader
587
589
  // TODO validate inputTask?
588
590
  // TODO validate inputSchema
589
591
  state.setInputTaskSource(inputTask);
592
+ state.setTransactionStage(TransactionStage.FILTER_BEGIN);
590
593
  Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {
591
594
  public void run(final List<TaskSource> filterTasks, final List<Schema> schemas)
592
595
  {
593
596
  state.setSchemas(schemas);
594
597
  state.setFilterTaskSources(filterTasks);
598
+ state.setTransactionStage(TransactionStage.EXECUTOR_BEGIN);
595
599
  exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {
596
600
  public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor)
597
601
  {
598
602
  // TODO validate executorSchema
599
603
  state.setExecutorSchema(executorSchema);
604
+ state.setTransactionStage(TransactionStage.OUTPUT_BEGIN);
600
605
  ConfigDiff outputConfigDiff = plugins.getOutputPlugin().resume(resume.getOutputTaskSource(), executorSchema, outputTaskCount, new OutputPlugin.Control() {
601
606
  public List<TaskReport> run(final TaskSource outputTask)
602
607
  {
603
608
  // TODO validate outputTask?
604
609
  state.setOutputTaskSource(outputTask);
605
-
606
610
  restoreResumedTaskReports(resume, state);
611
+ state.setTransactionStage(TransactionStage.RUN);
612
+
607
613
  if (!state.isAllTasksCommitted()) {
608
614
  execute(task, executor, state);
609
615
  }
@@ -613,18 +619,23 @@ public class BulkLoader
613
619
  state.countUncommittedInputTasks(), state.countUncommittedOutputTasks()));
614
620
  }
615
621
 
622
+ state.setTransactionStage(TransactionStage.OUTPUT_COMMIT);
616
623
  return state.getAllOutputTaskReports();
617
624
  }
618
625
  });
619
626
  state.setOutputConfigDiff(outputConfigDiff);
627
+ state.setTransactionStage(TransactionStage.EXECUTOR_COMMIT);
620
628
  }
621
629
  });
630
+ state.setTransactionStage(TransactionStage.FILTER_COMMIT);
622
631
  }
623
632
  });
633
+ state.setTransactionStage(TransactionStage.INPUT_COMMIT);
624
634
  return state.getAllInputTaskReports();
625
635
  }
626
636
  });
627
637
  state.setInputConfigDiff(inputConfigDiff);
638
+ state.setTransactionStage(TransactionStage.CLEANUP);
628
639
 
629
640
  cleanupCommittedTransaction(config, state);
630
641
 
@@ -635,9 +646,6 @@ public class BulkLoader
635
646
  // ignore the exception
636
647
  return state.buildExecuteResultWithWarningException(ex);
637
648
  }
638
- if (!state.isAnyStarted()) {
639
- throw ex;
640
- }
641
649
  throw state.buildPartialExecuteException(ex, Exec.session());
642
650
  }
643
651
  }
@@ -31,6 +31,7 @@ import org.embulk.spi.FileInput;
31
31
  import org.embulk.spi.PageOutput;
32
32
  import org.embulk.spi.TransactionalFileInput;
33
33
  import org.embulk.spi.FileInputRunner;
34
+ import static org.embulk.spi.util.Inputs.each;
34
35
 
35
36
  public class GuessExecutor
36
37
  {
@@ -294,7 +295,7 @@ public class GuessExecutor
294
295
  final ConfigSource originalConfig = task.getOriginalConfig();
295
296
 
296
297
  // get sample buffer
297
- Buffer sample = getFirstBuffer(input);
298
+ Buffer sample = readSample(input, 32*1024); // TODO get sample size from system config. See also SamplingParserPlugin().
298
299
 
299
300
  // load guess plugins
300
301
  ImmutableList.Builder<GuessPlugin> builder = ImmutableList.builder();
@@ -320,24 +321,18 @@ public class GuessExecutor
320
321
  throw new GuessedNoticeError(mergedGuessed);
321
322
  }
322
323
 
323
- private static Buffer getFirstBuffer(FileInput input)
324
+ private static Buffer readSample(FileInput fileInput, int sampleSize)
324
325
  {
325
- // The first buffer is created by SamplingParserPlugin. See FileInputRunner.guess.
326
- RuntimeException decodeException = null;
326
+ Buffer sample = Buffer.allocate(sampleSize);
327
327
  try {
328
- while (input.nextFile()) {
329
- Buffer sample = input.poll();
330
- if (sample != null) {
331
- return sample;
332
- }
333
- }
328
+ SamplingParserPlugin.readSample(fileInput, sample, 0, sampleSize);
334
329
  } catch (RuntimeException ex) {
335
330
  // ignores exceptions because FileDecoderPlugin can throw exceptions
336
- // such as "Unexpected end of ZLIB input stream"
337
- decodeException = ex;
331
+ // such as "Unexpected end of ZLIB input stream" if decoder plugin
332
+ // is wrongly guessed.
338
333
  }
339
- if (decodeException != null) {
340
- throw decodeException;
334
+ if (sample.limit() > 0) {
335
+ return sample;
341
336
  }
342
337
  throw new NoSampleException("No input buffer to guess");
343
338
  }
@@ -4,15 +4,23 @@ public class PartialExecutionException
4
4
  extends RuntimeException
5
5
  {
6
6
  private final ResumeState resumeState;
7
+ private final TransactionStage transactionStage;
7
8
 
8
- public PartialExecutionException(Throwable cause, ResumeState resumeState)
9
+ public PartialExecutionException(Throwable cause, ResumeState resumeState,
10
+ TransactionStage transactionStage)
9
11
  {
10
12
  super(cause);
11
13
  this.resumeState = resumeState;
14
+ this.transactionStage = transactionStage;
12
15
  }
13
16
 
14
17
  public ResumeState getResumeState()
15
18
  {
16
19
  return resumeState;
17
20
  }
21
+
22
+ public TransactionStage getTransactionStage()
23
+ {
24
+ return transactionStage;
25
+ }
18
26
  }
@@ -90,7 +90,7 @@ public class SamplingParserPlugin
90
90
  public SamplingParserPlugin(@ForSystemConfig ConfigSource systemConfig)
91
91
  {
92
92
  this.minSampleSize = 40; // empty gzip file is 33 bytes. // TODO get sample size from system config
93
- this.sampleSize = 32*1024; // TODO get sample size from system config
93
+ this.sampleSize = 32*1024; // TODO get sample size from system config. See also GuessExecutor.run.
94
94
  Preconditions.checkArgument(minSampleSize < sampleSize, "minSampleSize must be smaller than sampleSize");
95
95
  }
96
96
 
@@ -111,26 +111,32 @@ public class SamplingParserPlugin
111
111
  throw new SampledNoticeError(buffer);
112
112
  }
113
113
 
114
- private static Buffer readSample(FileInput fileInput, int sampleSize)
114
+ public static Buffer readSample(FileInput fileInput, int sampleSize)
115
+ {
116
+ return readSample(fileInput, Buffer.allocate(sampleSize), 0, sampleSize);
117
+ }
118
+
119
+ public static Buffer readSample(FileInput fileInput, Buffer sample, int offset, int sampleSize)
115
120
  {
116
121
  if (!fileInput.nextFile()) {
117
122
  // no input files
118
- return Buffer.EMPTY;
123
+ return sample;
119
124
  }
120
125
 
121
- Buffer sample = Buffer.allocate(sampleSize);
122
- int offset = 0;
123
-
124
- for (Buffer buffer : each(fileInput)) {
125
- int size = Math.min(buffer.limit(), sample.capacity() - offset);
126
- sample.setBytes(offset, buffer, 0, size);
127
- offset += size;
128
- buffer.release();
129
- if (offset >= sampleSize) {
130
- break;
126
+ try {
127
+ for (Buffer buffer : each(fileInput)) {
128
+ int size = Math.min(buffer.limit(), sample.capacity() - offset);
129
+ sample.setBytes(offset, buffer, 0, size);
130
+ offset += size;
131
+ buffer.release();
132
+ if (offset >= sampleSize) {
133
+ break;
134
+ }
131
135
  }
132
136
  }
133
- sample.limit(offset);
137
+ finally {
138
+ sample.limit(offset);
139
+ }
134
140
  return sample;
135
141
  }
136
142
  }
@@ -0,0 +1,27 @@
1
+ package org.embulk.exec;
2
+
3
+ public enum TransactionStage
4
+ {
5
+ INPUT_BEGIN(1),
6
+ FILTER_BEGIN(2),
7
+ EXECUTOR_BEGIN(3),
8
+ OUTPUT_BEGIN(4),
9
+ RUN(5),
10
+ OUTPUT_COMMIT(6),
11
+ EXECUTOR_COMMIT(7),
12
+ FILTER_COMMIT(8),
13
+ INPUT_COMMIT(9),
14
+ CLEANUP(10);
15
+
16
+ private final int index;
17
+
18
+ private TransactionStage(int index)
19
+ {
20
+ this.index = index;
21
+ }
22
+
23
+ public boolean isBefore(TransactionStage another)
24
+ {
25
+ return index < another.index;
26
+ }
27
+ }
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.7.10
7
8
  release/release-0.7.9
8
9
  release/release-0.7.8
9
10
  release/release-0.7.7
@@ -0,0 +1,13 @@
1
+ Release 0.7.10
2
+ ==================================
3
+
4
+ General Changes
5
+ ------------------
6
+
7
+ * Fixed a problem where guessing reads only 512 bytes when input is gzip-compressed and the gzip decoder is guessed during the guessing.
8
+ * Added ``PartialExecutionException.getTransactionStage()`` method that tells in which stage a transaction failed. The stage is one of: the beginning of (input, filter, executor, output), run, the end of (output, executor, filter, input), or cleanup.
9
+
10
+
11
+ Release Date
12
+ ------------------
13
+ 2015-12-01
@@ -43,7 +43,7 @@ public class GzipFileDecoderPlugin
43
43
  if (!files.nextFile()) {
44
44
  return null;
45
45
  }
46
- return new GZIPInputStream(files);
46
+ return new GZIPInputStream(files, 8*1024);
47
47
  }
48
48
 
49
49
  public void close() throws IOException
data/lib/embulk/runner.rb CHANGED
@@ -79,18 +79,22 @@ module Embulk
79
79
 
80
80
  unless executionResult
81
81
  unless resumableResult.isSuccessful
82
- Embulk.logger.info "Writing resume state to '#{resume_state_path}'"
83
- write_config(resume_state_path, resumableResult.getResumeState)
84
- Embulk.logger.info "Resume state is written. Run the transaction again with -r option to resume or use \"cleanup\" subcommand to delete intermediate data."
82
+ if resumableResult.getTransactionStage.isBefore(org.embulk.exec.TransactionStage::RUN)
83
+ # retry without resume state file if no tasks started yet
84
+ # delete resume file
85
+ File.delete(resume_state_path) rescue nil if resume_state_path
86
+ else
87
+ Embulk.logger.info "Writing resume state to '#{resume_state_path}'"
88
+ write_config(resume_state_path, resumableResult.getResumeState)
89
+ Embulk.logger.info "Resume state is written. Run the transaction again with -r option to resume or use \"cleanup\" subcommand to delete intermediate data."
90
+ end
85
91
  raise resumableResult.getCause
86
92
  end
87
93
  executionResult = resumableResult.getSuccessfulResult
88
94
  end
89
95
 
90
96
  # delete resume file
91
- if resume_state_path
92
- File.delete(resume_state_path) rescue nil
93
- end
97
+ File.delete(resume_state_path) rescue nil if resume_state_path
94
98
 
95
99
  configDiff = executionResult.getConfigDiff
96
100
  Embulk.logger.info("Committed.")
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.7.9'
2
+ VERSION = '0.7.10'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.9
4
+ version: 0.7.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-11 00:00:00.000000000 Z
11
+ date: 2015-12-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jruby-jars
@@ -107,8 +107,8 @@ files:
107
107
  - classpath/bval-jsr303-0.5.jar
108
108
  - classpath/commons-beanutils-core-1.8.3.jar
109
109
  - classpath/commons-lang3-3.1.jar
110
- - classpath/embulk-core-0.7.9.jar
111
- - classpath/embulk-standards-0.7.9.jar
110
+ - classpath/embulk-core-0.7.10.jar
111
+ - classpath/embulk-standards-0.7.10.jar
112
112
  - classpath/guava-18.0.jar
113
113
  - classpath/guice-4.0.jar
114
114
  - classpath/guice-multibindings-4.0.jar
@@ -185,6 +185,7 @@ files:
185
185
  - embulk-core/src/main/java/org/embulk/exec/SetCurrentThreadName.java
186
186
  - embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java
187
187
  - embulk-core/src/main/java/org/embulk/exec/TempFileAllocator.java
188
+ - embulk-core/src/main/java/org/embulk/exec/TransactionStage.java
188
189
  - embulk-core/src/main/java/org/embulk/guice/Bootstrap.java
189
190
  - embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java
190
191
  - embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java
@@ -409,6 +410,7 @@ files:
409
410
  - embulk-docs/src/release/release-0.6.9.rst
410
411
  - embulk-docs/src/release/release-0.7.0.rst
411
412
  - embulk-docs/src/release/release-0.7.1.rst
413
+ - embulk-docs/src/release/release-0.7.10.rst
412
414
  - embulk-docs/src/release/release-0.7.2.rst
413
415
  - embulk-docs/src/release/release-0.7.3.rst
414
416
  - embulk-docs/src/release/release-0.7.4.rst