embulk 0.8.18-java → 0.8.19-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/README.md +10 -0
  3. data/build.gradle +10 -3
  4. data/embulk-cli/build.gradle +2 -0
  5. data/embulk-cli/src/main/bat/selfrun.bat +98 -0
  6. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkExample.java +82 -0
  7. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkMigrate.java +458 -0
  8. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkNew.java +419 -0
  9. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkSelfUpdate.java +248 -0
  10. data/embulk-cli/src/main/sh/selfrun.sh +0 -103
  11. data/embulk-cli/src/test/java/org/embulk/cli/SelfrunTest.java +158 -143
  12. data/embulk-core/build.gradle +2 -2
  13. data/embulk-core/src/main/java/org/embulk/EmbulkVersion.java +109 -0
  14. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +11 -0
  15. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +29 -3
  16. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +47 -13
  17. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +6 -3
  18. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +385 -64
  19. data/embulk-core/src/main/java/org/embulk/spi/TempFileSpace.java +2 -1
  20. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +62 -0
  21. data/embulk-docs/src/built-in.rst +59 -21
  22. data/embulk-docs/src/customization.rst +8 -8
  23. data/embulk-docs/src/developers/index.rst +45 -0
  24. data/embulk-docs/src/index.rst +11 -7
  25. data/embulk-docs/src/recipe.rst +1 -1
  26. data/embulk-docs/src/recipe/{scheduled-csv-load-to-elasticsearch-kibana4.rst → scheduled-csv-load-to-elasticsearch-kibana5.rst} +26 -24
  27. data/embulk-docs/src/release.rst +1 -0
  28. data/embulk-docs/src/release/release-0.4.0.rst +1 -1
  29. data/embulk-docs/src/release/release-0.5.0.rst +1 -1
  30. data/embulk-docs/src/release/release-0.6.0.rst +1 -1
  31. data/embulk-docs/src/release/release-0.6.20.rst +1 -1
  32. data/embulk-docs/src/release/release-0.8.19.rst +43 -0
  33. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +2 -2
  34. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +30 -1
  35. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +10 -0
  36. data/embulk-standards/src/test/java/org/embulk/standards/preview/TestFilePreview.java +73 -0
  37. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_skip_suggest_if_empty_sample_records.csv +5 -0
  38. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_skip_suggest_if_empty_sample_records_guessed.yml +2 -0
  39. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_skip_suggest_if_empty_sample_records_seed.yml +1 -0
  40. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes.csv +5 -0
  41. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes_exec.yml +1 -0
  42. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes_load.yml +19 -0
  43. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes_previewed.csv +1 -0
  44. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple.csv +5 -0
  45. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple_load.yml +19 -0
  46. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple_previewed.csv +4 -0
  47. data/embulk-test/src/main/java/org/embulk/test/PreviewResultInputPlugin.java +65 -0
  48. data/embulk-test/src/main/java/org/embulk/test/TestingBulkLoader.java +5 -0
  49. data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +59 -2
  50. data/embulk.gemspec +2 -1
  51. data/lib/embulk/command/embulk_run.rb +11 -49
  52. data/lib/embulk/data/new/README.md.vm +106 -0
  53. data/lib/embulk/data/new/{gitignore.erb → gitignore.vm} +3 -3
  54. data/lib/embulk/data/new/java/{build.gradle.erb → build.gradle.vm} +8 -8
  55. data/lib/embulk/data/new/java/{decoder.java.erb → decoder.java.vm} +6 -4
  56. data/lib/embulk/data/new/java/{encoder.java.erb → encoder.java.vm} +7 -5
  57. data/lib/embulk/data/new/java/{file_input.java.erb → file_input.java.vm} +9 -7
  58. data/lib/embulk/data/new/java/{file_output.java.erb → file_output.java.vm} +7 -5
  59. data/lib/embulk/data/new/java/{filter.java.erb → filter.java.vm} +4 -3
  60. data/lib/embulk/data/new/java/{formatter.java.erb → formatter.java.vm} +5 -4
  61. data/lib/embulk/data/new/java/{input.java.erb → input.java.vm} +6 -4
  62. data/lib/embulk/data/new/java/{output.java.erb → output.java.vm} +7 -5
  63. data/lib/embulk/data/new/java/{parser.java.erb → parser.java.vm} +5 -4
  64. data/lib/embulk/data/new/java/plugin_loader.rb.vm +3 -0
  65. data/lib/embulk/data/new/java/test.java.vm +5 -0
  66. data/lib/embulk/data/new/ruby/decoder_guess.rb.vm +25 -0
  67. data/lib/embulk/data/new/ruby/{filter.rb.erb → filter.rb.vm} +2 -2
  68. data/lib/embulk/data/new/ruby/{formatter.rb.erb → formatter.rb.vm} +2 -2
  69. data/lib/embulk/data/new/ruby/gemspec.vm +20 -0
  70. data/lib/embulk/data/new/ruby/{input.rb.erb → input.rb.vm} +10 -10
  71. data/lib/embulk/data/new/ruby/{output.rb.erb → output.rb.vm} +7 -7
  72. data/lib/embulk/data/new/ruby/{parser.rb.erb → parser.rb.vm} +2 -2
  73. data/lib/embulk/data/new/ruby/parser_guess.rb.vm +65 -0
  74. data/lib/embulk/guess/csv.rb +5 -0
  75. data/lib/embulk/version.rb +22 -1
  76. metadata +55 -35
  77. data/lib/embulk/command/embulk_example.rb +0 -33
  78. data/lib/embulk/command/embulk_generate_bin.rb +0 -62
  79. data/lib/embulk/command/embulk_migrate_plugin.rb +0 -244
  80. data/lib/embulk/command/embulk_new_plugin.rb +0 -126
  81. data/lib/embulk/command/embulk_selfupdate.rb +0 -121
  82. data/lib/embulk/data/new/README.md.erb +0 -111
  83. data/lib/embulk/data/new/java/plugin_loader.rb.erb +0 -3
  84. data/lib/embulk/data/new/java/test.java.erb +0 -5
  85. data/lib/embulk/data/new/ruby/decoder_guess.rb.erb +0 -25
  86. data/lib/embulk/data/new/ruby/gemspec.erb +0 -20
  87. data/lib/embulk/data/new/ruby/parser_guess.rb.erb +0 -65
@@ -31,7 +31,7 @@ dependencies {
31
31
  compile 'org.slf4j:slf4j-api:1.7.12'
32
32
  compile 'org.jruby:jruby-complete:' + project.jrubyVersion
33
33
  compile 'com.google.code.findbugs:annotations:3.0.0'
34
- compile 'org.yaml:snakeyaml:1.14'
34
+ compile 'org.yaml:snakeyaml:1.18'
35
35
  compile 'javax.validation:validation-api:1.1.0.Final'
36
36
  compile 'org.apache.bval:bval-jsr303:0.5'
37
37
  compile 'io.airlift:slice:0.9'
@@ -45,7 +45,7 @@ dependencies {
45
45
 
46
46
  gems 'rubygems:bundler:1.10.6'
47
47
  gems 'rubygems:msgpack:1.1.0'
48
- gems 'rubygems:liquid:3.0.6'
48
+ gems 'rubygems:liquid:4.0.0'
49
49
  }
50
50
 
51
51
  task unpackGems(type: JRubyPrepare) {
@@ -0,0 +1,109 @@
1
+ package org.embulk;
2
+
3
+ import java.io.IOException;
4
+ import java.net.URL;
5
+ import java.security.CodeSource;
6
+ import java.security.ProtectionDomain;
7
+ import java.util.jar.Attributes;
8
+ import java.util.jar.JarFile;
9
+ import java.util.jar.Manifest;
10
+
11
+ public final class EmbulkVersion
12
+ {
13
+ private EmbulkVersion()
14
+ {
15
+ }
16
+
17
+ // This assumes that Embulk is always packaged in the embulk-cli jar whenever the Embulk version is checked in Java.
18
+ static {
19
+ VERSION = getImplementationVersion(getSelfJarManifest(), "[embulk-version-unavailable]");
20
+ }
21
+
22
+ private static Manifest getSelfJarManifest()
23
+ {
24
+ try {
25
+ final ProtectionDomain protectionDomain;
26
+ try {
27
+ protectionDomain = EmbulkVersion.class.getProtectionDomain();
28
+ }
29
+ catch (SecurityException ex) {
30
+ System.err.println("Embulk version unavailable due to ProtectionDomain inaccessible.");
31
+ ex.printStackTrace();
32
+ return null;
33
+ }
34
+
35
+ final CodeSource codeSource = protectionDomain.getCodeSource();
36
+ if (codeSource == null) {
37
+ System.err.println("Embulk version unavailable due to CodeSource unavailable.");
38
+ return null;
39
+ }
40
+
41
+ final URL selfJarUrl = codeSource.getLocation();
42
+ if (selfJarUrl == null) {
43
+ System.err.println("Embulk version unavailable due to the location of CodeSource unavailable.");
44
+ return null;
45
+ }
46
+ else if (!selfJarUrl.getProtocol().equals("file")) {
47
+ System.err.println("Embulk version unavailable as the location of CodeSource is not local.");
48
+ return null;
49
+ }
50
+
51
+ final String selfJarPathString = selfJarUrl.getPath();
52
+ if (selfJarPathString == null) {
53
+ System.err.println("Embulk version unavailable due to the path of CodeSource unavailable.");
54
+ return null;
55
+ }
56
+ else if (selfJarPathString.isEmpty()) {
57
+ System.err.println("Embulk version unavailable due to the path of CodeSource empty.");
58
+ return null;
59
+ }
60
+
61
+ try (final JarFile selfJarFile = new JarFile(selfJarPathString)) {
62
+ try {
63
+ return selfJarFile.getManifest();
64
+ }
65
+ catch (IllegalStateException ex) {
66
+ System.err.println("Embulk version unavailable due to the jar file closed unexpectedly.");
67
+ ex.printStackTrace();
68
+ return null;
69
+ }
70
+ catch (IOException ex) {
71
+ System.err.println("Embulk version unavailable due to failure to get the manifst in the jar file.");
72
+ ex.printStackTrace();
73
+ return null;
74
+ }
75
+ }
76
+ catch (SecurityException ex) {
77
+ System.err.println("Embulk version unavailable due to the jar file inaccessible.");
78
+ ex.printStackTrace();
79
+ return null;
80
+ }
81
+ catch (IOException ex) {
82
+ System.err.println("Embulk version unavailable due to failure to access the jar file.");
83
+ ex.printStackTrace();
84
+ return null;
85
+ }
86
+ }
87
+ catch (Throwable ex) {
88
+ System.err.println("Embulk version unavailable due to an unknown exception.");
89
+ ex.printStackTrace();
90
+ return null;
91
+ }
92
+ }
93
+
94
+ private static String getImplementationVersion(final Manifest manifest, final String defaultVersion)
95
+ {
96
+ if (manifest == null) {
97
+ return defaultVersion;
98
+ }
99
+ final Attributes mainAttributes = manifest.getMainAttributes();
100
+ final String implementationVersion = mainAttributes.getValue(Attributes.Name.IMPLEMENTATION_VERSION);
101
+ if (implementationVersion == null) {
102
+ System.err.println("Embulk version unavailable due to the manifest not containing Implementation-Version.");
103
+ return defaultVersion;
104
+ }
105
+ return implementationVersion;
106
+ }
107
+
108
+ public static final String VERSION;
109
+ }
@@ -55,6 +55,10 @@ public class GuessExecutor
55
55
  @Config("exclude_guess_plugins")
56
56
  @ConfigDefault("[]")
57
57
  public List<PluginType> getExcludeGuessPlugins();
58
+
59
+ @Config("guess_sample_buffer_bytes")
60
+ @ConfigDefault("32768") // 32 * 1024
61
+ public int getSampleBufferBytes();
58
62
  }
59
63
 
60
64
  public static void registerDefaultGuessPluginTo(Binder binder, PluginType type)
@@ -63,6 +67,13 @@ public class GuessExecutor
63
67
  multibinder.addBinding().toInstance(type);
64
68
  }
65
69
 
70
+ // Used by FileInputRunner#guess(..)
71
+ public static ConfigSource createSampleBufferConfigFromExecConfig(ConfigSource execConfig)
72
+ {
73
+ final GuessExecutorTask execTask = execConfig.loadConfig(GuessExecutorTask.class);
74
+ return Exec.newConfigSource().set("sample_buffer_bytes", execTask.getSampleBufferBytes());
75
+ }
76
+
66
77
  @Inject
67
78
  public GuessExecutor(@ForSystemConfig ConfigSource systemConfig,
68
79
  @ForGuess Set<PluginType> defaultGuessPlugins)
@@ -12,9 +12,9 @@ import org.embulk.config.Task;
12
12
  import org.embulk.config.TaskSource;
13
13
  import org.embulk.config.ConfigSource;
14
14
  import org.embulk.config.TaskReport;
15
+ import org.embulk.exec.SamplingParserPlugin.SampleBufferTask;
15
16
  import org.embulk.plugin.PluginType;
16
17
  import org.embulk.spi.Buffer;
17
- import org.embulk.spi.FileInputPlugin;
18
18
  import org.embulk.spi.FileInputRunner;
19
19
  import org.embulk.spi.Schema;
20
20
  import org.embulk.spi.Page;
@@ -26,6 +26,7 @@ import org.embulk.spi.Exec;
26
26
  import org.embulk.spi.ExecSession;
27
27
  import org.embulk.spi.ExecAction;
28
28
  import org.embulk.spi.util.Filters;
29
+ import org.slf4j.Logger;
29
30
 
30
31
  public class PreviewExecutor
31
32
  {
@@ -35,6 +36,10 @@ public class PreviewExecutor
35
36
  public interface PreviewTask
36
37
  extends Task
37
38
  {
39
+ @Config("exec")
40
+ @ConfigDefault("{}")
41
+ public ConfigSource getExecConfig();
42
+
38
43
  @Config("in")
39
44
  @NotNull
40
45
  public ConfigSource getInputConfig();
@@ -52,6 +57,14 @@ public class PreviewExecutor
52
57
  public void setInputTask(TaskSource taskSource);
53
58
  }
54
59
 
60
+ public interface PreviewExecutorTask
61
+ extends Task
62
+ {
63
+ @Config("preview_sample_buffer_bytes")
64
+ @ConfigDefault("32768") // 32 * 1024
65
+ public int getSampleBufferBytes();
66
+ }
67
+
55
68
  @Inject
56
69
  public PreviewExecutor(Injector injector,
57
70
  @ForSystemConfig ConfigSource systemConfig)
@@ -93,7 +106,7 @@ public class PreviewExecutor
93
106
  List<FilterPlugin> filterPlugins = newFilterPlugins(task);
94
107
 
95
108
  if (inputPlugin instanceof FileInputRunner) { // file input runner
96
- Buffer sample = SamplingParserPlugin.runFileInputSampling((FileInputRunner)inputPlugin, config.getNested("in"));
109
+ Buffer sample = SamplingParserPlugin.runFileInputSampling((FileInputRunner)inputPlugin, config.getNested("in"), createSampleBufferConfigFromExecConfig(config.getNested("exec")));
97
110
  FileInputRunner previewRunner = new FileInputRunner(new BufferFileInputPlugin(sample));
98
111
  return doPreview(task, previewRunner, filterPlugins);
99
112
  }
@@ -102,6 +115,12 @@ public class PreviewExecutor
102
115
  }
103
116
  }
104
117
 
118
+ private static ConfigSource createSampleBufferConfigFromExecConfig(ConfigSource execConfig)
119
+ {
120
+ final PreviewExecutorTask execTask = execConfig.loadConfig(PreviewExecutorTask.class);
121
+ return Exec.newConfigSource().set("sample_buffer_bytes", execTask.getSampleBufferBytes());
122
+ }
123
+
105
124
  private PreviewResult doPreview(final PreviewTask task, final InputPlugin input, final List<FilterPlugin> filterPlugins)
106
125
  {
107
126
  try {
@@ -144,16 +163,19 @@ public class PreviewExecutor
144
163
  private static class SamplingPageOutput
145
164
  implements PageOutput
146
165
  {
166
+ private final Logger log = Exec.getLogger(this.getClass());
147
167
  private final int sampleRows;
148
168
  private final Schema schema;
149
169
  private List<Page> pages;
150
170
  private int recordCount;
171
+ private PreviewResult res;
151
172
 
152
173
  public SamplingPageOutput(int sampleRows, Schema schema)
153
174
  {
154
175
  this.sampleRows = sampleRows;
155
176
  this.schema = schema;
156
177
  this.pages = new ArrayList<Page>();
178
+ this.res = null;
157
179
  }
158
180
 
159
181
  public int getRecordCount()
@@ -174,10 +196,14 @@ public class PreviewExecutor
174
196
  @Override
175
197
  public void finish()
176
198
  {
199
+ if (res != null) {
200
+ log.error("PreviewResult recreation will cause a bug. The plugin must call PageOutput#finish() only once.");
201
+ }
202
+
177
203
  if (recordCount == 0) {
178
204
  throw new NoSampleException("No input records to preview");
179
205
  }
180
- PreviewResult res = new PreviewResult(schema, pages);
206
+ res = new PreviewResult(schema, pages);
181
207
  pages = null;
182
208
  throw new PreviewedNoticeError(res);
183
209
  }
@@ -1,8 +1,13 @@
1
1
  package org.embulk.exec;
2
2
 
3
+ import java.text.NumberFormat;
3
4
  import java.util.List;
5
+
4
6
  import com.google.inject.Inject;
5
7
  import com.google.common.base.Preconditions;
8
+ import org.embulk.config.Config;
9
+ import org.embulk.config.ConfigDefault;
10
+ import org.embulk.config.Task;
6
11
  import org.embulk.config.TaskSource;
7
12
  import org.embulk.config.ConfigSource;
8
13
  import org.embulk.config.TaskReport;
@@ -15,6 +20,9 @@ import org.embulk.spi.ParserPlugin;
15
20
  import org.embulk.spi.FileInput;
16
21
  import org.embulk.spi.FileInputRunner;
17
22
  import org.embulk.spi.PageOutput;
23
+ import org.slf4j.Logger;
24
+
25
+ import static java.util.Locale.ENGLISH;
18
26
  import static org.embulk.spi.util.Inputs.each;
19
27
 
20
28
  /*
@@ -25,9 +33,18 @@ public class SamplingParserPlugin
25
33
  {
26
34
  public static Buffer runFileInputSampling(final FileInputRunner runner, ConfigSource inputConfig)
27
35
  {
36
+ return runFileInputSampling(runner, inputConfig, Exec.newConfigSource());
37
+ }
38
+
39
+ public static Buffer runFileInputSampling(final FileInputRunner runner, ConfigSource inputConfig, ConfigSource sampleBufferConfig)
40
+ {
41
+ final SampleBufferTask sampleBufferTask = sampleBufferConfig.loadConfig(SampleBufferTask.class);
42
+
28
43
  // override in.parser.type so that FileInputRunner creates SamplingParserPlugin
29
44
  ConfigSource samplingInputConfig = inputConfig.deepCopy();
30
- samplingInputConfig.getNestedOrSetEmpty("parser").set("type", "system_sampling");
45
+ samplingInputConfig.getNestedOrSetEmpty("parser")
46
+ .set("type", "system_sampling")
47
+ .set("sample_buffer_bytes", sampleBufferTask.getSampleBufferBytes());
31
48
  samplingInputConfig.set("decoders", null);
32
49
 
33
50
  try {
@@ -120,42 +137,59 @@ public class SamplingParserPlugin
120
137
  }
121
138
  }
122
139
 
123
- private final int minSampleSize;
124
- private final int sampleSize;
140
+ private final NumberFormat numberFormat = NumberFormat.getNumberInstance(ENGLISH);
141
+ private final Logger log = Exec.getLogger(this.getClass());
142
+ private final int minSampleBufferBytes;
143
+
144
+ public interface PluginTask
145
+ extends Task, SampleBufferTask
146
+ {
147
+ }
148
+
149
+ public interface SampleBufferTask
150
+ extends Task
151
+ {
152
+ @Config("sample_buffer_bytes")
153
+ @ConfigDefault("32768") // 32 * 1024
154
+ public int getSampleBufferBytes();
155
+ }
125
156
 
126
157
  @Inject
127
158
  public SamplingParserPlugin(@ForSystemConfig ConfigSource systemConfig)
128
159
  {
129
- this.minSampleSize = 40; // empty gzip file is 33 bytes. // TODO get sample size from system config
130
- this.sampleSize = 32*1024; // TODO get sample size from system config. See also GuessExecutor.run.
131
- Preconditions.checkArgument(minSampleSize < sampleSize, "minSampleSize must be smaller than sampleSize");
160
+ this.minSampleBufferBytes = 40; // empty gzip file is 33 bytes. // TODO get sample size from system config
132
161
  }
133
162
 
134
163
  @Override
135
164
  public void transaction(ConfigSource config, ParserPlugin.Control control)
136
165
  {
137
- control.run(Exec.newTaskSource(), null);
166
+ PluginTask task = config.loadConfig(PluginTask.class);
167
+ Preconditions.checkArgument(minSampleBufferBytes < task.getSampleBufferBytes(), "minSampleBufferBytes must be smaller than sample_buffer_bytes");
168
+
169
+ log.info("Try to read {} bytes from input source", numberFormat.format(task.getSampleBufferBytes()));
170
+ control.run(task.dump(), null);
138
171
  }
139
172
 
140
173
  @Override
141
174
  public void run(TaskSource taskSource, Schema schema,
142
175
  FileInput input, PageOutput output)
143
176
  {
144
- Buffer buffer = readSample(input, sampleSize);
177
+ PluginTask task = taskSource.loadTask(PluginTask.class);
178
+ Buffer buffer = readSample(input, task.getSampleBufferBytes());
145
179
  if (!taskSource.get(boolean.class, "force", false)) {
146
- if (buffer.limit() < minSampleSize) {
180
+ if (buffer.limit() < minSampleBufferBytes) {
147
181
  throw new NotEnoughSampleError(buffer.limit());
148
182
  }
149
183
  }
150
184
  throw new SampledNoticeError(buffer);
151
185
  }
152
186
 
153
- public static Buffer readSample(FileInput fileInput, int sampleSize)
187
+ public static Buffer readSample(FileInput fileInput, int sampleBufferBytes)
154
188
  {
155
- return readSample(fileInput, Buffer.allocate(sampleSize), 0, sampleSize);
189
+ return readSample(fileInput, Buffer.allocate(sampleBufferBytes), 0, sampleBufferBytes);
156
190
  }
157
191
 
158
- public static Buffer readSample(FileInput fileInput, Buffer sample, int offset, int sampleSize)
192
+ public static Buffer readSample(FileInput fileInput, Buffer sample, int offset, int sampleBufferBytes)
159
193
  {
160
194
  if (!fileInput.nextFile()) {
161
195
  // no input files
@@ -168,7 +202,7 @@ public class SamplingParserPlugin
168
202
  sample.setBytes(offset, buffer, 0, size);
169
203
  offset += size;
170
204
  buffer.release();
171
- if (offset >= sampleSize) {
205
+ if (offset >= sampleBufferBytes) {
172
206
  break;
173
207
  }
174
208
  }
@@ -17,6 +17,8 @@ import org.embulk.exec.SamplingParserPlugin;
17
17
  import org.embulk.exec.ConfigurableGuessInputPlugin;
18
18
  import org.embulk.exec.NoSampleException;
19
19
 
20
+ import static org.embulk.exec.GuessExecutor.createSampleBufferConfigFromExecConfig;
21
+
20
22
  public class FileInputRunner
21
23
  implements InputPlugin, ConfigurableGuessInputPlugin
22
24
  {
@@ -80,14 +82,15 @@ public class FileInputRunner
80
82
  return guess(Exec.newConfigSource(), config);
81
83
  }
82
84
 
83
- public ConfigDiff guess(ConfigSource execConfig, ConfigSource config)
85
+ public ConfigDiff guess(ConfigSource execConfig, ConfigSource inputConfig)
84
86
  {
85
- Buffer sample = SamplingParserPlugin.runFileInputSampling(this, config);
87
+ final ConfigSource sampleBufferConfig = createSampleBufferConfigFromExecConfig(execConfig);
88
+ final Buffer sample = SamplingParserPlugin.runFileInputSampling(this, inputConfig, sampleBufferConfig);
86
89
  // SamplingParserPlugin.runFileInputSampling throws NoSampleException if there're
87
90
  // no files or all files are smaller than minSampleBufferBytes (40 bytes).
88
91
 
89
92
  GuessExecutor guessExecutor = Exec.getInjector().getInstance(GuessExecutor.class);
90
- return guessExecutor.guessParserConfig(sample, config, execConfig);
93
+ return guessExecutor.guessParserConfig(sample, inputConfig, execConfig);
91
94
  }
92
95
 
93
96
  private class RunnerControl
@@ -1,16 +1,12 @@
1
1
  package org.embulk.spi;
2
2
 
3
- import java.io.Serializable;
4
- import java.util.Map;
5
3
  import java.util.List;
6
4
  import java.util.Arrays;
7
5
  import java.util.ArrayList;
8
- import java.util.Comparator;
9
- import java.util.Collections;
10
- import com.google.common.collect.BiMap;
11
- import com.google.common.collect.HashBiMap;
12
6
  import io.airlift.slice.Slice;
13
7
  import io.airlift.slice.Slices;
8
+ import org.embulk.spi.type.Type;
9
+ import org.embulk.spi.type.Types;
14
10
  import org.msgpack.value.Value;
15
11
  import org.msgpack.value.ImmutableValue;
16
12
  import org.embulk.spi.time.Timestamp;
@@ -30,7 +26,8 @@ public class PageBuilder
30
26
  private int count;
31
27
  private int position;
32
28
  private final byte[] nullBitSet;
33
- private final BiMap<String, Integer> stringReferences = HashBiMap.create();
29
+ private final Row row;
30
+ private List<String> stringReferences = new ArrayList<>();
34
31
  private List<ImmutableValue> valueReferences = new ArrayList<>();
35
32
  private int referenceSize;
36
33
  private int nextVariableLengthDataOffset;
@@ -43,6 +40,7 @@ public class PageBuilder
43
40
  this.columnOffsets = PageFormat.columnOffsets(schema);
44
41
  this.nullBitSet = new byte[PageFormat.nullBitSetSize(schema)];
45
42
  Arrays.fill(nullBitSet, (byte) -1);
43
+ this.row = Row.newRow(schema);
46
44
  this.fixedRecordSize = PageFormat.recordHeaderSize(schema) + PageFormat.totalColumnSize(schema);
47
45
  this.nextVariableLengthDataOffset = fixedRecordSize;
48
46
  newBuffer();
@@ -54,7 +52,7 @@ public class PageBuilder
54
52
  this.bufferSlice = Slices.wrappedBuffer(buffer.array(), buffer.offset(), buffer.capacity());
55
53
  this.count = 0;
56
54
  this.position = PageFormat.PAGE_HEADER_SIZE;
57
- this.stringReferences.clear();
55
+ this.stringReferences = new ArrayList<>();
58
56
  this.valueReferences = new ArrayList<>();
59
57
  this.referenceSize = 0;
60
58
  }
@@ -71,12 +69,8 @@ public class PageBuilder
71
69
 
72
70
  public void setNull(int columnIndex)
73
71
  {
74
- nullBitSet[columnIndex >>> 3] |= (1 << (columnIndex & 7));
75
- }
72
+ row.setNull(columnIndex);
76
73
 
77
- private void clearNull(int columnIndex)
78
- {
79
- nullBitSet[columnIndex >>> 3] &= ~(1 << (columnIndex & 7));
80
74
  }
81
75
 
82
76
  public void setBoolean(Column column, boolean value)
@@ -87,8 +81,7 @@ public class PageBuilder
87
81
 
88
82
  public void setBoolean(int columnIndex, boolean value)
89
83
  {
90
- bufferSlice.setByte(getOffset(columnIndex), value ? (byte) 1 : (byte) 0);
91
- clearNull(columnIndex);
84
+ row.setBoolean(columnIndex, value);
92
85
  }
93
86
 
94
87
  public void setLong(Column column, long value)
@@ -99,8 +92,7 @@ public class PageBuilder
99
92
 
100
93
  public void setLong(int columnIndex, long value)
101
94
  {
102
- bufferSlice.setLong(getOffset(columnIndex), value);
103
- clearNull(columnIndex);
95
+ row.setLong(columnIndex, value);
104
96
  }
105
97
 
106
98
  public void setDouble(Column column, double value)
@@ -111,8 +103,7 @@ public class PageBuilder
111
103
 
112
104
  public void setDouble(int columnIndex, double value)
113
105
  {
114
- bufferSlice.setDouble(getOffset(columnIndex), value);
115
- clearNull(columnIndex);
106
+ row.setDouble(columnIndex, value);
116
107
  }
117
108
 
118
109
  public void setString(Column column, String value)
@@ -125,19 +116,10 @@ public class PageBuilder
125
116
  {
126
117
  if (value == null) {
127
118
  setNull(columnIndex);
128
- return;
129
119
  }
130
-
131
- Integer reuseIndex = stringReferences.get(value);
132
- if (reuseIndex != null) {
133
- bufferSlice.setInt(getOffset(columnIndex), reuseIndex);
134
- } else {
135
- int index = stringReferences.size();
136
- stringReferences.put(value, index);
137
- bufferSlice.setInt(getOffset(columnIndex), index);
138
- referenceSize += value.length() * 2 + 4; // assuming size of char = size of byte * 2 + length
120
+ else {
121
+ row.setString(columnIndex, value);
139
122
  }
140
- clearNull(columnIndex);
141
123
  }
142
124
 
143
125
  public void setJson(Column column, Value value)
@@ -150,14 +132,10 @@ public class PageBuilder
150
132
  {
151
133
  if (value == null) {
152
134
  setNull(columnIndex);
153
- return;
154
135
  }
155
-
156
- int index = valueReferences.size();
157
- valueReferences.add(value.immutableValue());
158
- bufferSlice.setInt(getOffset(columnIndex), index);
159
- referenceSize += 256; // TODO how to estimate size of the value?
160
- clearNull(columnIndex);
136
+ else {
137
+ row.setJson(columnIndex, value);
138
+ }
161
139
  }
162
140
 
163
141
  public void setTimestamp(Column column, Timestamp value)
@@ -170,49 +148,76 @@ public class PageBuilder
170
148
  {
171
149
  if (value == null) {
172
150
  setNull(columnIndex);
173
- return;
174
151
  }
152
+ else {
153
+ row.setTimestamp(columnIndex, value);
154
+ }
155
+ }
175
156
 
176
- int offset = getOffset(columnIndex);
177
- bufferSlice.setLong(offset, value.getEpochSecond());
178
- bufferSlice.setInt(offset + 8, value.getNano());
157
+ private void writeNull(int columnIndex)
158
+ {
159
+ nullBitSet[columnIndex >>> 3] |= (1 << (columnIndex & 7));
160
+ }
161
+
162
+ private void clearNull(int columnIndex)
163
+ {
164
+ nullBitSet[columnIndex >>> 3] &= ~(1 << (columnIndex & 7));
165
+ }
166
+
167
+ private void writeBoolean(int columnIndex, boolean value)
168
+ {
169
+ bufferSlice.setByte(getOffset(columnIndex), value ? (byte) 1 : (byte) 0);
179
170
  clearNull(columnIndex);
180
171
  }
181
172
 
182
- private int getOffset(int columnIndex)
173
+ private void writeLong(int columnIndex, long value)
183
174
  {
184
- return position + columnOffsets[columnIndex];
175
+ bufferSlice.setLong(getOffset(columnIndex), value);
176
+ clearNull(columnIndex);
185
177
  }
186
178
 
187
- private static class StringReferenceSortComparator
188
- implements Comparator<Map.Entry<String, Integer>>, Serializable
179
+ private void writeDouble(int columnIndex, double value)
189
180
  {
190
- @Override
191
- public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2)
192
- {
193
- return e1.getValue().compareTo(e2.getValue());
194
- }
181
+ bufferSlice.setDouble(getOffset(columnIndex), value);
182
+ clearNull(columnIndex);
183
+ }
195
184
 
196
- @Override
197
- public boolean equals(Object obj)
198
- {
199
- return obj instanceof StringReferenceSortComparator;
200
- }
185
+ private void writeString(int columnIndex, String value)
186
+ {
187
+ int index = stringReferences.size();
188
+ stringReferences.add(value);
189
+ bufferSlice.setInt(getOffset(columnIndex), index);
190
+ referenceSize += value.length() * 2 + 4; // assuming size of char = size of byte * 2 + length
191
+ clearNull(columnIndex);
201
192
  }
202
193
 
203
- private List<String> getSortedStringReferences()
194
+ private void writeJson(int columnIndex, Value value)
204
195
  {
205
- ArrayList<Map.Entry<String, Integer>> s = new ArrayList<>(stringReferences.entrySet());
206
- Collections.sort(s, new StringReferenceSortComparator());
207
- String[] array = new String[s.size()];
208
- for (int i=0; i < array.length; i++) {
209
- array[i] = s.get(i).getKey();
210
- }
211
- return Arrays.asList(array);
196
+ int index = valueReferences.size();
197
+ valueReferences.add(value.immutableValue());
198
+ bufferSlice.setInt(getOffset(columnIndex), index);
199
+ referenceSize += 256; // TODO how to estimate size of the value?
200
+ clearNull(columnIndex);
201
+ }
202
+
203
+ private void writeTimestamp(int columnIndex, Timestamp value)
204
+ {
205
+ int offset = getOffset(columnIndex);
206
+ bufferSlice.setLong(offset, value.getEpochSecond());
207
+ bufferSlice.setInt(offset + 8, value.getNano());
208
+ clearNull(columnIndex);
209
+ }
210
+
211
+ private int getOffset(int columnIndex)
212
+ {
213
+ return position + columnOffsets[columnIndex];
212
214
  }
213
215
 
214
216
  public void addRecord()
215
217
  {
218
+ // record
219
+ row.write(this);
220
+
216
221
  // record header
217
222
  bufferSlice.setInt(position, nextVariableLengthDataOffset); // nextVariableLengthDataOffset means record size
218
223
  bufferSlice.setBytes(position + 4, nullBitSet);
@@ -237,7 +242,7 @@ public class PageBuilder
237
242
 
238
243
  // flush page
239
244
  Page page = Page.wrap(buffer)
240
- .setStringReferences(getSortedStringReferences())
245
+ .setStringReferences(stringReferences)
241
246
  .setValueReferences(valueReferences);
242
247
  buffer = null;
243
248
  bufferSlice = null;
@@ -270,6 +275,322 @@ public class PageBuilder
270
275
  output.close();
271
276
  }
272
277
 
278
+ /**
279
+ * Row is a container that stages values before they are added to reference lists such as |stringReferences|.
280
+ *
281
+ * |Row| acts as a buffer against plugins that incorrectly set values without calling |PageBuilder#addRecord|.
282
+ * It holds just one value per column, whereas |PageBuilder| itself could store values twice regardless of columns.
283
+ * A value set more than once for the same column overwrites the earlier one.
284
+ */
285
+ private static class Row
286
+ {
287
+ private static Row newRow(Schema schema)
288
+ {
289
+ ColumnValue[] values = new ColumnValue[schema.getColumnCount()];
290
+ for (Column column : schema.getColumns()) {
291
+ values[column.getIndex()] = newValue(column);
292
+ }
293
+ return new Row(values);
294
+ }
295
+
296
+ private static ColumnValue newValue(Column column)
297
+ {
298
+ Type type = column.getType();
299
+ if (type.equals(Types.BOOLEAN)) {
300
+ return new BooleanColumnValue(column);
301
+ }
302
+ else if (type.equals(Types.DOUBLE)) {
303
+ return new DoubleColumnValue(column);
304
+ }
305
+ else if (type.equals(Types.LONG)) {
306
+ return new LongColumnValue(column);
307
+ }
308
+ else if (type.equals(Types.STRING)) {
309
+ return new StringColumnValue(column);
310
+ }
311
+ else if (type.equals(Types.JSON)) {
312
+ return new JsonColumnValue(column);
313
+ }
314
+ else if (type.equals(Types.TIMESTAMP)) {
315
+ return new TimestampColumnValue(column);
316
+ }
317
+ else {
318
+ throw new IllegalStateException("Unsupported type " + type.getName());
319
+ }
320
+ }
321
+
322
+ private final ColumnValue[] values;
323
+
324
+ private Row(ColumnValue[] values)
325
+ {
326
+ this.values = values;
327
+ }
328
+
329
+ private void setNull(int columnIndex)
330
+ {
331
+ values[columnIndex].setNull();
332
+ }
333
+
334
+ private void setBoolean(int columnIndex, boolean value)
335
+ {
336
+ values[columnIndex].setBoolean(value);
337
+ }
338
+
339
+ private void setLong(int columnIndex, long value)
340
+ {
341
+ values[columnIndex].setLong(value);
342
+ }
343
+
344
+ private void setDouble(int columnIndex, double value)
345
+ {
346
+ values[columnIndex].setDouble(value);
347
+ }
348
+
349
+ private void setString(int columnIndex, String value)
350
+ {
351
+ values[columnIndex].setString(value);
352
+ }
353
+
354
+ private void setJson(int columnIndex, Value value)
355
+ {
356
+ values[columnIndex].setJson(value);
357
+ }
358
+
359
+ private void setTimestamp(int columnIndex, Timestamp value)
360
+ {
361
+ values[columnIndex].setTimestamp(value);
362
+ }
363
+
364
+ private void write(PageBuilder pageBuilder)
365
+ {
366
+ for (ColumnValue v : values) {
367
+ v.write(pageBuilder);
368
+ }
369
+ }
370
+ }
371
+
372
+ private interface ColumnValue
373
+ {
374
+ void setBoolean(boolean value);
375
+
376
+ void setLong(long value);
377
+
378
+ void setDouble(double value);
379
+
380
+ void setString(String value);
381
+
382
+ void setJson(Value value);
383
+
384
+ void setTimestamp(Timestamp value);
385
+
386
+ void setNull();
387
+
388
+ void write(PageBuilder pageBuilder);
389
+ }
390
+
391
+ private static abstract class AbstractColumnValue
392
+ implements ColumnValue
393
+ {
394
+ protected final Column column;
395
+ protected boolean isNull;
396
+
397
+ protected AbstractColumnValue(Column column)
398
+ {
399
+ this.column = column;
400
+ }
401
+
402
+ public void setBoolean(boolean value)
403
+ {
404
+ throw new IllegalStateException("Not reach here");
405
+ }
406
+
407
+ public void setLong(long value)
408
+ {
409
+ throw new IllegalStateException("Not reach here");
410
+ }
411
+
412
+ public void setDouble(double value)
413
+ {
414
+ throw new IllegalStateException("Not reach here");
415
+ }
416
+
417
+ public void setString(String value)
418
+ {
419
+ throw new IllegalStateException("Not reach here");
420
+ }
421
+
422
+ public void setJson(Value value)
423
+ {
424
+ throw new IllegalStateException("Not reach here");
425
+ }
426
+
427
+ public void setTimestamp(Timestamp value)
428
+ {
429
+ throw new IllegalStateException("Not reach here");
430
+ }
431
+
432
+ public void setNull()
433
+ {
434
+ isNull = true;
435
+ }
436
+
437
+ public void write(PageBuilder pageBuilder)
438
+ {
439
+ if (!isNull) {
440
+ writeNotNull(pageBuilder);
441
+ }
442
+ else {
443
+ pageBuilder.writeNull(column.getIndex());
444
+ }
445
+ }
446
+
447
+ protected abstract void writeNotNull(PageBuilder pageBuilder);
448
+ }
449
+
450
+ private static class BooleanColumnValue
451
+ extends AbstractColumnValue
452
+ {
453
+ private boolean value;
454
+
455
+ BooleanColumnValue(Column column)
456
+ {
457
+ super(column);
458
+ }
459
+
460
+ @Override
461
+ public void setBoolean(boolean value)
462
+ {
463
+ this.value = value;
464
+ this.isNull = false;
465
+ }
466
+
467
+ @Override
468
+ public void writeNotNull(PageBuilder pageBuilder)
469
+ {
470
+ pageBuilder.writeBoolean(column.getIndex(), value);
471
+ }
472
+ }
473
+
474
+ private static class LongColumnValue
475
+ extends AbstractColumnValue
476
+ {
477
+ private long value;
478
+
479
+ LongColumnValue(Column column)
480
+ {
481
+ super(column);
482
+ }
483
+
484
+ @Override
485
+ public void setLong(long value)
486
+ {
487
+ this.value = value;
488
+ this.isNull = false;
489
+ }
490
+
491
+ @Override
492
+ public void writeNotNull(PageBuilder pageBuilder)
493
+ {
494
+ pageBuilder.writeLong(column.getIndex(), value);
495
+ }
496
+ }
497
+
498
+ private static class DoubleColumnValue
499
+ extends AbstractColumnValue
500
+ {
501
+ private double value;
502
+
503
+ DoubleColumnValue(Column column)
504
+ {
505
+ super(column);
506
+ }
507
+
508
+ @Override
509
+ public void setDouble(double value)
510
+ {
511
+ this.value = value;
512
+ this.isNull = false;
513
+ }
514
+
515
+ @Override
516
+ public void writeNotNull(PageBuilder pageBuilder)
517
+ {
518
+ pageBuilder.writeDouble(column.getIndex(), value);
519
+ }
520
+ }
521
+
522
+ private static class StringColumnValue
523
+ extends AbstractColumnValue
524
+ {
525
+ private String value;
526
+
527
+ StringColumnValue(Column column)
528
+ {
529
+ super(column);
530
+ }
531
+
532
+ @Override
533
+ public void setString(String value)
534
+ {
535
+ this.value = value;
536
+ this.isNull = false;
537
+ }
538
+
539
+ @Override
540
+ public void writeNotNull(PageBuilder pageBuilder)
541
+ {
542
+ pageBuilder.writeString(column.getIndex(), value);
543
+ }
544
+ }
545
+
546
+ private static class JsonColumnValue
547
+ extends AbstractColumnValue
548
+ {
549
+ private Value value;
550
+
551
+ JsonColumnValue(Column column)
552
+ {
553
+ super(column);
554
+ }
555
+
556
+ @Override
557
+ public void setJson(Value value)
558
+ {
559
+ this.value = value;
560
+ this.isNull = false;
561
+ }
562
+
563
+ @Override
564
+ public void writeNotNull(PageBuilder pageBuilder)
565
+ {
566
+ pageBuilder.writeJson(column.getIndex(), value);
567
+ }
568
+ }
569
+
570
+ private static class TimestampColumnValue
571
+ extends AbstractColumnValue
572
+ {
573
+ private Timestamp value;
574
+
575
+ TimestampColumnValue(Column column)
576
+ {
577
+ super(column);
578
+ }
579
+
580
+ @Override
581
+ public void setTimestamp(Timestamp value)
582
+ {
583
+ this.value = value;
584
+ this.isNull = false;
585
+ }
586
+
587
+ @Override
588
+ public void writeNotNull(PageBuilder pageBuilder)
589
+ {
590
+ pageBuilder.writeTimestamp(column.getIndex(), value);
591
+ }
592
+ }
593
+
273
594
  /* TODO for variable-length types
274
595
  private void flushAndTakeOverRemaingData()
275
596
  {