embulk 0.8.9-java → 0.8.10-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/build.gradle +2 -2
- data/embulk-core/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/BufferFileInputPlugin.java +88 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +0 -76
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +2 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +18 -5
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +2 -3
- data/embulk-docs/build.gradle +4 -4
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.10.rst +35 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +2 -13
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +108 -19
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +2 -2
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +27 -6
- data/embulk.gemspec +1 -1
- data/lib/embulk/command/embulk_migrate_plugin.rb +1 -1
- data/lib/embulk/data/bundle/.ruby-version +1 -1
- data/lib/embulk/data/new/ruby/.ruby-version +1 -1
- data/lib/embulk/version.rb +1 -1
- metadata +39 -37
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a7366cd20cfc525461f9cd341f277cefde3b830
|
4
|
+
data.tar.gz: 23301c5e13ab5d3be273df7187f28a87b727b46b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3687badaebab213a269d1b2685498c596f70c94f4829b57dec01f7b6358f2b758a035d572fc1e021e7c52c4fdab3c337d1cfc5a24aadca5e2ec87899ddb71949
|
7
|
+
data.tar.gz: 7b8047ebd25abefb9696df4bde8133bd33ba197884891675bc78124e3a4347f0d4757fb0f9739b64b673ecc5e11541644cf3432b528d199049112e601ec162a0
|
data/Gemfile.lock
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
embulk (0.8.
|
5
|
-
jruby-jars (= 9.
|
4
|
+
embulk (0.8.9)
|
5
|
+
jruby-jars (= 9.1.2.0)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
-
jruby-jars (9.
|
10
|
+
jruby-jars (9.1.2.0)
|
11
11
|
kramdown (1.5.0)
|
12
12
|
power_assert (0.2.2)
|
13
13
|
rake (10.4.2)
|
@@ -27,4 +27,4 @@ DEPENDENCIES
|
|
27
27
|
yard (~> 0.8.7)
|
28
28
|
|
29
29
|
BUNDLED WITH
|
30
|
-
1.
|
30
|
+
1.12.4
|
data/build.gradle
CHANGED
@@ -16,10 +16,10 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
16
16
|
|
17
17
|
allprojects {
|
18
18
|
group = 'org.embulk'
|
19
|
-
version = '0.8.
|
19
|
+
version = '0.8.10'
|
20
20
|
|
21
21
|
ext {
|
22
|
-
jrubyVersion = '9.
|
22
|
+
jrubyVersion = '9.1.2.0'
|
23
23
|
}
|
24
24
|
|
25
25
|
apply plugin: 'java'
|
data/embulk-core/build.gradle
CHANGED
@@ -38,7 +38,7 @@ dependencies {
|
|
38
38
|
compile 'joda-time:joda-time:2.9.2'
|
39
39
|
compile 'io.netty:netty-buffer:5.0.0.Alpha1'
|
40
40
|
compile 'org.fusesource.jansi:jansi:1.11'
|
41
|
-
compile 'org.msgpack:msgpack-core:0.8.
|
41
|
+
compile 'org.msgpack:msgpack-core:0.8.8'
|
42
42
|
|
43
43
|
// For embulk/guess/charset.rb. See also embulk.gemspec
|
44
44
|
compile 'com.ibm.icu:icu4j:54.1.1'
|
@@ -0,0 +1,88 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import org.embulk.config.ConfigDiff;
|
4
|
+
import org.embulk.config.ConfigSource;
|
5
|
+
import org.embulk.config.TaskReport;
|
6
|
+
import org.embulk.config.TaskSource;
|
7
|
+
import org.embulk.spi.Buffer;
|
8
|
+
import org.embulk.spi.Exec;
|
9
|
+
import org.embulk.spi.FileInputPlugin;
|
10
|
+
import org.embulk.spi.TransactionalFileInput;
|
11
|
+
|
12
|
+
import java.util.List;
|
13
|
+
|
14
|
+
public class BufferFileInputPlugin
|
15
|
+
implements FileInputPlugin
|
16
|
+
{
|
17
|
+
private Buffer buffer;
|
18
|
+
|
19
|
+
public BufferFileInputPlugin(Buffer buffer)
|
20
|
+
{
|
21
|
+
this.buffer = buffer;
|
22
|
+
}
|
23
|
+
|
24
|
+
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
25
|
+
{
|
26
|
+
control.run(Exec.newTaskSource(), 1);
|
27
|
+
return Exec.newConfigDiff();
|
28
|
+
}
|
29
|
+
|
30
|
+
public ConfigDiff resume(TaskSource taskSource,
|
31
|
+
int taskCount,
|
32
|
+
FileInputPlugin.Control control)
|
33
|
+
{
|
34
|
+
throw new UnsupportedOperationException();
|
35
|
+
}
|
36
|
+
|
37
|
+
public void cleanup(TaskSource taskSource,
|
38
|
+
int taskCount,
|
39
|
+
List<TaskReport> successTaskReports)
|
40
|
+
{
|
41
|
+
if (buffer != null) {
|
42
|
+
buffer.release();
|
43
|
+
buffer = null;
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
48
|
+
{
|
49
|
+
return new BufferTransactionalFileInput(buffer);
|
50
|
+
}
|
51
|
+
|
52
|
+
private static class BufferTransactionalFileInput
|
53
|
+
implements TransactionalFileInput
|
54
|
+
{
|
55
|
+
private Buffer buffer;
|
56
|
+
|
57
|
+
public BufferTransactionalFileInput(Buffer buffer)
|
58
|
+
{
|
59
|
+
this.buffer = buffer;
|
60
|
+
}
|
61
|
+
|
62
|
+
@Override
|
63
|
+
public Buffer poll()
|
64
|
+
{
|
65
|
+
Buffer b = buffer;
|
66
|
+
buffer = null;
|
67
|
+
return b;
|
68
|
+
}
|
69
|
+
|
70
|
+
@Override
|
71
|
+
public boolean nextFile()
|
72
|
+
{
|
73
|
+
return buffer != null;
|
74
|
+
}
|
75
|
+
|
76
|
+
@Override
|
77
|
+
public void close() { }
|
78
|
+
|
79
|
+
@Override
|
80
|
+
public void abort() { }
|
81
|
+
|
82
|
+
@Override
|
83
|
+
public TaskReport commit()
|
84
|
+
{
|
85
|
+
return null;
|
86
|
+
}
|
87
|
+
}
|
88
|
+
}
|
@@ -31,7 +31,7 @@ public class ExecModule
|
|
31
31
|
binder.bind(BufferAllocator.class).to(PooledBufferAllocator.class).in(Scopes.SINGLETON);
|
32
32
|
binder.bind(TempFileAllocator.class).in(Scopes.SINGLETON);
|
33
33
|
|
34
|
-
// GuessExecutor
|
34
|
+
// GuessExecutor, PreviewExecutor
|
35
35
|
registerPluginTo(binder, ParserPlugin.class, "system_guess", GuessExecutor.GuessParserPlugin.class);
|
36
36
|
registerPluginTo(binder, ParserPlugin.class, "system_sampling", SamplingParserPlugin.class);
|
37
37
|
|
@@ -191,82 +191,6 @@ public class GuessExecutor
|
|
191
191
|
return lastGuessed;
|
192
192
|
}
|
193
193
|
|
194
|
-
private static class BufferFileInputPlugin
|
195
|
-
implements FileInputPlugin
|
196
|
-
{
|
197
|
-
private Buffer buffer;
|
198
|
-
|
199
|
-
public BufferFileInputPlugin(Buffer buffer)
|
200
|
-
{
|
201
|
-
this.buffer = buffer;
|
202
|
-
}
|
203
|
-
|
204
|
-
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
205
|
-
{
|
206
|
-
control.run(Exec.newTaskSource(), 1);
|
207
|
-
return Exec.newConfigDiff();
|
208
|
-
}
|
209
|
-
|
210
|
-
public ConfigDiff resume(TaskSource taskSource,
|
211
|
-
int taskCount,
|
212
|
-
FileInputPlugin.Control control)
|
213
|
-
{
|
214
|
-
throw new UnsupportedOperationException();
|
215
|
-
}
|
216
|
-
|
217
|
-
public void cleanup(TaskSource taskSource,
|
218
|
-
int taskCount,
|
219
|
-
List<TaskReport> successTaskReports)
|
220
|
-
{
|
221
|
-
if (buffer != null) {
|
222
|
-
buffer.release();
|
223
|
-
buffer = null;
|
224
|
-
}
|
225
|
-
}
|
226
|
-
|
227
|
-
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
228
|
-
{
|
229
|
-
return new BufferTransactionalFileInput(buffer);
|
230
|
-
}
|
231
|
-
}
|
232
|
-
|
233
|
-
private static class BufferTransactionalFileInput
|
234
|
-
implements TransactionalFileInput
|
235
|
-
{
|
236
|
-
private Buffer buffer;
|
237
|
-
|
238
|
-
public BufferTransactionalFileInput(Buffer buffer)
|
239
|
-
{
|
240
|
-
this.buffer = buffer;
|
241
|
-
}
|
242
|
-
|
243
|
-
@Override
|
244
|
-
public Buffer poll()
|
245
|
-
{
|
246
|
-
Buffer b = buffer;
|
247
|
-
buffer = null;
|
248
|
-
return b;
|
249
|
-
}
|
250
|
-
|
251
|
-
@Override
|
252
|
-
public boolean nextFile()
|
253
|
-
{
|
254
|
-
return buffer != null;
|
255
|
-
}
|
256
|
-
|
257
|
-
@Override
|
258
|
-
public void close() { }
|
259
|
-
|
260
|
-
@Override
|
261
|
-
public void abort() { }
|
262
|
-
|
263
|
-
@Override
|
264
|
-
public TaskReport commit()
|
265
|
-
{
|
266
|
-
return null;
|
267
|
-
}
|
268
|
-
}
|
269
|
-
|
270
194
|
public static class GuessParserPlugin
|
271
195
|
implements ParserPlugin
|
272
196
|
{
|
@@ -301,6 +301,7 @@ public class LocalExecutorPlugin
|
|
301
301
|
|
302
302
|
// outputCommitted
|
303
303
|
tran.commit();
|
304
|
+
aborter.dontAbort();
|
304
305
|
}
|
305
306
|
}
|
306
307
|
finally {
|
@@ -551,6 +552,7 @@ public class LocalExecutorPlugin
|
|
551
552
|
catch (InterruptedException ex) {
|
552
553
|
error = ex;
|
553
554
|
}
|
555
|
+
outputWorkers[i] = null;
|
554
556
|
if (error != null) {
|
555
557
|
throw Throwables.propagate(error);
|
556
558
|
}
|
@@ -13,6 +13,9 @@ import org.embulk.config.TaskSource;
|
|
13
13
|
import org.embulk.config.ConfigSource;
|
14
14
|
import org.embulk.config.TaskReport;
|
15
15
|
import org.embulk.plugin.PluginType;
|
16
|
+
import org.embulk.spi.Buffer;
|
17
|
+
import org.embulk.spi.FileInputPlugin;
|
18
|
+
import org.embulk.spi.FileInputRunner;
|
16
19
|
import org.embulk.spi.Schema;
|
17
20
|
import org.embulk.spi.Page;
|
18
21
|
import org.embulk.spi.PageOutput;
|
@@ -85,10 +88,22 @@ public class PreviewExecutor
|
|
85
88
|
|
86
89
|
private PreviewResult doPreview(ConfigSource config)
|
87
90
|
{
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
+
PreviewTask task = config.loadConfig(PreviewTask.class);
|
92
|
+
InputPlugin inputPlugin = newInputPlugin(task);
|
93
|
+
List<FilterPlugin> filterPlugins = newFilterPlugins(task);
|
94
|
+
|
95
|
+
if (inputPlugin instanceof FileInputRunner) { // file input runner
|
96
|
+
Buffer sample = SamplingParserPlugin.runFileInputSampling((FileInputRunner)inputPlugin, config.getNested("in"));
|
97
|
+
FileInputRunner previewRunner = new FileInputRunner(new BufferFileInputPlugin(sample));
|
98
|
+
return doPreview(task, previewRunner, filterPlugins);
|
99
|
+
}
|
100
|
+
else {
|
101
|
+
return doPreview(task, inputPlugin, filterPlugins);
|
102
|
+
}
|
103
|
+
}
|
91
104
|
|
105
|
+
private PreviewResult doPreview(final PreviewTask task, final InputPlugin input, final List<FilterPlugin> filterPlugins)
|
106
|
+
{
|
92
107
|
try {
|
93
108
|
input.transaction(task.getInputConfig(), new InputPlugin.Control() {
|
94
109
|
public List<TaskReport> run(final TaskSource inputTask, Schema inputSchema, final int taskCount)
|
@@ -96,8 +111,6 @@ public class PreviewExecutor
|
|
96
111
|
Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
|
97
112
|
public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
|
98
113
|
{
|
99
|
-
InputPlugin input = newInputPlugin(task);
|
100
|
-
List<FilterPlugin> filterPlugins = newFilterPlugins(task);
|
101
114
|
Schema inputSchema = filterSchemas.get(0);
|
102
115
|
Schema outputSchema = filterSchemas.get(filterSchemas.size() - 1);
|
103
116
|
|
@@ -83,9 +83,8 @@ public class FileInputRunner
|
|
83
83
|
public ConfigDiff guess(ConfigSource execConfig, ConfigSource config)
|
84
84
|
{
|
85
85
|
Buffer sample = SamplingParserPlugin.runFileInputSampling(this, config);
|
86
|
-
|
87
|
-
|
88
|
-
}
|
86
|
+
// SamplingParserPlugin.runFileInputSampling throws NoSampleException if there're
|
87
|
+
// no files or all files are smaller than minSampleSize (40 bytes).
|
89
88
|
|
90
89
|
GuessExecutor guessExecutor = Exec.getInjector().getInstance(GuessExecutor.class);
|
91
90
|
return guessExecutor.guessParserConfig(sample, config, execConfig);
|
data/embulk-docs/build.gradle
CHANGED
@@ -9,24 +9,24 @@ dependencies {
|
|
9
9
|
jrubyExec 'rubygems:yard:0.8.7.6'
|
10
10
|
}
|
11
11
|
|
12
|
-
task
|
12
|
+
task sphinxHtml(type: Exec) {
|
13
13
|
workingDir '.'
|
14
14
|
commandLine 'make'
|
15
15
|
args 'html'
|
16
16
|
}
|
17
17
|
|
18
|
-
task
|
18
|
+
task javadocHtml(type: Copy, dependsOn: [':embulk-core:javadoc']) {
|
19
19
|
doFirst { file('build/html/javadoc').mkdirs() }
|
20
20
|
from project(':embulk-core').javadoc.destinationDir
|
21
21
|
into 'build/html/javadoc'
|
22
22
|
}
|
23
23
|
|
24
|
-
task
|
24
|
+
task rdocHtml(type: JRubyExec) {
|
25
25
|
workingDir '..'
|
26
26
|
jrubyArgs '-ryard', '-eYARD::CLI::Yardoc.run(*ARGV)'
|
27
27
|
script './lib/embulk/version.rb' // dummy
|
28
28
|
scriptArgs 'lib', '-o', 'embulk-docs/build/html/rdoc'
|
29
29
|
}
|
30
30
|
|
31
|
-
task site(type: Copy, dependsOn: ['
|
31
|
+
task site(type: Copy, dependsOn: ['sphinxHtml', 'rdocHtml', 'javadocHtml']) {
|
32
32
|
}
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,35 @@
|
|
1
|
+
Release 0.8.10
|
2
|
+
==================================
|
3
|
+
|
4
|
+
General Changes
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Fixed 'IllegalArgumentException: Self-suppression not permitted' error (@hata++) [#446]
|
8
|
+
|
9
|
+
* Fixed preview so that it no longer reads the entire file when the parser doesn't produce records. Preview now reads the first 32KB.
|
10
|
+
|
11
|
+
* Updated JRuby from 9.0.4.0 to 9.1.2.0. Release notes:
|
12
|
+
|
13
|
+
* http://jruby.org/2016/01/26/jruby-9-0-5-0.html
|
14
|
+
|
15
|
+
* http://jruby.org/2016/05/03/jruby-9-1-0-0.html
|
16
|
+
|
17
|
+
* http://jruby.org/2016/05/19/jruby-9-1-1-0.html
|
18
|
+
|
19
|
+
* http://jruby.org/2016/05/27/jruby-9-1-2-0.html
|
20
|
+
|
21
|
+
* Updated msgpack-java from 0.8.7 to 0.8.8. Release notes:
|
22
|
+
|
23
|
+
* https://github.com/msgpack/msgpack-java/blob/0.8.8/RELEASE_NOTES.md
|
24
|
+
|
25
|
+
Built-in plugins
|
26
|
+
------------------
|
27
|
+
|
28
|
+
* ``csv`` parser plugin supports delimiters longer than 1 character.
|
29
|
+
|
30
|
+
* ``csv`` parser no longer converts a non-quoted empty string into NULL when null_string is set. The default behavior is unchanged (a non-quoted empty string is converted into NULL).
|
31
|
+
|
32
|
+
|
33
|
+
Release Date
|
34
|
+
------------------
|
35
|
+
2016-07-21
|
@@ -57,7 +57,7 @@ public class CsvParserPlugin
|
|
57
57
|
|
58
58
|
@Config("delimiter")
|
59
59
|
@ConfigDefault("\",\"")
|
60
|
-
|
60
|
+
String getDelimiter();
|
61
61
|
|
62
62
|
@Config("quote")
|
63
63
|
@ConfigDefault("\"\\\"\"")
|
@@ -233,7 +233,6 @@ public class CsvParserPlugin
|
|
233
233
|
final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
|
234
234
|
final JsonParser jsonParser = new JsonParser();
|
235
235
|
final CsvTokenizer tokenizer = new CsvTokenizer(new LineDecoder(input, task), task);
|
236
|
-
final String nullStringOrNull = task.getNullString().orNull();
|
237
236
|
final boolean allowOptionalColumns = task.getAllowOptionalColumns();
|
238
237
|
final boolean allowExtraColumns = task.getAllowExtraColumns();
|
239
238
|
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
@@ -344,17 +343,7 @@ public class CsvParserPlugin
|
|
344
343
|
//TODO warning
|
345
344
|
return null;
|
346
345
|
}
|
347
|
-
|
348
|
-
if (!v.isEmpty()) {
|
349
|
-
if (v.equals(nullStringOrNull)) {
|
350
|
-
return null;
|
351
|
-
}
|
352
|
-
return v;
|
353
|
-
} else if (tokenizer.wasQuotedColumn()) {
|
354
|
-
return "";
|
355
|
-
} else {
|
356
|
-
return null;
|
357
|
-
}
|
346
|
+
return tokenizer.nextColumnOrNull();
|
358
347
|
}
|
359
348
|
});
|
360
349
|
|
@@ -7,6 +7,7 @@ import java.util.Deque;
|
|
7
7
|
import java.util.ArrayDeque;
|
8
8
|
import org.embulk.spi.DataException;
|
9
9
|
import org.embulk.spi.util.LineDecoder;
|
10
|
+
import org.embulk.config.ConfigException;
|
10
11
|
|
11
12
|
public class CsvTokenizer
|
12
13
|
{
|
@@ -24,7 +25,8 @@ public class CsvTokenizer
|
|
24
25
|
static final char NO_QUOTE = '\0';
|
25
26
|
static final char NO_ESCAPE = '\0';
|
26
27
|
|
27
|
-
private final char
|
28
|
+
private final char delimiterChar;
|
29
|
+
private final String delimiterFollowingString;
|
28
30
|
private final char quote;
|
29
31
|
private final char escape;
|
30
32
|
private final String newline;
|
@@ -32,6 +34,7 @@ public class CsvTokenizer
|
|
32
34
|
private final long maxQuotedSizeLimit;
|
33
35
|
private final String commentLineMarker;
|
34
36
|
private final LineDecoder input;
|
37
|
+
private final String nullStringOrNull;
|
35
38
|
|
36
39
|
private RecordState recordState = RecordState.END; // initial state is end of a record. nextRecord() must be called first
|
37
40
|
private long lineNumber = 0;
|
@@ -44,13 +47,24 @@ public class CsvTokenizer
|
|
44
47
|
|
45
48
|
public CsvTokenizer(LineDecoder input, CsvParserPlugin.PluginTask task)
|
46
49
|
{
|
47
|
-
delimiter = task.
|
50
|
+
String delimiter = task.getDelimiter();
|
51
|
+
if (delimiter.length() == 0) {
|
52
|
+
throw new ConfigException("Empty delimiter is not allowed");
|
53
|
+
} else {
|
54
|
+
this.delimiterChar = delimiter.charAt(0);
|
55
|
+
if (delimiter.length() > 1) {
|
56
|
+
delimiterFollowingString = delimiter.substring(1);
|
57
|
+
} else {
|
58
|
+
delimiterFollowingString = null;
|
59
|
+
}
|
60
|
+
}
|
48
61
|
quote = task.getQuoteChar().or(CsvParserPlugin.QuoteCharacter.noQuote()).getCharacter();
|
49
62
|
escape = task.getEscapeChar().or(CsvParserPlugin.EscapeCharacter.noEscape()).getCharacter();
|
50
63
|
newline = task.getNewline().getString();
|
51
64
|
trimIfNotQuoted = task.getTrimIfNotQuoted();
|
52
65
|
maxQuotedSizeLimit = task.getMaxQuotedSizeLimit();
|
53
66
|
commentLineMarker = task.getCommentLineMarker().orNull();
|
67
|
+
nullStringOrNull = task.getNullString().orNull();
|
54
68
|
this.input = input;
|
55
69
|
}
|
56
70
|
|
@@ -91,7 +105,11 @@ public class CsvTokenizer
|
|
91
105
|
|
92
106
|
public boolean nextFile()
|
93
107
|
{
|
94
|
-
|
108
|
+
boolean next = input.nextFile();
|
109
|
+
if (next) {
|
110
|
+
lineNumber = 0;
|
111
|
+
}
|
112
|
+
return next;
|
95
113
|
}
|
96
114
|
|
97
115
|
// used by guess-csv
|
@@ -169,9 +187,15 @@ public class CsvTokenizer
|
|
169
187
|
// this block can be out of the loop.
|
170
188
|
if (isDelimiter(c)) {
|
171
189
|
// empty value
|
172
|
-
|
173
|
-
|
174
|
-
|
190
|
+
if (delimiterFollowingString == null) {
|
191
|
+
return "";
|
192
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
193
|
+
linePos += delimiterFollowingString.length();
|
194
|
+
return "";
|
195
|
+
}
|
196
|
+
// not a delimiter
|
197
|
+
}
|
198
|
+
if (isEndOfLine(c)) {
|
175
199
|
// empty value
|
176
200
|
recordState = RecordState.END;
|
177
201
|
return "";
|
@@ -193,9 +217,15 @@ public class CsvTokenizer
|
|
193
217
|
case FIRST_TRIM:
|
194
218
|
if (isDelimiter(c)) {
|
195
219
|
// empty value
|
196
|
-
|
197
|
-
|
198
|
-
|
220
|
+
if (delimiterFollowingString == null) {
|
221
|
+
return "";
|
222
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
223
|
+
linePos += delimiterFollowingString.length();
|
224
|
+
return "";
|
225
|
+
}
|
226
|
+
// not a delimiter
|
227
|
+
}
|
228
|
+
if (isEndOfLine(c)) {
|
199
229
|
// empty value
|
200
230
|
recordState = RecordState.END;
|
201
231
|
return "";
|
@@ -218,9 +248,16 @@ public class CsvTokenizer
|
|
218
248
|
|
219
249
|
case VALUE:
|
220
250
|
if (isDelimiter(c)) {
|
221
|
-
|
222
|
-
|
223
|
-
|
251
|
+
if (delimiterFollowingString == null) {
|
252
|
+
return line.substring(valueStartPos, linePos - 1);
|
253
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
254
|
+
String value = line.substring(valueStartPos, linePos - 1);
|
255
|
+
linePos += delimiterFollowingString.length();
|
256
|
+
return value;
|
257
|
+
}
|
258
|
+
// not a delimiter
|
259
|
+
}
|
260
|
+
if (isEndOfLine(c)) {
|
224
261
|
recordState = RecordState.END;
|
225
262
|
return line.substring(valueStartPos, linePos);
|
226
263
|
|
@@ -241,9 +278,16 @@ public class CsvTokenizer
|
|
241
278
|
|
242
279
|
case LAST_TRIM_OR_VALUE:
|
243
280
|
if (isDelimiter(c)) {
|
244
|
-
|
245
|
-
|
246
|
-
|
281
|
+
if (delimiterFollowingString == null) {
|
282
|
+
return line.substring(valueStartPos, valueEndPos);
|
283
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
284
|
+
linePos += delimiterFollowingString.length();
|
285
|
+
return line.substring(valueStartPos, valueEndPos);
|
286
|
+
} else {
|
287
|
+
// not a delimiter
|
288
|
+
}
|
289
|
+
}
|
290
|
+
if (isEndOfLine(c)) {
|
247
291
|
recordState = RecordState.END;
|
248
292
|
return line.substring(valueStartPos, valueEndPos);
|
249
293
|
|
@@ -304,9 +348,15 @@ public class CsvTokenizer
|
|
304
348
|
|
305
349
|
case AFTER_QUOTED_VALUE:
|
306
350
|
if (isDelimiter(c)) {
|
307
|
-
|
308
|
-
|
309
|
-
|
351
|
+
if (delimiterFollowingString == null) {
|
352
|
+
return quotedValue.toString();
|
353
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
354
|
+
linePos += delimiterFollowingString.length();
|
355
|
+
return quotedValue.toString();
|
356
|
+
}
|
357
|
+
// not a delimiter
|
358
|
+
}
|
359
|
+
if (isEndOfLine(c)) {
|
310
360
|
recordState = RecordState.END;
|
311
361
|
return quotedValue.toString();
|
312
362
|
|
@@ -324,6 +374,32 @@ public class CsvTokenizer
|
|
324
374
|
}
|
325
375
|
}
|
326
376
|
|
377
|
+
public String nextColumnOrNull()
|
378
|
+
{
|
379
|
+
String v = nextColumn();
|
380
|
+
if (nullStringOrNull == null) {
|
381
|
+
if (v.isEmpty()) {
|
382
|
+
if (wasQuotedColumn) {
|
383
|
+
return "";
|
384
|
+
}
|
385
|
+
else {
|
386
|
+
return null;
|
387
|
+
}
|
388
|
+
}
|
389
|
+
else {
|
390
|
+
return v;
|
391
|
+
}
|
392
|
+
}
|
393
|
+
else {
|
394
|
+
if (v.equals(nullStringOrNull)) {
|
395
|
+
return null;
|
396
|
+
}
|
397
|
+
else {
|
398
|
+
return v;
|
399
|
+
}
|
400
|
+
}
|
401
|
+
}
|
402
|
+
|
327
403
|
public boolean wasQuotedColumn()
|
328
404
|
{
|
329
405
|
return wasQuotedColumn;
|
@@ -356,9 +432,22 @@ public class CsvTokenizer
|
|
356
432
|
return c == ' ';
|
357
433
|
}
|
358
434
|
|
435
|
+
private boolean isDelimiterFollowingFrom(int pos)
|
436
|
+
{
|
437
|
+
if (line.length() < pos + delimiterFollowingString.length()) {
|
438
|
+
return false;
|
439
|
+
}
|
440
|
+
for (int i = 0; i < delimiterFollowingString.length(); i++) {
|
441
|
+
if (delimiterFollowingString.charAt(i) != line.charAt(pos + i)) {
|
442
|
+
return false;
|
443
|
+
}
|
444
|
+
}
|
445
|
+
return true;
|
446
|
+
}
|
447
|
+
|
359
448
|
private boolean isDelimiter(char c)
|
360
449
|
{
|
361
|
-
return c ==
|
450
|
+
return c == delimiterChar;
|
362
451
|
}
|
363
452
|
|
364
453
|
private boolean isEndOfLine(char c)
|
@@ -33,7 +33,7 @@ public class TestCsvParserPlugin
|
|
33
33
|
assertEquals(Charset.forName("utf-8"), task.getCharset());
|
34
34
|
assertEquals(Newline.CRLF, task.getNewline());
|
35
35
|
assertEquals(false, task.getHeaderLine().or(false));
|
36
|
-
assertEquals(
|
36
|
+
assertEquals(",", task.getDelimiter());
|
37
37
|
assertEquals(Optional.of(new CsvParserPlugin.QuoteCharacter('\"')), task.getQuoteChar());
|
38
38
|
assertEquals(false, task.getAllowOptionalColumns());
|
39
39
|
assertEquals(DateTimeZone.UTC, task.getDefaultTimeZone());
|
@@ -68,7 +68,7 @@ public class TestCsvParserPlugin
|
|
68
68
|
assertEquals(Charset.forName("utf-16"), task.getCharset());
|
69
69
|
assertEquals(Newline.LF, task.getNewline());
|
70
70
|
assertEquals(true, task.getHeaderLine().or(false));
|
71
|
-
assertEquals(
|
71
|
+
assertEquals("\t", task.getDelimiter());
|
72
72
|
assertEquals(Optional.of(new CsvParserPlugin.QuoteCharacter('\\')), task.getQuoteChar());
|
73
73
|
assertEquals(true, task.getAllowOptionalColumns());
|
74
74
|
}
|
@@ -88,12 +88,8 @@ public class TestCsvTokenizer
|
|
88
88
|
while (tokenizer.nextRecord()) {
|
89
89
|
List<String> record = new ArrayList<>();
|
90
90
|
for (Column c : schema.getColumns()) {
|
91
|
-
String v = tokenizer.
|
92
|
-
|
93
|
-
record.add(v);
|
94
|
-
} else {
|
95
|
-
record.add(tokenizer.wasQuotedColumn() ? "" : null);
|
96
|
-
}
|
91
|
+
String v = tokenizer.nextColumnOrNull();
|
92
|
+
record.add(v);
|
97
93
|
}
|
98
94
|
records.add(record);
|
99
95
|
}
|
@@ -202,6 +198,31 @@ public class TestCsvTokenizer
|
|
202
198
|
"ccc\tddd"));
|
203
199
|
}
|
204
200
|
|
201
|
+
@Test
|
202
|
+
public void testDefaultNullString() throws Exception
|
203
|
+
{
|
204
|
+
reloadPluginTask();
|
205
|
+
assertEquals(expectedRecords(2,
|
206
|
+
null, "",
|
207
|
+
"NULL", "NULL"),
|
208
|
+
parse(task,
|
209
|
+
",\"\"",
|
210
|
+
"NULL,\"NULL\""));
|
211
|
+
}
|
212
|
+
|
213
|
+
@Test
|
214
|
+
public void testChangeNullString() throws Exception
|
215
|
+
{
|
216
|
+
config.set("null_string", "NULL");
|
217
|
+
reloadPluginTask();
|
218
|
+
assertEquals(expectedRecords(2,
|
219
|
+
"", "",
|
220
|
+
null, null),
|
221
|
+
parse(task,
|
222
|
+
",\"\"",
|
223
|
+
"NULL,\"NULL\""));
|
224
|
+
}
|
225
|
+
|
205
226
|
@Test
|
206
227
|
public void testQuotedValues() throws Exception
|
207
228
|
{
|
data/embulk.gemspec
CHANGED
@@ -1 +1 @@
|
|
1
|
-
jruby-9.
|
1
|
+
jruby-9.1.2.0
|
@@ -1 +1 @@
|
|
1
|
-
jruby-9.
|
1
|
+
jruby-9.1.2.0
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,127 +1,127 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.10
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 1.10.6
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
16
|
- - ">="
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: 1.10.6
|
19
|
+
name: bundler
|
25
20
|
prerelease: false
|
26
21
|
type: :runtime
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: msgpack
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
|
-
- - "
|
24
|
+
- - ">="
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
26
|
+
version: 1.10.6
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
30
|
- - "~>"
|
37
31
|
- !ruby/object:Gem::Version
|
38
32
|
version: 0.7.3
|
33
|
+
name: msgpack
|
39
34
|
prerelease: false
|
40
35
|
type: :runtime
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: liquid
|
43
36
|
version_requirements: !ruby/object:Gem::Requirement
|
44
37
|
requirements:
|
45
38
|
- - "~>"
|
46
39
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
40
|
+
version: 0.7.3
|
41
|
+
- !ruby/object:Gem::Dependency
|
48
42
|
requirement: !ruby/object:Gem::Requirement
|
49
43
|
requirements:
|
50
44
|
- - "~>"
|
51
45
|
- !ruby/object:Gem::Version
|
52
46
|
version: 3.0.6
|
47
|
+
name: liquid
|
53
48
|
prerelease: false
|
54
49
|
type: :runtime
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rjack-icu
|
57
50
|
version_requirements: !ruby/object:Gem::Requirement
|
58
51
|
requirements:
|
59
52
|
- - "~>"
|
60
53
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
54
|
+
version: 3.0.6
|
55
|
+
- !ruby/object:Gem::Dependency
|
62
56
|
requirement: !ruby/object:Gem::Requirement
|
63
57
|
requirements:
|
64
58
|
- - "~>"
|
65
59
|
- !ruby/object:Gem::Version
|
66
60
|
version: 4.54.1.1
|
61
|
+
name: rjack-icu
|
67
62
|
prerelease: false
|
68
63
|
type: :runtime
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: rake
|
71
64
|
version_requirements: !ruby/object:Gem::Requirement
|
72
65
|
requirements:
|
73
|
-
- - "
|
66
|
+
- - "~>"
|
74
67
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
68
|
+
version: 4.54.1.1
|
69
|
+
- !ruby/object:Gem::Dependency
|
76
70
|
requirement: !ruby/object:Gem::Requirement
|
77
71
|
requirements:
|
78
72
|
- - ">="
|
79
73
|
- !ruby/object:Gem::Version
|
80
74
|
version: 0.10.0
|
75
|
+
name: rake
|
81
76
|
prerelease: false
|
82
77
|
type: :development
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: test-unit
|
85
78
|
version_requirements: !ruby/object:Gem::Requirement
|
86
79
|
requirements:
|
87
|
-
- - "
|
80
|
+
- - ">="
|
88
81
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
82
|
+
version: 0.10.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
90
84
|
requirement: !ruby/object:Gem::Requirement
|
91
85
|
requirements:
|
92
86
|
- - "~>"
|
93
87
|
- !ruby/object:Gem::Version
|
94
88
|
version: 3.0.9
|
89
|
+
name: test-unit
|
95
90
|
prerelease: false
|
96
91
|
type: :development
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: yard
|
99
92
|
version_requirements: !ruby/object:Gem::Requirement
|
100
93
|
requirements:
|
101
94
|
- - "~>"
|
102
95
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0.
|
96
|
+
version: 3.0.9
|
97
|
+
- !ruby/object:Gem::Dependency
|
104
98
|
requirement: !ruby/object:Gem::Requirement
|
105
99
|
requirements:
|
106
100
|
- - "~>"
|
107
101
|
- !ruby/object:Gem::Version
|
108
102
|
version: 0.8.7
|
103
|
+
name: yard
|
109
104
|
prerelease: false
|
110
105
|
type: :development
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: kramdown
|
113
106
|
version_requirements: !ruby/object:Gem::Requirement
|
114
107
|
requirements:
|
115
108
|
- - "~>"
|
116
109
|
- !ruby/object:Gem::Version
|
117
|
-
version:
|
110
|
+
version: 0.8.7
|
111
|
+
- !ruby/object:Gem::Dependency
|
118
112
|
requirement: !ruby/object:Gem::Requirement
|
119
113
|
requirements:
|
120
114
|
- - "~>"
|
121
115
|
- !ruby/object:Gem::Version
|
122
116
|
version: 1.5.0
|
117
|
+
name: kramdown
|
123
118
|
prerelease: false
|
124
119
|
type: :development
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 1.5.0
|
125
125
|
description: Embulk is an open-source, plugin-based bulk data loader to scale and simplify data management across heterogeneous data stores. It can collect and ship any kinds of data in high throughput with transaction control.
|
126
126
|
email:
|
127
127
|
- frsyuki@gmail.com
|
@@ -148,9 +148,9 @@ files:
|
|
148
148
|
- classpath/commons-beanutils-core-1.8.3.jar
|
149
149
|
- classpath/commons-compress-1.10.jar
|
150
150
|
- classpath/commons-lang3-3.1.jar
|
151
|
-
- classpath/embulk-cli-0.8.
|
152
|
-
- classpath/embulk-core-0.8.
|
153
|
-
- classpath/embulk-standards-0.8.
|
151
|
+
- classpath/embulk-cli-0.8.10.jar
|
152
|
+
- classpath/embulk-core-0.8.10.jar
|
153
|
+
- classpath/embulk-standards-0.8.10.jar
|
154
154
|
- classpath/guava-18.0.jar
|
155
155
|
- classpath/guice-4.0.jar
|
156
156
|
- classpath/guice-bootstrap-0.1.1.jar
|
@@ -166,7 +166,7 @@ files:
|
|
166
166
|
- classpath/joda-time-2.9.2.jar
|
167
167
|
- classpath/logback-classic-1.1.3.jar
|
168
168
|
- classpath/logback-core-1.1.3.jar
|
169
|
-
- classpath/msgpack-core-0.8.
|
169
|
+
- classpath/msgpack-core-0.8.8.jar
|
170
170
|
- classpath/netty-buffer-5.0.0.Alpha1.jar
|
171
171
|
- classpath/netty-common-5.0.0.Alpha1.jar
|
172
172
|
- classpath/slf4j-api-1.7.12.jar
|
@@ -207,6 +207,7 @@ files:
|
|
207
207
|
- embulk-core/src/main/java/org/embulk/config/UserDataException.java
|
208
208
|
- embulk-core/src/main/java/org/embulk/config/UserDataExceptions.java
|
209
209
|
- embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java
|
210
|
+
- embulk-core/src/main/java/org/embulk/exec/BufferFileInputPlugin.java
|
210
211
|
- embulk-core/src/main/java/org/embulk/exec/BulkLoader.java
|
211
212
|
- embulk-core/src/main/java/org/embulk/exec/ConfigurableGuessInputPlugin.java
|
212
213
|
- embulk-core/src/main/java/org/embulk/exec/ExecModule.java
|
@@ -462,6 +463,7 @@ files:
|
|
462
463
|
- embulk-docs/src/release/release-0.7.9.rst
|
463
464
|
- embulk-docs/src/release/release-0.8.0.rst
|
464
465
|
- embulk-docs/src/release/release-0.8.1.rst
|
466
|
+
- embulk-docs/src/release/release-0.8.10.rst
|
465
467
|
- embulk-docs/src/release/release-0.8.2.rst
|
466
468
|
- embulk-docs/src/release/release-0.8.3.rst
|
467
469
|
- embulk-docs/src/release/release-0.8.4.rst
|
@@ -609,7 +611,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
609
611
|
version: '0'
|
610
612
|
requirements: []
|
611
613
|
rubyforge_project:
|
612
|
-
rubygems_version: 2.4
|
614
|
+
rubygems_version: 2.6.4
|
613
615
|
signing_key:
|
614
616
|
specification_version: 4
|
615
617
|
summary: Embulk, a plugin-based parallel bulk data loader
|