embulk 0.8.9 → 0.8.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/build.gradle +2 -2
- data/embulk-core/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/BufferFileInputPlugin.java +88 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +0 -76
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +2 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +18 -5
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +2 -3
- data/embulk-docs/build.gradle +4 -4
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.10.rst +35 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +2 -13
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +108 -19
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +2 -2
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +27 -6
- data/embulk.gemspec +1 -1
- data/lib/embulk/command/embulk_migrate_plugin.rb +1 -1
- data/lib/embulk/data/bundle/.ruby-version +1 -1
- data/lib/embulk/data/new/ruby/.ruby-version +1 -1
- data/lib/embulk/version.rb +1 -1
- metadata +10 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a7102ccd2c44833976bac88ae9d0e9817be8743
|
4
|
+
data.tar.gz: 77b0075a55fe8afb19da2afb7f1cb7ce7e7ecb90
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 17bf7846353ee95ea5f8378534ea89d44ec18e2bb10a06073c803bef75bb8b2563437fa4d752f6e506569a78ab80018abb6c66b146ca144cf9bc3e18b08b44ba
|
7
|
+
data.tar.gz: df8153562d5d64f596ba1a004e38dd2461bbd0809be27013d20d2cc25fd8da4b35a69fde260531ac8cb7b323a1f12232b8c7b7b4f5470471f3b7a8ca2fa80012
|
data/Gemfile.lock
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
embulk (0.8.
|
5
|
-
jruby-jars (= 9.
|
4
|
+
embulk (0.8.9)
|
5
|
+
jruby-jars (= 9.1.2.0)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
-
jruby-jars (9.
|
10
|
+
jruby-jars (9.1.2.0)
|
11
11
|
kramdown (1.5.0)
|
12
12
|
power_assert (0.2.2)
|
13
13
|
rake (10.4.2)
|
@@ -27,4 +27,4 @@ DEPENDENCIES
|
|
27
27
|
yard (~> 0.8.7)
|
28
28
|
|
29
29
|
BUNDLED WITH
|
30
|
-
1.
|
30
|
+
1.12.4
|
data/build.gradle
CHANGED
@@ -16,10 +16,10 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
16
16
|
|
17
17
|
allprojects {
|
18
18
|
group = 'org.embulk'
|
19
|
-
version = '0.8.
|
19
|
+
version = '0.8.10'
|
20
20
|
|
21
21
|
ext {
|
22
|
-
jrubyVersion = '9.
|
22
|
+
jrubyVersion = '9.1.2.0'
|
23
23
|
}
|
24
24
|
|
25
25
|
apply plugin: 'java'
|
data/embulk-core/build.gradle
CHANGED
@@ -38,7 +38,7 @@ dependencies {
|
|
38
38
|
compile 'joda-time:joda-time:2.9.2'
|
39
39
|
compile 'io.netty:netty-buffer:5.0.0.Alpha1'
|
40
40
|
compile 'org.fusesource.jansi:jansi:1.11'
|
41
|
-
compile 'org.msgpack:msgpack-core:0.8.
|
41
|
+
compile 'org.msgpack:msgpack-core:0.8.8'
|
42
42
|
|
43
43
|
// For embulk/guess/charset.rb. See also embulk.gemspec
|
44
44
|
compile 'com.ibm.icu:icu4j:54.1.1'
|
@@ -0,0 +1,88 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import org.embulk.config.ConfigDiff;
|
4
|
+
import org.embulk.config.ConfigSource;
|
5
|
+
import org.embulk.config.TaskReport;
|
6
|
+
import org.embulk.config.TaskSource;
|
7
|
+
import org.embulk.spi.Buffer;
|
8
|
+
import org.embulk.spi.Exec;
|
9
|
+
import org.embulk.spi.FileInputPlugin;
|
10
|
+
import org.embulk.spi.TransactionalFileInput;
|
11
|
+
|
12
|
+
import java.util.List;
|
13
|
+
|
14
|
+
public class BufferFileInputPlugin
|
15
|
+
implements FileInputPlugin
|
16
|
+
{
|
17
|
+
private Buffer buffer;
|
18
|
+
|
19
|
+
public BufferFileInputPlugin(Buffer buffer)
|
20
|
+
{
|
21
|
+
this.buffer = buffer;
|
22
|
+
}
|
23
|
+
|
24
|
+
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
25
|
+
{
|
26
|
+
control.run(Exec.newTaskSource(), 1);
|
27
|
+
return Exec.newConfigDiff();
|
28
|
+
}
|
29
|
+
|
30
|
+
public ConfigDiff resume(TaskSource taskSource,
|
31
|
+
int taskCount,
|
32
|
+
FileInputPlugin.Control control)
|
33
|
+
{
|
34
|
+
throw new UnsupportedOperationException();
|
35
|
+
}
|
36
|
+
|
37
|
+
public void cleanup(TaskSource taskSource,
|
38
|
+
int taskCount,
|
39
|
+
List<TaskReport> successTaskReports)
|
40
|
+
{
|
41
|
+
if (buffer != null) {
|
42
|
+
buffer.release();
|
43
|
+
buffer = null;
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
48
|
+
{
|
49
|
+
return new BufferTransactionalFileInput(buffer);
|
50
|
+
}
|
51
|
+
|
52
|
+
private static class BufferTransactionalFileInput
|
53
|
+
implements TransactionalFileInput
|
54
|
+
{
|
55
|
+
private Buffer buffer;
|
56
|
+
|
57
|
+
public BufferTransactionalFileInput(Buffer buffer)
|
58
|
+
{
|
59
|
+
this.buffer = buffer;
|
60
|
+
}
|
61
|
+
|
62
|
+
@Override
|
63
|
+
public Buffer poll()
|
64
|
+
{
|
65
|
+
Buffer b = buffer;
|
66
|
+
buffer = null;
|
67
|
+
return b;
|
68
|
+
}
|
69
|
+
|
70
|
+
@Override
|
71
|
+
public boolean nextFile()
|
72
|
+
{
|
73
|
+
return buffer != null;
|
74
|
+
}
|
75
|
+
|
76
|
+
@Override
|
77
|
+
public void close() { }
|
78
|
+
|
79
|
+
@Override
|
80
|
+
public void abort() { }
|
81
|
+
|
82
|
+
@Override
|
83
|
+
public TaskReport commit()
|
84
|
+
{
|
85
|
+
return null;
|
86
|
+
}
|
87
|
+
}
|
88
|
+
}
|
@@ -31,7 +31,7 @@ public class ExecModule
|
|
31
31
|
binder.bind(BufferAllocator.class).to(PooledBufferAllocator.class).in(Scopes.SINGLETON);
|
32
32
|
binder.bind(TempFileAllocator.class).in(Scopes.SINGLETON);
|
33
33
|
|
34
|
-
// GuessExecutor
|
34
|
+
// GuessExecutor, PreviewExecutor
|
35
35
|
registerPluginTo(binder, ParserPlugin.class, "system_guess", GuessExecutor.GuessParserPlugin.class);
|
36
36
|
registerPluginTo(binder, ParserPlugin.class, "system_sampling", SamplingParserPlugin.class);
|
37
37
|
|
@@ -191,82 +191,6 @@ public class GuessExecutor
|
|
191
191
|
return lastGuessed;
|
192
192
|
}
|
193
193
|
|
194
|
-
private static class BufferFileInputPlugin
|
195
|
-
implements FileInputPlugin
|
196
|
-
{
|
197
|
-
private Buffer buffer;
|
198
|
-
|
199
|
-
public BufferFileInputPlugin(Buffer buffer)
|
200
|
-
{
|
201
|
-
this.buffer = buffer;
|
202
|
-
}
|
203
|
-
|
204
|
-
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
205
|
-
{
|
206
|
-
control.run(Exec.newTaskSource(), 1);
|
207
|
-
return Exec.newConfigDiff();
|
208
|
-
}
|
209
|
-
|
210
|
-
public ConfigDiff resume(TaskSource taskSource,
|
211
|
-
int taskCount,
|
212
|
-
FileInputPlugin.Control control)
|
213
|
-
{
|
214
|
-
throw new UnsupportedOperationException();
|
215
|
-
}
|
216
|
-
|
217
|
-
public void cleanup(TaskSource taskSource,
|
218
|
-
int taskCount,
|
219
|
-
List<TaskReport> successTaskReports)
|
220
|
-
{
|
221
|
-
if (buffer != null) {
|
222
|
-
buffer.release();
|
223
|
-
buffer = null;
|
224
|
-
}
|
225
|
-
}
|
226
|
-
|
227
|
-
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
228
|
-
{
|
229
|
-
return new BufferTransactionalFileInput(buffer);
|
230
|
-
}
|
231
|
-
}
|
232
|
-
|
233
|
-
private static class BufferTransactionalFileInput
|
234
|
-
implements TransactionalFileInput
|
235
|
-
{
|
236
|
-
private Buffer buffer;
|
237
|
-
|
238
|
-
public BufferTransactionalFileInput(Buffer buffer)
|
239
|
-
{
|
240
|
-
this.buffer = buffer;
|
241
|
-
}
|
242
|
-
|
243
|
-
@Override
|
244
|
-
public Buffer poll()
|
245
|
-
{
|
246
|
-
Buffer b = buffer;
|
247
|
-
buffer = null;
|
248
|
-
return b;
|
249
|
-
}
|
250
|
-
|
251
|
-
@Override
|
252
|
-
public boolean nextFile()
|
253
|
-
{
|
254
|
-
return buffer != null;
|
255
|
-
}
|
256
|
-
|
257
|
-
@Override
|
258
|
-
public void close() { }
|
259
|
-
|
260
|
-
@Override
|
261
|
-
public void abort() { }
|
262
|
-
|
263
|
-
@Override
|
264
|
-
public TaskReport commit()
|
265
|
-
{
|
266
|
-
return null;
|
267
|
-
}
|
268
|
-
}
|
269
|
-
|
270
194
|
public static class GuessParserPlugin
|
271
195
|
implements ParserPlugin
|
272
196
|
{
|
@@ -301,6 +301,7 @@ public class LocalExecutorPlugin
|
|
301
301
|
|
302
302
|
// outputCommitted
|
303
303
|
tran.commit();
|
304
|
+
aborter.dontAbort();
|
304
305
|
}
|
305
306
|
}
|
306
307
|
finally {
|
@@ -551,6 +552,7 @@ public class LocalExecutorPlugin
|
|
551
552
|
catch (InterruptedException ex) {
|
552
553
|
error = ex;
|
553
554
|
}
|
555
|
+
outputWorkers[i] = null;
|
554
556
|
if (error != null) {
|
555
557
|
throw Throwables.propagate(error);
|
556
558
|
}
|
@@ -13,6 +13,9 @@ import org.embulk.config.TaskSource;
|
|
13
13
|
import org.embulk.config.ConfigSource;
|
14
14
|
import org.embulk.config.TaskReport;
|
15
15
|
import org.embulk.plugin.PluginType;
|
16
|
+
import org.embulk.spi.Buffer;
|
17
|
+
import org.embulk.spi.FileInputPlugin;
|
18
|
+
import org.embulk.spi.FileInputRunner;
|
16
19
|
import org.embulk.spi.Schema;
|
17
20
|
import org.embulk.spi.Page;
|
18
21
|
import org.embulk.spi.PageOutput;
|
@@ -85,10 +88,22 @@ public class PreviewExecutor
|
|
85
88
|
|
86
89
|
private PreviewResult doPreview(ConfigSource config)
|
87
90
|
{
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
+
PreviewTask task = config.loadConfig(PreviewTask.class);
|
92
|
+
InputPlugin inputPlugin = newInputPlugin(task);
|
93
|
+
List<FilterPlugin> filterPlugins = newFilterPlugins(task);
|
94
|
+
|
95
|
+
if (inputPlugin instanceof FileInputRunner) { // file input runner
|
96
|
+
Buffer sample = SamplingParserPlugin.runFileInputSampling((FileInputRunner)inputPlugin, config.getNested("in"));
|
97
|
+
FileInputRunner previewRunner = new FileInputRunner(new BufferFileInputPlugin(sample));
|
98
|
+
return doPreview(task, previewRunner, filterPlugins);
|
99
|
+
}
|
100
|
+
else {
|
101
|
+
return doPreview(task, inputPlugin, filterPlugins);
|
102
|
+
}
|
103
|
+
}
|
91
104
|
|
105
|
+
private PreviewResult doPreview(final PreviewTask task, final InputPlugin input, final List<FilterPlugin> filterPlugins)
|
106
|
+
{
|
92
107
|
try {
|
93
108
|
input.transaction(task.getInputConfig(), new InputPlugin.Control() {
|
94
109
|
public List<TaskReport> run(final TaskSource inputTask, Schema inputSchema, final int taskCount)
|
@@ -96,8 +111,6 @@ public class PreviewExecutor
|
|
96
111
|
Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
|
97
112
|
public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
|
98
113
|
{
|
99
|
-
InputPlugin input = newInputPlugin(task);
|
100
|
-
List<FilterPlugin> filterPlugins = newFilterPlugins(task);
|
101
114
|
Schema inputSchema = filterSchemas.get(0);
|
102
115
|
Schema outputSchema = filterSchemas.get(filterSchemas.size() - 1);
|
103
116
|
|
@@ -83,9 +83,8 @@ public class FileInputRunner
|
|
83
83
|
public ConfigDiff guess(ConfigSource execConfig, ConfigSource config)
|
84
84
|
{
|
85
85
|
Buffer sample = SamplingParserPlugin.runFileInputSampling(this, config);
|
86
|
-
|
87
|
-
|
88
|
-
}
|
86
|
+
// SamplingParserPlugin.runFileInputSampling throws NoSampleException if there are
|
87
|
+
// no files or all files are smaller than minSampleSize (40 bytes).
|
89
88
|
|
90
89
|
GuessExecutor guessExecutor = Exec.getInjector().getInstance(GuessExecutor.class);
|
91
90
|
return guessExecutor.guessParserConfig(sample, config, execConfig);
|
data/embulk-docs/build.gradle
CHANGED
@@ -9,24 +9,24 @@ dependencies {
|
|
9
9
|
jrubyExec 'rubygems:yard:0.8.7.6'
|
10
10
|
}
|
11
11
|
|
12
|
-
task
|
12
|
+
task sphinxHtml(type: Exec) {
|
13
13
|
workingDir '.'
|
14
14
|
commandLine 'make'
|
15
15
|
args 'html'
|
16
16
|
}
|
17
17
|
|
18
|
-
task
|
18
|
+
task javadocHtml(type: Copy, dependsOn: [':embulk-core:javadoc']) {
|
19
19
|
doFirst { file('build/html/javadoc').mkdirs() }
|
20
20
|
from project(':embulk-core').javadoc.destinationDir
|
21
21
|
into 'build/html/javadoc'
|
22
22
|
}
|
23
23
|
|
24
|
-
task
|
24
|
+
task rdocHtml(type: JRubyExec) {
|
25
25
|
workingDir '..'
|
26
26
|
jrubyArgs '-ryard', '-eYARD::CLI::Yardoc.run(*ARGV)'
|
27
27
|
script './lib/embulk/version.rb' // dummy
|
28
28
|
scriptArgs 'lib', '-o', 'embulk-docs/build/html/rdoc'
|
29
29
|
}
|
30
30
|
|
31
|
-
task site(type: Copy, dependsOn: ['
|
31
|
+
task site(type: Copy, dependsOn: ['sphinxHtml', 'rdocHtml', 'javadocHtml']) {
|
32
32
|
}
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,35 @@
|
|
1
|
+
Release 0.8.10
|
2
|
+
==================================
|
3
|
+
|
4
|
+
General Changes
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Fixed 'IllegalArgumentException: Self-suppression not permitted' error (@hata++) [#446]
|
8
|
+
|
9
|
+
* Fixed preview so that it no longer reads the entire file when the parser doesn't produce records. Preview now reads only the first 32KB.
|
10
|
+
|
11
|
+
* Updated JRuby from 9.0.4.0 to 9.1.2.0. Release notes:
|
12
|
+
|
13
|
+
* http://jruby.org/2016/01/26/jruby-9-0-5-0.html
|
14
|
+
|
15
|
+
* http://jruby.org/2016/05/03/jruby-9-1-0-0.html
|
16
|
+
|
17
|
+
* http://jruby.org/2016/05/19/jruby-9-1-1-0.html
|
18
|
+
|
19
|
+
* http://jruby.org/2016/05/27/jruby-9-1-2-0.html
|
20
|
+
|
21
|
+
* Updated msgpack-java from 0.8.7 to 0.8.8. Release notes:
|
22
|
+
|
23
|
+
* https://github.com/msgpack/msgpack-java/blob/0.8.8/RELEASE_NOTES.md
|
24
|
+
|
25
|
+
Built-in plugins
|
26
|
+
------------------
|
27
|
+
|
28
|
+
* ``csv`` parser plugin supports delimiters longer than 1 character.
|
29
|
+
|
30
|
+
* ``csv`` parser no longer converts a non-quoted empty string into NULL when null_string is set. The default behavior is unchanged (a non-quoted empty string is converted into NULL).
|
31
|
+
|
32
|
+
|
33
|
+
Release Date
|
34
|
+
------------------
|
35
|
+
2016-07-21
|
@@ -57,7 +57,7 @@ public class CsvParserPlugin
|
|
57
57
|
|
58
58
|
@Config("delimiter")
|
59
59
|
@ConfigDefault("\",\"")
|
60
|
-
|
60
|
+
String getDelimiter();
|
61
61
|
|
62
62
|
@Config("quote")
|
63
63
|
@ConfigDefault("\"\\\"\"")
|
@@ -233,7 +233,6 @@ public class CsvParserPlugin
|
|
233
233
|
final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
|
234
234
|
final JsonParser jsonParser = new JsonParser();
|
235
235
|
final CsvTokenizer tokenizer = new CsvTokenizer(new LineDecoder(input, task), task);
|
236
|
-
final String nullStringOrNull = task.getNullString().orNull();
|
237
236
|
final boolean allowOptionalColumns = task.getAllowOptionalColumns();
|
238
237
|
final boolean allowExtraColumns = task.getAllowExtraColumns();
|
239
238
|
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
@@ -344,17 +343,7 @@ public class CsvParserPlugin
|
|
344
343
|
//TODO warning
|
345
344
|
return null;
|
346
345
|
}
|
347
|
-
|
348
|
-
if (!v.isEmpty()) {
|
349
|
-
if (v.equals(nullStringOrNull)) {
|
350
|
-
return null;
|
351
|
-
}
|
352
|
-
return v;
|
353
|
-
} else if (tokenizer.wasQuotedColumn()) {
|
354
|
-
return "";
|
355
|
-
} else {
|
356
|
-
return null;
|
357
|
-
}
|
346
|
+
return tokenizer.nextColumnOrNull();
|
358
347
|
}
|
359
348
|
});
|
360
349
|
|
@@ -7,6 +7,7 @@ import java.util.Deque;
|
|
7
7
|
import java.util.ArrayDeque;
|
8
8
|
import org.embulk.spi.DataException;
|
9
9
|
import org.embulk.spi.util.LineDecoder;
|
10
|
+
import org.embulk.config.ConfigException;
|
10
11
|
|
11
12
|
public class CsvTokenizer
|
12
13
|
{
|
@@ -24,7 +25,8 @@ public class CsvTokenizer
|
|
24
25
|
static final char NO_QUOTE = '\0';
|
25
26
|
static final char NO_ESCAPE = '\0';
|
26
27
|
|
27
|
-
private final char
|
28
|
+
private final char delimiterChar;
|
29
|
+
private final String delimiterFollowingString;
|
28
30
|
private final char quote;
|
29
31
|
private final char escape;
|
30
32
|
private final String newline;
|
@@ -32,6 +34,7 @@ public class CsvTokenizer
|
|
32
34
|
private final long maxQuotedSizeLimit;
|
33
35
|
private final String commentLineMarker;
|
34
36
|
private final LineDecoder input;
|
37
|
+
private final String nullStringOrNull;
|
35
38
|
|
36
39
|
private RecordState recordState = RecordState.END; // initial state is end of a record. nextRecord() must be called first
|
37
40
|
private long lineNumber = 0;
|
@@ -44,13 +47,24 @@ public class CsvTokenizer
|
|
44
47
|
|
45
48
|
public CsvTokenizer(LineDecoder input, CsvParserPlugin.PluginTask task)
|
46
49
|
{
|
47
|
-
delimiter = task.
|
50
|
+
String delimiter = task.getDelimiter();
|
51
|
+
if (delimiter.length() == 0) {
|
52
|
+
throw new ConfigException("Empty delimiter is not allowed");
|
53
|
+
} else {
|
54
|
+
this.delimiterChar = delimiter.charAt(0);
|
55
|
+
if (delimiter.length() > 1) {
|
56
|
+
delimiterFollowingString = delimiter.substring(1);
|
57
|
+
} else {
|
58
|
+
delimiterFollowingString = null;
|
59
|
+
}
|
60
|
+
}
|
48
61
|
quote = task.getQuoteChar().or(CsvParserPlugin.QuoteCharacter.noQuote()).getCharacter();
|
49
62
|
escape = task.getEscapeChar().or(CsvParserPlugin.EscapeCharacter.noEscape()).getCharacter();
|
50
63
|
newline = task.getNewline().getString();
|
51
64
|
trimIfNotQuoted = task.getTrimIfNotQuoted();
|
52
65
|
maxQuotedSizeLimit = task.getMaxQuotedSizeLimit();
|
53
66
|
commentLineMarker = task.getCommentLineMarker().orNull();
|
67
|
+
nullStringOrNull = task.getNullString().orNull();
|
54
68
|
this.input = input;
|
55
69
|
}
|
56
70
|
|
@@ -91,7 +105,11 @@ public class CsvTokenizer
|
|
91
105
|
|
92
106
|
public boolean nextFile()
|
93
107
|
{
|
94
|
-
|
108
|
+
boolean next = input.nextFile();
|
109
|
+
if (next) {
|
110
|
+
lineNumber = 0;
|
111
|
+
}
|
112
|
+
return next;
|
95
113
|
}
|
96
114
|
|
97
115
|
// used by guess-csv
|
@@ -169,9 +187,15 @@ public class CsvTokenizer
|
|
169
187
|
// this block can be out of the loop.
|
170
188
|
if (isDelimiter(c)) {
|
171
189
|
// empty value
|
172
|
-
|
173
|
-
|
174
|
-
|
190
|
+
if (delimiterFollowingString == null) {
|
191
|
+
return "";
|
192
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
193
|
+
linePos += delimiterFollowingString.length();
|
194
|
+
return "";
|
195
|
+
}
|
196
|
+
// not a delimiter
|
197
|
+
}
|
198
|
+
if (isEndOfLine(c)) {
|
175
199
|
// empty value
|
176
200
|
recordState = RecordState.END;
|
177
201
|
return "";
|
@@ -193,9 +217,15 @@ public class CsvTokenizer
|
|
193
217
|
case FIRST_TRIM:
|
194
218
|
if (isDelimiter(c)) {
|
195
219
|
// empty value
|
196
|
-
|
197
|
-
|
198
|
-
|
220
|
+
if (delimiterFollowingString == null) {
|
221
|
+
return "";
|
222
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
223
|
+
linePos += delimiterFollowingString.length();
|
224
|
+
return "";
|
225
|
+
}
|
226
|
+
// not a delimiter
|
227
|
+
}
|
228
|
+
if (isEndOfLine(c)) {
|
199
229
|
// empty value
|
200
230
|
recordState = RecordState.END;
|
201
231
|
return "";
|
@@ -218,9 +248,16 @@ public class CsvTokenizer
|
|
218
248
|
|
219
249
|
case VALUE:
|
220
250
|
if (isDelimiter(c)) {
|
221
|
-
|
222
|
-
|
223
|
-
|
251
|
+
if (delimiterFollowingString == null) {
|
252
|
+
return line.substring(valueStartPos, linePos - 1);
|
253
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
254
|
+
String value = line.substring(valueStartPos, linePos - 1);
|
255
|
+
linePos += delimiterFollowingString.length();
|
256
|
+
return value;
|
257
|
+
}
|
258
|
+
// not a delimiter
|
259
|
+
}
|
260
|
+
if (isEndOfLine(c)) {
|
224
261
|
recordState = RecordState.END;
|
225
262
|
return line.substring(valueStartPos, linePos);
|
226
263
|
|
@@ -241,9 +278,16 @@ public class CsvTokenizer
|
|
241
278
|
|
242
279
|
case LAST_TRIM_OR_VALUE:
|
243
280
|
if (isDelimiter(c)) {
|
244
|
-
|
245
|
-
|
246
|
-
|
281
|
+
if (delimiterFollowingString == null) {
|
282
|
+
return line.substring(valueStartPos, valueEndPos);
|
283
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
284
|
+
linePos += delimiterFollowingString.length();
|
285
|
+
return line.substring(valueStartPos, valueEndPos);
|
286
|
+
} else {
|
287
|
+
// not a delimiter
|
288
|
+
}
|
289
|
+
}
|
290
|
+
if (isEndOfLine(c)) {
|
247
291
|
recordState = RecordState.END;
|
248
292
|
return line.substring(valueStartPos, valueEndPos);
|
249
293
|
|
@@ -304,9 +348,15 @@ public class CsvTokenizer
|
|
304
348
|
|
305
349
|
case AFTER_QUOTED_VALUE:
|
306
350
|
if (isDelimiter(c)) {
|
307
|
-
|
308
|
-
|
309
|
-
|
351
|
+
if (delimiterFollowingString == null) {
|
352
|
+
return quotedValue.toString();
|
353
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
354
|
+
linePos += delimiterFollowingString.length();
|
355
|
+
return quotedValue.toString();
|
356
|
+
}
|
357
|
+
// not a delimiter
|
358
|
+
}
|
359
|
+
if (isEndOfLine(c)) {
|
310
360
|
recordState = RecordState.END;
|
311
361
|
return quotedValue.toString();
|
312
362
|
|
@@ -324,6 +374,32 @@ public class CsvTokenizer
|
|
324
374
|
}
|
325
375
|
}
|
326
376
|
|
377
|
+
public String nextColumnOrNull()
|
378
|
+
{
|
379
|
+
String v = nextColumn();
|
380
|
+
if (nullStringOrNull == null) {
|
381
|
+
if (v.isEmpty()) {
|
382
|
+
if (wasQuotedColumn) {
|
383
|
+
return "";
|
384
|
+
}
|
385
|
+
else {
|
386
|
+
return null;
|
387
|
+
}
|
388
|
+
}
|
389
|
+
else {
|
390
|
+
return v;
|
391
|
+
}
|
392
|
+
}
|
393
|
+
else {
|
394
|
+
if (v.equals(nullStringOrNull)) {
|
395
|
+
return null;
|
396
|
+
}
|
397
|
+
else {
|
398
|
+
return v;
|
399
|
+
}
|
400
|
+
}
|
401
|
+
}
|
402
|
+
|
327
403
|
public boolean wasQuotedColumn()
|
328
404
|
{
|
329
405
|
return wasQuotedColumn;
|
@@ -356,9 +432,22 @@ public class CsvTokenizer
|
|
356
432
|
return c == ' ';
|
357
433
|
}
|
358
434
|
|
435
|
+
private boolean isDelimiterFollowingFrom(int pos)
|
436
|
+
{
|
437
|
+
if (line.length() < pos + delimiterFollowingString.length()) {
|
438
|
+
return false;
|
439
|
+
}
|
440
|
+
for (int i = 0; i < delimiterFollowingString.length(); i++) {
|
441
|
+
if (delimiterFollowingString.charAt(i) != line.charAt(pos + i)) {
|
442
|
+
return false;
|
443
|
+
}
|
444
|
+
}
|
445
|
+
return true;
|
446
|
+
}
|
447
|
+
|
359
448
|
private boolean isDelimiter(char c)
|
360
449
|
{
|
361
|
-
return c ==
|
450
|
+
return c == delimiterChar;
|
362
451
|
}
|
363
452
|
|
364
453
|
private boolean isEndOfLine(char c)
|
@@ -33,7 +33,7 @@ public class TestCsvParserPlugin
|
|
33
33
|
assertEquals(Charset.forName("utf-8"), task.getCharset());
|
34
34
|
assertEquals(Newline.CRLF, task.getNewline());
|
35
35
|
assertEquals(false, task.getHeaderLine().or(false));
|
36
|
-
assertEquals(
|
36
|
+
assertEquals(",", task.getDelimiter());
|
37
37
|
assertEquals(Optional.of(new CsvParserPlugin.QuoteCharacter('\"')), task.getQuoteChar());
|
38
38
|
assertEquals(false, task.getAllowOptionalColumns());
|
39
39
|
assertEquals(DateTimeZone.UTC, task.getDefaultTimeZone());
|
@@ -68,7 +68,7 @@ public class TestCsvParserPlugin
|
|
68
68
|
assertEquals(Charset.forName("utf-16"), task.getCharset());
|
69
69
|
assertEquals(Newline.LF, task.getNewline());
|
70
70
|
assertEquals(true, task.getHeaderLine().or(false));
|
71
|
-
assertEquals(
|
71
|
+
assertEquals("\t", task.getDelimiter());
|
72
72
|
assertEquals(Optional.of(new CsvParserPlugin.QuoteCharacter('\\')), task.getQuoteChar());
|
73
73
|
assertEquals(true, task.getAllowOptionalColumns());
|
74
74
|
}
|
@@ -88,12 +88,8 @@ public class TestCsvTokenizer
|
|
88
88
|
while (tokenizer.nextRecord()) {
|
89
89
|
List<String> record = new ArrayList<>();
|
90
90
|
for (Column c : schema.getColumns()) {
|
91
|
-
String v = tokenizer.
|
92
|
-
|
93
|
-
record.add(v);
|
94
|
-
} else {
|
95
|
-
record.add(tokenizer.wasQuotedColumn() ? "" : null);
|
96
|
-
}
|
91
|
+
String v = tokenizer.nextColumnOrNull();
|
92
|
+
record.add(v);
|
97
93
|
}
|
98
94
|
records.add(record);
|
99
95
|
}
|
@@ -202,6 +198,31 @@ public class TestCsvTokenizer
|
|
202
198
|
"ccc\tddd"));
|
203
199
|
}
|
204
200
|
|
201
|
+
@Test
|
202
|
+
public void testDefaultNullString() throws Exception
|
203
|
+
{
|
204
|
+
reloadPluginTask();
|
205
|
+
assertEquals(expectedRecords(2,
|
206
|
+
null, "",
|
207
|
+
"NULL", "NULL"),
|
208
|
+
parse(task,
|
209
|
+
",\"\"",
|
210
|
+
"NULL,\"NULL\""));
|
211
|
+
}
|
212
|
+
|
213
|
+
@Test
|
214
|
+
public void testChangeNullString() throws Exception
|
215
|
+
{
|
216
|
+
config.set("null_string", "NULL");
|
217
|
+
reloadPluginTask();
|
218
|
+
assertEquals(expectedRecords(2,
|
219
|
+
"", "",
|
220
|
+
null, null),
|
221
|
+
parse(task,
|
222
|
+
",\"\"",
|
223
|
+
"NULL,\"NULL\""));
|
224
|
+
}
|
225
|
+
|
205
226
|
@Test
|
206
227
|
public void testQuotedValues() throws Exception
|
207
228
|
{
|
data/embulk.gemspec
CHANGED
@@ -1 +1 @@
|
|
1
|
-
jruby-9.
|
1
|
+
jruby-9.1.2.0
|
@@ -1 +1 @@
|
|
1
|
-
jruby-9.
|
1
|
+
jruby-9.1.2.0
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jruby-jars
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - '='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 9.
|
19
|
+
version: 9.1.2.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - '='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 9.
|
26
|
+
version: 9.1.2.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,9 +108,9 @@ files:
|
|
108
108
|
- classpath/commons-beanutils-core-1.8.3.jar
|
109
109
|
- classpath/commons-compress-1.10.jar
|
110
110
|
- classpath/commons-lang3-3.1.jar
|
111
|
-
- classpath/embulk-cli-0.8.
|
112
|
-
- classpath/embulk-core-0.8.
|
113
|
-
- classpath/embulk-standards-0.8.
|
111
|
+
- classpath/embulk-cli-0.8.10.jar
|
112
|
+
- classpath/embulk-core-0.8.10.jar
|
113
|
+
- classpath/embulk-standards-0.8.10.jar
|
114
114
|
- classpath/guava-18.0.jar
|
115
115
|
- classpath/guice-4.0.jar
|
116
116
|
- classpath/guice-bootstrap-0.1.1.jar
|
@@ -126,7 +126,7 @@ files:
|
|
126
126
|
- classpath/joda-time-2.9.2.jar
|
127
127
|
- classpath/logback-classic-1.1.3.jar
|
128
128
|
- classpath/logback-core-1.1.3.jar
|
129
|
-
- classpath/msgpack-core-0.8.
|
129
|
+
- classpath/msgpack-core-0.8.8.jar
|
130
130
|
- classpath/netty-buffer-5.0.0.Alpha1.jar
|
131
131
|
- classpath/netty-common-5.0.0.Alpha1.jar
|
132
132
|
- classpath/slf4j-api-1.7.12.jar
|
@@ -167,6 +167,7 @@ files:
|
|
167
167
|
- embulk-core/src/main/java/org/embulk/config/UserDataException.java
|
168
168
|
- embulk-core/src/main/java/org/embulk/config/UserDataExceptions.java
|
169
169
|
- embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java
|
170
|
+
- embulk-core/src/main/java/org/embulk/exec/BufferFileInputPlugin.java
|
170
171
|
- embulk-core/src/main/java/org/embulk/exec/BulkLoader.java
|
171
172
|
- embulk-core/src/main/java/org/embulk/exec/ConfigurableGuessInputPlugin.java
|
172
173
|
- embulk-core/src/main/java/org/embulk/exec/ExecModule.java
|
@@ -422,6 +423,7 @@ files:
|
|
422
423
|
- embulk-docs/src/release/release-0.7.9.rst
|
423
424
|
- embulk-docs/src/release/release-0.8.0.rst
|
424
425
|
- embulk-docs/src/release/release-0.8.1.rst
|
426
|
+
- embulk-docs/src/release/release-0.8.10.rst
|
425
427
|
- embulk-docs/src/release/release-0.8.2.rst
|
426
428
|
- embulk-docs/src/release/release-0.8.3.rst
|
427
429
|
- embulk-docs/src/release/release-0.8.4.rst
|