embulk 0.8.9-java → 0.8.10-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/build.gradle +2 -2
- data/embulk-core/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/BufferFileInputPlugin.java +88 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +0 -76
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +2 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +18 -5
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +2 -3
- data/embulk-docs/build.gradle +4 -4
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.10.rst +35 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +2 -13
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +108 -19
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +2 -2
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +27 -6
- data/embulk.gemspec +1 -1
- data/lib/embulk/command/embulk_migrate_plugin.rb +1 -1
- data/lib/embulk/data/bundle/.ruby-version +1 -1
- data/lib/embulk/data/new/ruby/.ruby-version +1 -1
- data/lib/embulk/version.rb +1 -1
- metadata +39 -37
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a7366cd20cfc525461f9cd341f277cefde3b830
|
4
|
+
data.tar.gz: 23301c5e13ab5d3be273df7187f28a87b727b46b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3687badaebab213a269d1b2685498c596f70c94f4829b57dec01f7b6358f2b758a035d572fc1e021e7c52c4fdab3c337d1cfc5a24aadca5e2ec87899ddb71949
|
7
|
+
data.tar.gz: 7b8047ebd25abefb9696df4bde8133bd33ba197884891675bc78124e3a4347f0d4757fb0f9739b64b673ecc5e11541644cf3432b528d199049112e601ec162a0
|
data/Gemfile.lock
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
embulk (0.8.
|
5
|
-
jruby-jars (= 9.
|
4
|
+
embulk (0.8.9)
|
5
|
+
jruby-jars (= 9.1.2.0)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
-
jruby-jars (9.
|
10
|
+
jruby-jars (9.1.2.0)
|
11
11
|
kramdown (1.5.0)
|
12
12
|
power_assert (0.2.2)
|
13
13
|
rake (10.4.2)
|
@@ -27,4 +27,4 @@ DEPENDENCIES
|
|
27
27
|
yard (~> 0.8.7)
|
28
28
|
|
29
29
|
BUNDLED WITH
|
30
|
-
1.
|
30
|
+
1.12.4
|
data/build.gradle
CHANGED
@@ -16,10 +16,10 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
16
16
|
|
17
17
|
allprojects {
|
18
18
|
group = 'org.embulk'
|
19
|
-
version = '0.8.
|
19
|
+
version = '0.8.10'
|
20
20
|
|
21
21
|
ext {
|
22
|
-
jrubyVersion = '9.
|
22
|
+
jrubyVersion = '9.1.2.0'
|
23
23
|
}
|
24
24
|
|
25
25
|
apply plugin: 'java'
|
data/embulk-core/build.gradle
CHANGED
@@ -38,7 +38,7 @@ dependencies {
|
|
38
38
|
compile 'joda-time:joda-time:2.9.2'
|
39
39
|
compile 'io.netty:netty-buffer:5.0.0.Alpha1'
|
40
40
|
compile 'org.fusesource.jansi:jansi:1.11'
|
41
|
-
compile 'org.msgpack:msgpack-core:0.8.
|
41
|
+
compile 'org.msgpack:msgpack-core:0.8.8'
|
42
42
|
|
43
43
|
// For embulk/guess/charset.rb. See also embulk.gemspec
|
44
44
|
compile 'com.ibm.icu:icu4j:54.1.1'
|
@@ -0,0 +1,88 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import org.embulk.config.ConfigDiff;
|
4
|
+
import org.embulk.config.ConfigSource;
|
5
|
+
import org.embulk.config.TaskReport;
|
6
|
+
import org.embulk.config.TaskSource;
|
7
|
+
import org.embulk.spi.Buffer;
|
8
|
+
import org.embulk.spi.Exec;
|
9
|
+
import org.embulk.spi.FileInputPlugin;
|
10
|
+
import org.embulk.spi.TransactionalFileInput;
|
11
|
+
|
12
|
+
import java.util.List;
|
13
|
+
|
14
|
+
public class BufferFileInputPlugin
|
15
|
+
implements FileInputPlugin
|
16
|
+
{
|
17
|
+
private Buffer buffer;
|
18
|
+
|
19
|
+
public BufferFileInputPlugin(Buffer buffer)
|
20
|
+
{
|
21
|
+
this.buffer = buffer;
|
22
|
+
}
|
23
|
+
|
24
|
+
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
25
|
+
{
|
26
|
+
control.run(Exec.newTaskSource(), 1);
|
27
|
+
return Exec.newConfigDiff();
|
28
|
+
}
|
29
|
+
|
30
|
+
public ConfigDiff resume(TaskSource taskSource,
|
31
|
+
int taskCount,
|
32
|
+
FileInputPlugin.Control control)
|
33
|
+
{
|
34
|
+
throw new UnsupportedOperationException();
|
35
|
+
}
|
36
|
+
|
37
|
+
public void cleanup(TaskSource taskSource,
|
38
|
+
int taskCount,
|
39
|
+
List<TaskReport> successTaskReports)
|
40
|
+
{
|
41
|
+
if (buffer != null) {
|
42
|
+
buffer.release();
|
43
|
+
buffer = null;
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
48
|
+
{
|
49
|
+
return new BufferTransactionalFileInput(buffer);
|
50
|
+
}
|
51
|
+
|
52
|
+
private static class BufferTransactionalFileInput
|
53
|
+
implements TransactionalFileInput
|
54
|
+
{
|
55
|
+
private Buffer buffer;
|
56
|
+
|
57
|
+
public BufferTransactionalFileInput(Buffer buffer)
|
58
|
+
{
|
59
|
+
this.buffer = buffer;
|
60
|
+
}
|
61
|
+
|
62
|
+
@Override
|
63
|
+
public Buffer poll()
|
64
|
+
{
|
65
|
+
Buffer b = buffer;
|
66
|
+
buffer = null;
|
67
|
+
return b;
|
68
|
+
}
|
69
|
+
|
70
|
+
@Override
|
71
|
+
public boolean nextFile()
|
72
|
+
{
|
73
|
+
return buffer != null;
|
74
|
+
}
|
75
|
+
|
76
|
+
@Override
|
77
|
+
public void close() { }
|
78
|
+
|
79
|
+
@Override
|
80
|
+
public void abort() { }
|
81
|
+
|
82
|
+
@Override
|
83
|
+
public TaskReport commit()
|
84
|
+
{
|
85
|
+
return null;
|
86
|
+
}
|
87
|
+
}
|
88
|
+
}
|
@@ -31,7 +31,7 @@ public class ExecModule
|
|
31
31
|
binder.bind(BufferAllocator.class).to(PooledBufferAllocator.class).in(Scopes.SINGLETON);
|
32
32
|
binder.bind(TempFileAllocator.class).in(Scopes.SINGLETON);
|
33
33
|
|
34
|
-
// GuessExecutor
|
34
|
+
// GuessExecutor, PreviewExecutor
|
35
35
|
registerPluginTo(binder, ParserPlugin.class, "system_guess", GuessExecutor.GuessParserPlugin.class);
|
36
36
|
registerPluginTo(binder, ParserPlugin.class, "system_sampling", SamplingParserPlugin.class);
|
37
37
|
|
@@ -191,82 +191,6 @@ public class GuessExecutor
|
|
191
191
|
return lastGuessed;
|
192
192
|
}
|
193
193
|
|
194
|
-
private static class BufferFileInputPlugin
|
195
|
-
implements FileInputPlugin
|
196
|
-
{
|
197
|
-
private Buffer buffer;
|
198
|
-
|
199
|
-
public BufferFileInputPlugin(Buffer buffer)
|
200
|
-
{
|
201
|
-
this.buffer = buffer;
|
202
|
-
}
|
203
|
-
|
204
|
-
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
205
|
-
{
|
206
|
-
control.run(Exec.newTaskSource(), 1);
|
207
|
-
return Exec.newConfigDiff();
|
208
|
-
}
|
209
|
-
|
210
|
-
public ConfigDiff resume(TaskSource taskSource,
|
211
|
-
int taskCount,
|
212
|
-
FileInputPlugin.Control control)
|
213
|
-
{
|
214
|
-
throw new UnsupportedOperationException();
|
215
|
-
}
|
216
|
-
|
217
|
-
public void cleanup(TaskSource taskSource,
|
218
|
-
int taskCount,
|
219
|
-
List<TaskReport> successTaskReports)
|
220
|
-
{
|
221
|
-
if (buffer != null) {
|
222
|
-
buffer.release();
|
223
|
-
buffer = null;
|
224
|
-
}
|
225
|
-
}
|
226
|
-
|
227
|
-
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
228
|
-
{
|
229
|
-
return new BufferTransactionalFileInput(buffer);
|
230
|
-
}
|
231
|
-
}
|
232
|
-
|
233
|
-
private static class BufferTransactionalFileInput
|
234
|
-
implements TransactionalFileInput
|
235
|
-
{
|
236
|
-
private Buffer buffer;
|
237
|
-
|
238
|
-
public BufferTransactionalFileInput(Buffer buffer)
|
239
|
-
{
|
240
|
-
this.buffer = buffer;
|
241
|
-
}
|
242
|
-
|
243
|
-
@Override
|
244
|
-
public Buffer poll()
|
245
|
-
{
|
246
|
-
Buffer b = buffer;
|
247
|
-
buffer = null;
|
248
|
-
return b;
|
249
|
-
}
|
250
|
-
|
251
|
-
@Override
|
252
|
-
public boolean nextFile()
|
253
|
-
{
|
254
|
-
return buffer != null;
|
255
|
-
}
|
256
|
-
|
257
|
-
@Override
|
258
|
-
public void close() { }
|
259
|
-
|
260
|
-
@Override
|
261
|
-
public void abort() { }
|
262
|
-
|
263
|
-
@Override
|
264
|
-
public TaskReport commit()
|
265
|
-
{
|
266
|
-
return null;
|
267
|
-
}
|
268
|
-
}
|
269
|
-
|
270
194
|
public static class GuessParserPlugin
|
271
195
|
implements ParserPlugin
|
272
196
|
{
|
@@ -301,6 +301,7 @@ public class LocalExecutorPlugin
|
|
301
301
|
|
302
302
|
// outputCommitted
|
303
303
|
tran.commit();
|
304
|
+
aborter.dontAbort();
|
304
305
|
}
|
305
306
|
}
|
306
307
|
finally {
|
@@ -551,6 +552,7 @@ public class LocalExecutorPlugin
|
|
551
552
|
catch (InterruptedException ex) {
|
552
553
|
error = ex;
|
553
554
|
}
|
555
|
+
outputWorkers[i] = null;
|
554
556
|
if (error != null) {
|
555
557
|
throw Throwables.propagate(error);
|
556
558
|
}
|
@@ -13,6 +13,9 @@ import org.embulk.config.TaskSource;
|
|
13
13
|
import org.embulk.config.ConfigSource;
|
14
14
|
import org.embulk.config.TaskReport;
|
15
15
|
import org.embulk.plugin.PluginType;
|
16
|
+
import org.embulk.spi.Buffer;
|
17
|
+
import org.embulk.spi.FileInputPlugin;
|
18
|
+
import org.embulk.spi.FileInputRunner;
|
16
19
|
import org.embulk.spi.Schema;
|
17
20
|
import org.embulk.spi.Page;
|
18
21
|
import org.embulk.spi.PageOutput;
|
@@ -85,10 +88,22 @@ public class PreviewExecutor
|
|
85
88
|
|
86
89
|
private PreviewResult doPreview(ConfigSource config)
|
87
90
|
{
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
+
PreviewTask task = config.loadConfig(PreviewTask.class);
|
92
|
+
InputPlugin inputPlugin = newInputPlugin(task);
|
93
|
+
List<FilterPlugin> filterPlugins = newFilterPlugins(task);
|
94
|
+
|
95
|
+
if (inputPlugin instanceof FileInputRunner) { // file input runner
|
96
|
+
Buffer sample = SamplingParserPlugin.runFileInputSampling((FileInputRunner)inputPlugin, config.getNested("in"));
|
97
|
+
FileInputRunner previewRunner = new FileInputRunner(new BufferFileInputPlugin(sample));
|
98
|
+
return doPreview(task, previewRunner, filterPlugins);
|
99
|
+
}
|
100
|
+
else {
|
101
|
+
return doPreview(task, inputPlugin, filterPlugins);
|
102
|
+
}
|
103
|
+
}
|
91
104
|
|
105
|
+
private PreviewResult doPreview(final PreviewTask task, final InputPlugin input, final List<FilterPlugin> filterPlugins)
|
106
|
+
{
|
92
107
|
try {
|
93
108
|
input.transaction(task.getInputConfig(), new InputPlugin.Control() {
|
94
109
|
public List<TaskReport> run(final TaskSource inputTask, Schema inputSchema, final int taskCount)
|
@@ -96,8 +111,6 @@ public class PreviewExecutor
|
|
96
111
|
Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
|
97
112
|
public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
|
98
113
|
{
|
99
|
-
InputPlugin input = newInputPlugin(task);
|
100
|
-
List<FilterPlugin> filterPlugins = newFilterPlugins(task);
|
101
114
|
Schema inputSchema = filterSchemas.get(0);
|
102
115
|
Schema outputSchema = filterSchemas.get(filterSchemas.size() - 1);
|
103
116
|
|
@@ -83,9 +83,8 @@ public class FileInputRunner
|
|
83
83
|
public ConfigDiff guess(ConfigSource execConfig, ConfigSource config)
|
84
84
|
{
|
85
85
|
Buffer sample = SamplingParserPlugin.runFileInputSampling(this, config);
|
86
|
-
|
87
|
-
|
88
|
-
}
|
86
|
+
// SamplingParserPlugin.runFileInputSampling throws NoSampleException if there are
|
87
|
+
// no files or all files are smaller than minSampleSize (40 bytes).
|
89
88
|
|
90
89
|
GuessExecutor guessExecutor = Exec.getInjector().getInstance(GuessExecutor.class);
|
91
90
|
return guessExecutor.guessParserConfig(sample, config, execConfig);
|
data/embulk-docs/build.gradle
CHANGED
@@ -9,24 +9,24 @@ dependencies {
|
|
9
9
|
jrubyExec 'rubygems:yard:0.8.7.6'
|
10
10
|
}
|
11
11
|
|
12
|
-
task
|
12
|
+
task sphinxHtml(type: Exec) {
|
13
13
|
workingDir '.'
|
14
14
|
commandLine 'make'
|
15
15
|
args 'html'
|
16
16
|
}
|
17
17
|
|
18
|
-
task
|
18
|
+
task javadocHtml(type: Copy, dependsOn: [':embulk-core:javadoc']) {
|
19
19
|
doFirst { file('build/html/javadoc').mkdirs() }
|
20
20
|
from project(':embulk-core').javadoc.destinationDir
|
21
21
|
into 'build/html/javadoc'
|
22
22
|
}
|
23
23
|
|
24
|
-
task
|
24
|
+
task rdocHtml(type: JRubyExec) {
|
25
25
|
workingDir '..'
|
26
26
|
jrubyArgs '-ryard', '-eYARD::CLI::Yardoc.run(*ARGV)'
|
27
27
|
script './lib/embulk/version.rb' // dummy
|
28
28
|
scriptArgs 'lib', '-o', 'embulk-docs/build/html/rdoc'
|
29
29
|
}
|
30
30
|
|
31
|
-
task site(type: Copy, dependsOn: ['
|
31
|
+
task site(type: Copy, dependsOn: ['sphinxHtml', 'rdocHtml', 'javadocHtml']) {
|
32
32
|
}
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,35 @@
|
|
1
|
+
Release 0.8.10
|
2
|
+
==================================
|
3
|
+
|
4
|
+
General Changes
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Fixed 'IllegalArgumentException: Self-suppression not permitted' error (@hata++) [#446]
|
8
|
+
|
9
|
+
* Fixed preview so that it does not read the entire file when the parser doesn't produce records. Preview now reads the first 32KB.
|
10
|
+
|
11
|
+
* Updated JRuby from 9.0.4.0 to 9.1.2.0. Release notes:
|
12
|
+
|
13
|
+
* http://jruby.org/2016/01/26/jruby-9-0-5-0.html
|
14
|
+
|
15
|
+
* http://jruby.org/2016/05/03/jruby-9-1-0-0.html
|
16
|
+
|
17
|
+
* http://jruby.org/2016/05/19/jruby-9-1-1-0.html
|
18
|
+
|
19
|
+
* http://jruby.org/2016/05/27/jruby-9-1-2-0.html
|
20
|
+
|
21
|
+
* Updated msgpack-java from 0.8.7 to 0.8.8. Release notes:
|
22
|
+
|
23
|
+
* https://github.com/msgpack/msgpack-java/blob/0.8.8/RELEASE_NOTES.md
|
24
|
+
|
25
|
+
Built-in plugins
|
26
|
+
------------------
|
27
|
+
|
28
|
+
* ``csv`` parser plugin supports delimiters longer than 1 character.
|
29
|
+
|
30
|
+
* ``csv`` parser no longer converts a non-quoted empty string into NULL when null_string is set. The default behavior is unchanged (a non-quoted empty string is converted into NULL).
|
31
|
+
|
32
|
+
|
33
|
+
Release Date
|
34
|
+
------------------
|
35
|
+
2016-07-21
|
@@ -57,7 +57,7 @@ public class CsvParserPlugin
|
|
57
57
|
|
58
58
|
@Config("delimiter")
|
59
59
|
@ConfigDefault("\",\"")
|
60
|
-
|
60
|
+
String getDelimiter();
|
61
61
|
|
62
62
|
@Config("quote")
|
63
63
|
@ConfigDefault("\"\\\"\"")
|
@@ -233,7 +233,6 @@ public class CsvParserPlugin
|
|
233
233
|
final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
|
234
234
|
final JsonParser jsonParser = new JsonParser();
|
235
235
|
final CsvTokenizer tokenizer = new CsvTokenizer(new LineDecoder(input, task), task);
|
236
|
-
final String nullStringOrNull = task.getNullString().orNull();
|
237
236
|
final boolean allowOptionalColumns = task.getAllowOptionalColumns();
|
238
237
|
final boolean allowExtraColumns = task.getAllowExtraColumns();
|
239
238
|
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
@@ -344,17 +343,7 @@ public class CsvParserPlugin
|
|
344
343
|
//TODO warning
|
345
344
|
return null;
|
346
345
|
}
|
347
|
-
|
348
|
-
if (!v.isEmpty()) {
|
349
|
-
if (v.equals(nullStringOrNull)) {
|
350
|
-
return null;
|
351
|
-
}
|
352
|
-
return v;
|
353
|
-
} else if (tokenizer.wasQuotedColumn()) {
|
354
|
-
return "";
|
355
|
-
} else {
|
356
|
-
return null;
|
357
|
-
}
|
346
|
+
return tokenizer.nextColumnOrNull();
|
358
347
|
}
|
359
348
|
});
|
360
349
|
|
@@ -7,6 +7,7 @@ import java.util.Deque;
|
|
7
7
|
import java.util.ArrayDeque;
|
8
8
|
import org.embulk.spi.DataException;
|
9
9
|
import org.embulk.spi.util.LineDecoder;
|
10
|
+
import org.embulk.config.ConfigException;
|
10
11
|
|
11
12
|
public class CsvTokenizer
|
12
13
|
{
|
@@ -24,7 +25,8 @@ public class CsvTokenizer
|
|
24
25
|
static final char NO_QUOTE = '\0';
|
25
26
|
static final char NO_ESCAPE = '\0';
|
26
27
|
|
27
|
-
private final char
|
28
|
+
private final char delimiterChar;
|
29
|
+
private final String delimiterFollowingString;
|
28
30
|
private final char quote;
|
29
31
|
private final char escape;
|
30
32
|
private final String newline;
|
@@ -32,6 +34,7 @@ public class CsvTokenizer
|
|
32
34
|
private final long maxQuotedSizeLimit;
|
33
35
|
private final String commentLineMarker;
|
34
36
|
private final LineDecoder input;
|
37
|
+
private final String nullStringOrNull;
|
35
38
|
|
36
39
|
private RecordState recordState = RecordState.END; // initial state is end of a record. nextRecord() must be called first
|
37
40
|
private long lineNumber = 0;
|
@@ -44,13 +47,24 @@ public class CsvTokenizer
|
|
44
47
|
|
45
48
|
public CsvTokenizer(LineDecoder input, CsvParserPlugin.PluginTask task)
|
46
49
|
{
|
47
|
-
delimiter = task.
|
50
|
+
String delimiter = task.getDelimiter();
|
51
|
+
if (delimiter.length() == 0) {
|
52
|
+
throw new ConfigException("Empty delimiter is not allowed");
|
53
|
+
} else {
|
54
|
+
this.delimiterChar = delimiter.charAt(0);
|
55
|
+
if (delimiter.length() > 1) {
|
56
|
+
delimiterFollowingString = delimiter.substring(1);
|
57
|
+
} else {
|
58
|
+
delimiterFollowingString = null;
|
59
|
+
}
|
60
|
+
}
|
48
61
|
quote = task.getQuoteChar().or(CsvParserPlugin.QuoteCharacter.noQuote()).getCharacter();
|
49
62
|
escape = task.getEscapeChar().or(CsvParserPlugin.EscapeCharacter.noEscape()).getCharacter();
|
50
63
|
newline = task.getNewline().getString();
|
51
64
|
trimIfNotQuoted = task.getTrimIfNotQuoted();
|
52
65
|
maxQuotedSizeLimit = task.getMaxQuotedSizeLimit();
|
53
66
|
commentLineMarker = task.getCommentLineMarker().orNull();
|
67
|
+
nullStringOrNull = task.getNullString().orNull();
|
54
68
|
this.input = input;
|
55
69
|
}
|
56
70
|
|
@@ -91,7 +105,11 @@ public class CsvTokenizer
|
|
91
105
|
|
92
106
|
public boolean nextFile()
|
93
107
|
{
|
94
|
-
|
108
|
+
boolean next = input.nextFile();
|
109
|
+
if (next) {
|
110
|
+
lineNumber = 0;
|
111
|
+
}
|
112
|
+
return next;
|
95
113
|
}
|
96
114
|
|
97
115
|
// used by guess-csv
|
@@ -169,9 +187,15 @@ public class CsvTokenizer
|
|
169
187
|
// this block can be out of the loop.
|
170
188
|
if (isDelimiter(c)) {
|
171
189
|
// empty value
|
172
|
-
|
173
|
-
|
174
|
-
|
190
|
+
if (delimiterFollowingString == null) {
|
191
|
+
return "";
|
192
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
193
|
+
linePos += delimiterFollowingString.length();
|
194
|
+
return "";
|
195
|
+
}
|
196
|
+
// not a delimiter
|
197
|
+
}
|
198
|
+
if (isEndOfLine(c)) {
|
175
199
|
// empty value
|
176
200
|
recordState = RecordState.END;
|
177
201
|
return "";
|
@@ -193,9 +217,15 @@ public class CsvTokenizer
|
|
193
217
|
case FIRST_TRIM:
|
194
218
|
if (isDelimiter(c)) {
|
195
219
|
// empty value
|
196
|
-
|
197
|
-
|
198
|
-
|
220
|
+
if (delimiterFollowingString == null) {
|
221
|
+
return "";
|
222
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
223
|
+
linePos += delimiterFollowingString.length();
|
224
|
+
return "";
|
225
|
+
}
|
226
|
+
// not a delimiter
|
227
|
+
}
|
228
|
+
if (isEndOfLine(c)) {
|
199
229
|
// empty value
|
200
230
|
recordState = RecordState.END;
|
201
231
|
return "";
|
@@ -218,9 +248,16 @@ public class CsvTokenizer
|
|
218
248
|
|
219
249
|
case VALUE:
|
220
250
|
if (isDelimiter(c)) {
|
221
|
-
|
222
|
-
|
223
|
-
|
251
|
+
if (delimiterFollowingString == null) {
|
252
|
+
return line.substring(valueStartPos, linePos - 1);
|
253
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
254
|
+
String value = line.substring(valueStartPos, linePos - 1);
|
255
|
+
linePos += delimiterFollowingString.length();
|
256
|
+
return value;
|
257
|
+
}
|
258
|
+
// not a delimiter
|
259
|
+
}
|
260
|
+
if (isEndOfLine(c)) {
|
224
261
|
recordState = RecordState.END;
|
225
262
|
return line.substring(valueStartPos, linePos);
|
226
263
|
|
@@ -241,9 +278,16 @@ public class CsvTokenizer
|
|
241
278
|
|
242
279
|
case LAST_TRIM_OR_VALUE:
|
243
280
|
if (isDelimiter(c)) {
|
244
|
-
|
245
|
-
|
246
|
-
|
281
|
+
if (delimiterFollowingString == null) {
|
282
|
+
return line.substring(valueStartPos, valueEndPos);
|
283
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
284
|
+
linePos += delimiterFollowingString.length();
|
285
|
+
return line.substring(valueStartPos, valueEndPos);
|
286
|
+
} else {
|
287
|
+
// not a delimiter
|
288
|
+
}
|
289
|
+
}
|
290
|
+
if (isEndOfLine(c)) {
|
247
291
|
recordState = RecordState.END;
|
248
292
|
return line.substring(valueStartPos, valueEndPos);
|
249
293
|
|
@@ -304,9 +348,15 @@ public class CsvTokenizer
|
|
304
348
|
|
305
349
|
case AFTER_QUOTED_VALUE:
|
306
350
|
if (isDelimiter(c)) {
|
307
|
-
|
308
|
-
|
309
|
-
|
351
|
+
if (delimiterFollowingString == null) {
|
352
|
+
return quotedValue.toString();
|
353
|
+
} else if (isDelimiterFollowingFrom(linePos)) {
|
354
|
+
linePos += delimiterFollowingString.length();
|
355
|
+
return quotedValue.toString();
|
356
|
+
}
|
357
|
+
// not a delimiter
|
358
|
+
}
|
359
|
+
if (isEndOfLine(c)) {
|
310
360
|
recordState = RecordState.END;
|
311
361
|
return quotedValue.toString();
|
312
362
|
|
@@ -324,6 +374,32 @@ public class CsvTokenizer
|
|
324
374
|
}
|
325
375
|
}
|
326
376
|
|
377
|
+
public String nextColumnOrNull()
|
378
|
+
{
|
379
|
+
String v = nextColumn();
|
380
|
+
if (nullStringOrNull == null) {
|
381
|
+
if (v.isEmpty()) {
|
382
|
+
if (wasQuotedColumn) {
|
383
|
+
return "";
|
384
|
+
}
|
385
|
+
else {
|
386
|
+
return null;
|
387
|
+
}
|
388
|
+
}
|
389
|
+
else {
|
390
|
+
return v;
|
391
|
+
}
|
392
|
+
}
|
393
|
+
else {
|
394
|
+
if (v.equals(nullStringOrNull)) {
|
395
|
+
return null;
|
396
|
+
}
|
397
|
+
else {
|
398
|
+
return v;
|
399
|
+
}
|
400
|
+
}
|
401
|
+
}
|
402
|
+
|
327
403
|
public boolean wasQuotedColumn()
|
328
404
|
{
|
329
405
|
return wasQuotedColumn;
|
@@ -356,9 +432,22 @@ public class CsvTokenizer
|
|
356
432
|
return c == ' ';
|
357
433
|
}
|
358
434
|
|
435
|
+
private boolean isDelimiterFollowingFrom(int pos)
|
436
|
+
{
|
437
|
+
if (line.length() < pos + delimiterFollowingString.length()) {
|
438
|
+
return false;
|
439
|
+
}
|
440
|
+
for (int i = 0; i < delimiterFollowingString.length(); i++) {
|
441
|
+
if (delimiterFollowingString.charAt(i) != line.charAt(pos + i)) {
|
442
|
+
return false;
|
443
|
+
}
|
444
|
+
}
|
445
|
+
return true;
|
446
|
+
}
|
447
|
+
|
359
448
|
private boolean isDelimiter(char c)
|
360
449
|
{
|
361
|
-
return c ==
|
450
|
+
return c == delimiterChar;
|
362
451
|
}
|
363
452
|
|
364
453
|
private boolean isEndOfLine(char c)
|
@@ -33,7 +33,7 @@ public class TestCsvParserPlugin
|
|
33
33
|
assertEquals(Charset.forName("utf-8"), task.getCharset());
|
34
34
|
assertEquals(Newline.CRLF, task.getNewline());
|
35
35
|
assertEquals(false, task.getHeaderLine().or(false));
|
36
|
-
assertEquals(
|
36
|
+
assertEquals(",", task.getDelimiter());
|
37
37
|
assertEquals(Optional.of(new CsvParserPlugin.QuoteCharacter('\"')), task.getQuoteChar());
|
38
38
|
assertEquals(false, task.getAllowOptionalColumns());
|
39
39
|
assertEquals(DateTimeZone.UTC, task.getDefaultTimeZone());
|
@@ -68,7 +68,7 @@ public class TestCsvParserPlugin
|
|
68
68
|
assertEquals(Charset.forName("utf-16"), task.getCharset());
|
69
69
|
assertEquals(Newline.LF, task.getNewline());
|
70
70
|
assertEquals(true, task.getHeaderLine().or(false));
|
71
|
-
assertEquals(
|
71
|
+
assertEquals("\t", task.getDelimiter());
|
72
72
|
assertEquals(Optional.of(new CsvParserPlugin.QuoteCharacter('\\')), task.getQuoteChar());
|
73
73
|
assertEquals(true, task.getAllowOptionalColumns());
|
74
74
|
}
|
@@ -88,12 +88,8 @@ public class TestCsvTokenizer
|
|
88
88
|
while (tokenizer.nextRecord()) {
|
89
89
|
List<String> record = new ArrayList<>();
|
90
90
|
for (Column c : schema.getColumns()) {
|
91
|
-
String v = tokenizer.
|
92
|
-
|
93
|
-
record.add(v);
|
94
|
-
} else {
|
95
|
-
record.add(tokenizer.wasQuotedColumn() ? "" : null);
|
96
|
-
}
|
91
|
+
String v = tokenizer.nextColumnOrNull();
|
92
|
+
record.add(v);
|
97
93
|
}
|
98
94
|
records.add(record);
|
99
95
|
}
|
@@ -202,6 +198,31 @@ public class TestCsvTokenizer
|
|
202
198
|
"ccc\tddd"));
|
203
199
|
}
|
204
200
|
|
201
|
+
@Test
|
202
|
+
public void testDefaultNullString() throws Exception
|
203
|
+
{
|
204
|
+
reloadPluginTask();
|
205
|
+
assertEquals(expectedRecords(2,
|
206
|
+
null, "",
|
207
|
+
"NULL", "NULL"),
|
208
|
+
parse(task,
|
209
|
+
",\"\"",
|
210
|
+
"NULL,\"NULL\""));
|
211
|
+
}
|
212
|
+
|
213
|
+
@Test
|
214
|
+
public void testChangeNullString() throws Exception
|
215
|
+
{
|
216
|
+
config.set("null_string", "NULL");
|
217
|
+
reloadPluginTask();
|
218
|
+
assertEquals(expectedRecords(2,
|
219
|
+
"", "",
|
220
|
+
null, null),
|
221
|
+
parse(task,
|
222
|
+
",\"\"",
|
223
|
+
"NULL,\"NULL\""));
|
224
|
+
}
|
225
|
+
|
205
226
|
@Test
|
206
227
|
public void testQuotedValues() throws Exception
|
207
228
|
{
|
data/embulk.gemspec
CHANGED
@@ -1 +1 @@
|
|
1
|
-
jruby-9.
|
1
|
+
jruby-9.1.2.0
|
@@ -1 +1 @@
|
|
1
|
-
jruby-9.
|
1
|
+
jruby-9.1.2.0
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,127 +1,127 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.10
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 1.10.6
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
16
|
- - ">="
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: 1.10.6
|
19
|
+
name: bundler
|
25
20
|
prerelease: false
|
26
21
|
type: :runtime
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: msgpack
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
|
-
- - "
|
24
|
+
- - ">="
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
26
|
+
version: 1.10.6
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
30
|
- - "~>"
|
37
31
|
- !ruby/object:Gem::Version
|
38
32
|
version: 0.7.3
|
33
|
+
name: msgpack
|
39
34
|
prerelease: false
|
40
35
|
type: :runtime
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: liquid
|
43
36
|
version_requirements: !ruby/object:Gem::Requirement
|
44
37
|
requirements:
|
45
38
|
- - "~>"
|
46
39
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
40
|
+
version: 0.7.3
|
41
|
+
- !ruby/object:Gem::Dependency
|
48
42
|
requirement: !ruby/object:Gem::Requirement
|
49
43
|
requirements:
|
50
44
|
- - "~>"
|
51
45
|
- !ruby/object:Gem::Version
|
52
46
|
version: 3.0.6
|
47
|
+
name: liquid
|
53
48
|
prerelease: false
|
54
49
|
type: :runtime
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rjack-icu
|
57
50
|
version_requirements: !ruby/object:Gem::Requirement
|
58
51
|
requirements:
|
59
52
|
- - "~>"
|
60
53
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
54
|
+
version: 3.0.6
|
55
|
+
- !ruby/object:Gem::Dependency
|
62
56
|
requirement: !ruby/object:Gem::Requirement
|
63
57
|
requirements:
|
64
58
|
- - "~>"
|
65
59
|
- !ruby/object:Gem::Version
|
66
60
|
version: 4.54.1.1
|
61
|
+
name: rjack-icu
|
67
62
|
prerelease: false
|
68
63
|
type: :runtime
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: rake
|
71
64
|
version_requirements: !ruby/object:Gem::Requirement
|
72
65
|
requirements:
|
73
|
-
- - "
|
66
|
+
- - "~>"
|
74
67
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
68
|
+
version: 4.54.1.1
|
69
|
+
- !ruby/object:Gem::Dependency
|
76
70
|
requirement: !ruby/object:Gem::Requirement
|
77
71
|
requirements:
|
78
72
|
- - ">="
|
79
73
|
- !ruby/object:Gem::Version
|
80
74
|
version: 0.10.0
|
75
|
+
name: rake
|
81
76
|
prerelease: false
|
82
77
|
type: :development
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: test-unit
|
85
78
|
version_requirements: !ruby/object:Gem::Requirement
|
86
79
|
requirements:
|
87
|
-
- - "
|
80
|
+
- - ">="
|
88
81
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
82
|
+
version: 0.10.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
90
84
|
requirement: !ruby/object:Gem::Requirement
|
91
85
|
requirements:
|
92
86
|
- - "~>"
|
93
87
|
- !ruby/object:Gem::Version
|
94
88
|
version: 3.0.9
|
89
|
+
name: test-unit
|
95
90
|
prerelease: false
|
96
91
|
type: :development
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: yard
|
99
92
|
version_requirements: !ruby/object:Gem::Requirement
|
100
93
|
requirements:
|
101
94
|
- - "~>"
|
102
95
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0.
|
96
|
+
version: 3.0.9
|
97
|
+
- !ruby/object:Gem::Dependency
|
104
98
|
requirement: !ruby/object:Gem::Requirement
|
105
99
|
requirements:
|
106
100
|
- - "~>"
|
107
101
|
- !ruby/object:Gem::Version
|
108
102
|
version: 0.8.7
|
103
|
+
name: yard
|
109
104
|
prerelease: false
|
110
105
|
type: :development
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: kramdown
|
113
106
|
version_requirements: !ruby/object:Gem::Requirement
|
114
107
|
requirements:
|
115
108
|
- - "~>"
|
116
109
|
- !ruby/object:Gem::Version
|
117
|
-
version:
|
110
|
+
version: 0.8.7
|
111
|
+
- !ruby/object:Gem::Dependency
|
118
112
|
requirement: !ruby/object:Gem::Requirement
|
119
113
|
requirements:
|
120
114
|
- - "~>"
|
121
115
|
- !ruby/object:Gem::Version
|
122
116
|
version: 1.5.0
|
117
|
+
name: kramdown
|
123
118
|
prerelease: false
|
124
119
|
type: :development
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 1.5.0
|
125
125
|
description: Embulk is an open-source, plugin-based bulk data loader to scale and simplify data management across heterogeneous data stores. It can collect and ship any kinds of data in high throughput with transaction control.
|
126
126
|
email:
|
127
127
|
- frsyuki@gmail.com
|
@@ -148,9 +148,9 @@ files:
|
|
148
148
|
- classpath/commons-beanutils-core-1.8.3.jar
|
149
149
|
- classpath/commons-compress-1.10.jar
|
150
150
|
- classpath/commons-lang3-3.1.jar
|
151
|
-
- classpath/embulk-cli-0.8.
|
152
|
-
- classpath/embulk-core-0.8.
|
153
|
-
- classpath/embulk-standards-0.8.
|
151
|
+
- classpath/embulk-cli-0.8.10.jar
|
152
|
+
- classpath/embulk-core-0.8.10.jar
|
153
|
+
- classpath/embulk-standards-0.8.10.jar
|
154
154
|
- classpath/guava-18.0.jar
|
155
155
|
- classpath/guice-4.0.jar
|
156
156
|
- classpath/guice-bootstrap-0.1.1.jar
|
@@ -166,7 +166,7 @@ files:
|
|
166
166
|
- classpath/joda-time-2.9.2.jar
|
167
167
|
- classpath/logback-classic-1.1.3.jar
|
168
168
|
- classpath/logback-core-1.1.3.jar
|
169
|
-
- classpath/msgpack-core-0.8.
|
169
|
+
- classpath/msgpack-core-0.8.8.jar
|
170
170
|
- classpath/netty-buffer-5.0.0.Alpha1.jar
|
171
171
|
- classpath/netty-common-5.0.0.Alpha1.jar
|
172
172
|
- classpath/slf4j-api-1.7.12.jar
|
@@ -207,6 +207,7 @@ files:
|
|
207
207
|
- embulk-core/src/main/java/org/embulk/config/UserDataException.java
|
208
208
|
- embulk-core/src/main/java/org/embulk/config/UserDataExceptions.java
|
209
209
|
- embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java
|
210
|
+
- embulk-core/src/main/java/org/embulk/exec/BufferFileInputPlugin.java
|
210
211
|
- embulk-core/src/main/java/org/embulk/exec/BulkLoader.java
|
211
212
|
- embulk-core/src/main/java/org/embulk/exec/ConfigurableGuessInputPlugin.java
|
212
213
|
- embulk-core/src/main/java/org/embulk/exec/ExecModule.java
|
@@ -462,6 +463,7 @@ files:
|
|
462
463
|
- embulk-docs/src/release/release-0.7.9.rst
|
463
464
|
- embulk-docs/src/release/release-0.8.0.rst
|
464
465
|
- embulk-docs/src/release/release-0.8.1.rst
|
466
|
+
- embulk-docs/src/release/release-0.8.10.rst
|
465
467
|
- embulk-docs/src/release/release-0.8.2.rst
|
466
468
|
- embulk-docs/src/release/release-0.8.3.rst
|
467
469
|
- embulk-docs/src/release/release-0.8.4.rst
|
@@ -609,7 +611,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
609
611
|
version: '0'
|
610
612
|
requirements: []
|
611
613
|
rubyforge_project:
|
612
|
-
rubygems_version: 2.4
|
614
|
+
rubygems_version: 2.6.4
|
613
615
|
signing_key:
|
614
616
|
specification_version: 4
|
615
617
|
summary: Embulk, a plugin-based parallel bulk data loader
|