embulk 0.8.9 → 0.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c2f4ce6246c951b41b33a123f74e33d54324a865
4
- data.tar.gz: 7c7bb33676ac7bcad4bc03dec3b843242b5ebf15
3
+ metadata.gz: 7a7102ccd2c44833976bac88ae9d0e9817be8743
4
+ data.tar.gz: 77b0075a55fe8afb19da2afb7f1cb7ce7e7ecb90
5
5
  SHA512:
6
- metadata.gz: e4921463f2ec39d4f47981692059f2221eb98003d94a063a09b0fda6cae4fb697053a5199ce2abfc31d29e43d172d24b01eacacbb1b287e80f0a93abe4d698af
7
- data.tar.gz: bb264a3a0528502f42222d6c86ec8d5d06214bdfd7bb29c06e9c1fd9252900a7a93b70c1316423a7338672ec05bff6382c1412fed8585a4461ebc1d9340b2997
6
+ metadata.gz: 17bf7846353ee95ea5f8378534ea89d44ec18e2bb10a06073c803bef75bb8b2563437fa4d752f6e506569a78ab80018abb6c66b146ca144cf9bc3e18b08b44ba
7
+ data.tar.gz: df8153562d5d64f596ba1a004e38dd2461bbd0809be27013d20d2cc25fd8da4b35a69fde260531ac8cb7b323a1f12232b8c7b7b4f5470471f3b7a8ca2fa80012
@@ -1,13 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- embulk (0.8.0)
5
- jruby-jars (= 9.0.4.0)
4
+ embulk (0.8.9)
5
+ jruby-jars (= 9.1.2.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- jruby-jars (9.0.4.0)
10
+ jruby-jars (9.1.2.0)
11
11
  kramdown (1.5.0)
12
12
  power_assert (0.2.2)
13
13
  rake (10.4.2)
@@ -27,4 +27,4 @@ DEPENDENCIES
27
27
  yard (~> 0.8.7)
28
28
 
29
29
  BUNDLED WITH
30
- 1.10.6
30
+ 1.12.4
@@ -16,10 +16,10 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
16
16
 
17
17
  allprojects {
18
18
  group = 'org.embulk'
19
- version = '0.8.9'
19
+ version = '0.8.10'
20
20
 
21
21
  ext {
22
- jrubyVersion = '9.0.5.0'
22
+ jrubyVersion = '9.1.2.0'
23
23
  }
24
24
 
25
25
  apply plugin: 'java'
@@ -38,7 +38,7 @@ dependencies {
38
38
  compile 'joda-time:joda-time:2.9.2'
39
39
  compile 'io.netty:netty-buffer:5.0.0.Alpha1'
40
40
  compile 'org.fusesource.jansi:jansi:1.11'
41
- compile 'org.msgpack:msgpack-core:0.8.7'
41
+ compile 'org.msgpack:msgpack-core:0.8.8'
42
42
 
43
43
  // For embulk/guess/charset.rb. See also embulk.gemspec
44
44
  compile 'com.ibm.icu:icu4j:54.1.1'
@@ -0,0 +1,88 @@
1
+ package org.embulk.exec;
2
+
3
+ import org.embulk.config.ConfigDiff;
4
+ import org.embulk.config.ConfigSource;
5
+ import org.embulk.config.TaskReport;
6
+ import org.embulk.config.TaskSource;
7
+ import org.embulk.spi.Buffer;
8
+ import org.embulk.spi.Exec;
9
+ import org.embulk.spi.FileInputPlugin;
10
+ import org.embulk.spi.TransactionalFileInput;
11
+
12
+ import java.util.List;
13
+
14
+ public class BufferFileInputPlugin
15
+ implements FileInputPlugin
16
+ {
17
+ private Buffer buffer;
18
+
19
+ public BufferFileInputPlugin(Buffer buffer)
20
+ {
21
+ this.buffer = buffer;
22
+ }
23
+
24
+ public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
25
+ {
26
+ control.run(Exec.newTaskSource(), 1);
27
+ return Exec.newConfigDiff();
28
+ }
29
+
30
+ public ConfigDiff resume(TaskSource taskSource,
31
+ int taskCount,
32
+ FileInputPlugin.Control control)
33
+ {
34
+ throw new UnsupportedOperationException();
35
+ }
36
+
37
+ public void cleanup(TaskSource taskSource,
38
+ int taskCount,
39
+ List<TaskReport> successTaskReports)
40
+ {
41
+ if (buffer != null) {
42
+ buffer.release();
43
+ buffer = null;
44
+ }
45
+ }
46
+
47
+ public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
48
+ {
49
+ return new BufferTransactionalFileInput(buffer);
50
+ }
51
+
52
+ private static class BufferTransactionalFileInput
53
+ implements TransactionalFileInput
54
+ {
55
+ private Buffer buffer;
56
+
57
+ public BufferTransactionalFileInput(Buffer buffer)
58
+ {
59
+ this.buffer = buffer;
60
+ }
61
+
62
+ @Override
63
+ public Buffer poll()
64
+ {
65
+ Buffer b = buffer;
66
+ buffer = null;
67
+ return b;
68
+ }
69
+
70
+ @Override
71
+ public boolean nextFile()
72
+ {
73
+ return buffer != null;
74
+ }
75
+
76
+ @Override
77
+ public void close() { }
78
+
79
+ @Override
80
+ public void abort() { }
81
+
82
+ @Override
83
+ public TaskReport commit()
84
+ {
85
+ return null;
86
+ }
87
+ }
88
+ }
@@ -31,7 +31,7 @@ public class ExecModule
31
31
  binder.bind(BufferAllocator.class).to(PooledBufferAllocator.class).in(Scopes.SINGLETON);
32
32
  binder.bind(TempFileAllocator.class).in(Scopes.SINGLETON);
33
33
 
34
- // GuessExecutor
34
+ // GuessExecutor, PreviewExecutor
35
35
  registerPluginTo(binder, ParserPlugin.class, "system_guess", GuessExecutor.GuessParserPlugin.class);
36
36
  registerPluginTo(binder, ParserPlugin.class, "system_sampling", SamplingParserPlugin.class);
37
37
 
@@ -191,82 +191,6 @@ public class GuessExecutor
191
191
  return lastGuessed;
192
192
  }
193
193
 
194
- private static class BufferFileInputPlugin
195
- implements FileInputPlugin
196
- {
197
- private Buffer buffer;
198
-
199
- public BufferFileInputPlugin(Buffer buffer)
200
- {
201
- this.buffer = buffer;
202
- }
203
-
204
- public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
205
- {
206
- control.run(Exec.newTaskSource(), 1);
207
- return Exec.newConfigDiff();
208
- }
209
-
210
- public ConfigDiff resume(TaskSource taskSource,
211
- int taskCount,
212
- FileInputPlugin.Control control)
213
- {
214
- throw new UnsupportedOperationException();
215
- }
216
-
217
- public void cleanup(TaskSource taskSource,
218
- int taskCount,
219
- List<TaskReport> successTaskReports)
220
- {
221
- if (buffer != null) {
222
- buffer.release();
223
- buffer = null;
224
- }
225
- }
226
-
227
- public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
228
- {
229
- return new BufferTransactionalFileInput(buffer);
230
- }
231
- }
232
-
233
- private static class BufferTransactionalFileInput
234
- implements TransactionalFileInput
235
- {
236
- private Buffer buffer;
237
-
238
- public BufferTransactionalFileInput(Buffer buffer)
239
- {
240
- this.buffer = buffer;
241
- }
242
-
243
- @Override
244
- public Buffer poll()
245
- {
246
- Buffer b = buffer;
247
- buffer = null;
248
- return b;
249
- }
250
-
251
- @Override
252
- public boolean nextFile()
253
- {
254
- return buffer != null;
255
- }
256
-
257
- @Override
258
- public void close() { }
259
-
260
- @Override
261
- public void abort() { }
262
-
263
- @Override
264
- public TaskReport commit()
265
- {
266
- return null;
267
- }
268
- }
269
-
270
194
  public static class GuessParserPlugin
271
195
  implements ParserPlugin
272
196
  {
@@ -301,6 +301,7 @@ public class LocalExecutorPlugin
301
301
 
302
302
  // outputCommitted
303
303
  tran.commit();
304
+ aborter.dontAbort();
304
305
  }
305
306
  }
306
307
  finally {
@@ -551,6 +552,7 @@ public class LocalExecutorPlugin
551
552
  catch (InterruptedException ex) {
552
553
  error = ex;
553
554
  }
555
+ outputWorkers[i] = null;
554
556
  if (error != null) {
555
557
  throw Throwables.propagate(error);
556
558
  }
@@ -13,6 +13,9 @@ import org.embulk.config.TaskSource;
13
13
  import org.embulk.config.ConfigSource;
14
14
  import org.embulk.config.TaskReport;
15
15
  import org.embulk.plugin.PluginType;
16
+ import org.embulk.spi.Buffer;
17
+ import org.embulk.spi.FileInputPlugin;
18
+ import org.embulk.spi.FileInputRunner;
16
19
  import org.embulk.spi.Schema;
17
20
  import org.embulk.spi.Page;
18
21
  import org.embulk.spi.PageOutput;
@@ -85,10 +88,22 @@ public class PreviewExecutor
85
88
 
86
89
  private PreviewResult doPreview(ConfigSource config)
87
90
  {
88
- final PreviewTask task = config.loadConfig(PreviewTask.class);
89
- final InputPlugin input = newInputPlugin(task);
90
- final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
91
+ PreviewTask task = config.loadConfig(PreviewTask.class);
92
+ InputPlugin inputPlugin = newInputPlugin(task);
93
+ List<FilterPlugin> filterPlugins = newFilterPlugins(task);
94
+
95
+ if (inputPlugin instanceof FileInputRunner) { // file input runner
96
+ Buffer sample = SamplingParserPlugin.runFileInputSampling((FileInputRunner)inputPlugin, config.getNested("in"));
97
+ FileInputRunner previewRunner = new FileInputRunner(new BufferFileInputPlugin(sample));
98
+ return doPreview(task, previewRunner, filterPlugins);
99
+ }
100
+ else {
101
+ return doPreview(task, inputPlugin, filterPlugins);
102
+ }
103
+ }
91
104
 
105
+ private PreviewResult doPreview(final PreviewTask task, final InputPlugin input, final List<FilterPlugin> filterPlugins)
106
+ {
92
107
  try {
93
108
  input.transaction(task.getInputConfig(), new InputPlugin.Control() {
94
109
  public List<TaskReport> run(final TaskSource inputTask, Schema inputSchema, final int taskCount)
@@ -96,8 +111,6 @@ public class PreviewExecutor
96
111
  Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
97
112
  public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
98
113
  {
99
- InputPlugin input = newInputPlugin(task);
100
- List<FilterPlugin> filterPlugins = newFilterPlugins(task);
101
114
  Schema inputSchema = filterSchemas.get(0);
102
115
  Schema outputSchema = filterSchemas.get(filterSchemas.size() - 1);
103
116
 
@@ -83,9 +83,8 @@ public class FileInputRunner
83
83
  public ConfigDiff guess(ConfigSource execConfig, ConfigSource config)
84
84
  {
85
85
  Buffer sample = SamplingParserPlugin.runFileInputSampling(this, config);
86
- if (sample.limit() == 0) {
87
- throw new NoSampleException("Can't get sample data because the first input file is empty");
88
- }
86
+ // SamplingParserPlugin.runFileInputSampling throws NoSampleException if there are
87
+ // no files or all files are smaller than minSampleSize (40 bytes).
89
88
 
90
89
  GuessExecutor guessExecutor = Exec.getInjector().getInstance(GuessExecutor.class);
91
90
  return guessExecutor.guessParserConfig(sample, config, execConfig);
@@ -9,24 +9,24 @@ dependencies {
9
9
  jrubyExec 'rubygems:yard:0.8.7.6'
10
10
  }
11
11
 
12
- task sphinx_html(type: Exec) {
12
+ task sphinxHtml(type: Exec) {
13
13
  workingDir '.'
14
14
  commandLine 'make'
15
15
  args 'html'
16
16
  }
17
17
 
18
- task javadoc_html(type: Copy, dependsOn: [':embulk-core:javadoc']) {
18
+ task javadocHtml(type: Copy, dependsOn: [':embulk-core:javadoc']) {
19
19
  doFirst { file('build/html/javadoc').mkdirs() }
20
20
  from project(':embulk-core').javadoc.destinationDir
21
21
  into 'build/html/javadoc'
22
22
  }
23
23
 
24
- task rdoc_html(type: JRubyExec) {
24
+ task rdocHtml(type: JRubyExec) {
25
25
  workingDir '..'
26
26
  jrubyArgs '-ryard', '-eYARD::CLI::Yardoc.run(*ARGV)'
27
27
  script './lib/embulk/version.rb' // dummy
28
28
  scriptArgs 'lib', '-o', 'embulk-docs/build/html/rdoc'
29
29
  }
30
30
 
31
- task site(type: Copy, dependsOn: ['sphinx_html', 'rdoc_html', 'javadoc_html']) {
31
+ task site(type: Copy, dependsOn: ['sphinxHtml', 'rdocHtml', 'javadocHtml']) {
32
32
  }
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.8.10
7
8
  release/release-0.8.9
8
9
  release/release-0.8.8
9
10
  release/release-0.8.7
@@ -0,0 +1,35 @@
1
+ Release 0.8.10
2
+ ==================================
3
+
4
+ General Changes
5
+ ------------------
6
+
7
+ * Fixed 'IllegalArgumentException: Self-suppression not permitted' error (@hata++) [#446]
8
+
9
+ * Fixed preview so that it no longer reads the entire file when the parser doesn't produce records. Preview now reads only the first 32KB.
10
+
11
+ * Updated JRuby from 9.0.4.0 to 9.1.2.0. Release notes:
12
+
13
+ * http://jruby.org/2016/01/26/jruby-9-0-5-0.html
14
+
15
+ * http://jruby.org/2016/05/03/jruby-9-1-0-0.html
16
+
17
+ * http://jruby.org/2016/05/19/jruby-9-1-1-0.html
18
+
19
+ * http://jruby.org/2016/05/27/jruby-9-1-2-0.html
20
+
21
+ * Updated msgpack-java from 0.8.7 to 0.8.8. Release notes:
22
+
23
+ * https://github.com/msgpack/msgpack-java/blob/0.8.8/RELEASE_NOTES.md
24
+
25
+ Built-in plugins
26
+ ------------------
27
+
28
+ * ``csv`` parser plugin supports delimiters longer than 1 character.
29
+
30
+ * ``csv`` parser no longer converts a non-quoted empty string into NULL when null_string is set. The default behavior is unchanged (a non-quoted empty string is still converted into NULL).
31
+
32
+
33
+ Release Date
34
+ ------------------
35
+ 2016-07-21
@@ -57,7 +57,7 @@ public class CsvParserPlugin
57
57
 
58
58
  @Config("delimiter")
59
59
  @ConfigDefault("\",\"")
60
- char getDelimiterChar();
60
+ String getDelimiter();
61
61
 
62
62
  @Config("quote")
63
63
  @ConfigDefault("\"\\\"\"")
@@ -233,7 +233,6 @@ public class CsvParserPlugin
233
233
  final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
234
234
  final JsonParser jsonParser = new JsonParser();
235
235
  final CsvTokenizer tokenizer = new CsvTokenizer(new LineDecoder(input, task), task);
236
- final String nullStringOrNull = task.getNullString().orNull();
237
236
  final boolean allowOptionalColumns = task.getAllowOptionalColumns();
238
237
  final boolean allowExtraColumns = task.getAllowExtraColumns();
239
238
  final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
@@ -344,17 +343,7 @@ public class CsvParserPlugin
344
343
  //TODO warning
345
344
  return null;
346
345
  }
347
- String v = tokenizer.nextColumn();
348
- if (!v.isEmpty()) {
349
- if (v.equals(nullStringOrNull)) {
350
- return null;
351
- }
352
- return v;
353
- } else if (tokenizer.wasQuotedColumn()) {
354
- return "";
355
- } else {
356
- return null;
357
- }
346
+ return tokenizer.nextColumnOrNull();
358
347
  }
359
348
  });
360
349
 
@@ -7,6 +7,7 @@ import java.util.Deque;
7
7
  import java.util.ArrayDeque;
8
8
  import org.embulk.spi.DataException;
9
9
  import org.embulk.spi.util.LineDecoder;
10
+ import org.embulk.config.ConfigException;
10
11
 
11
12
  public class CsvTokenizer
12
13
  {
@@ -24,7 +25,8 @@ public class CsvTokenizer
24
25
  static final char NO_QUOTE = '\0';
25
26
  static final char NO_ESCAPE = '\0';
26
27
 
27
- private final char delimiter;
28
+ private final char delimiterChar;
29
+ private final String delimiterFollowingString;
28
30
  private final char quote;
29
31
  private final char escape;
30
32
  private final String newline;
@@ -32,6 +34,7 @@ public class CsvTokenizer
32
34
  private final long maxQuotedSizeLimit;
33
35
  private final String commentLineMarker;
34
36
  private final LineDecoder input;
37
+ private final String nullStringOrNull;
35
38
 
36
39
  private RecordState recordState = RecordState.END; // initial state is end of a record. nextRecord() must be called first
37
40
  private long lineNumber = 0;
@@ -44,13 +47,24 @@ public class CsvTokenizer
44
47
 
45
48
  public CsvTokenizer(LineDecoder input, CsvParserPlugin.PluginTask task)
46
49
  {
47
- delimiter = task.getDelimiterChar();
50
+ String delimiter = task.getDelimiter();
51
+ if (delimiter.length() == 0) {
52
+ throw new ConfigException("Empty delimiter is not allowed");
53
+ } else {
54
+ this.delimiterChar = delimiter.charAt(0);
55
+ if (delimiter.length() > 1) {
56
+ delimiterFollowingString = delimiter.substring(1);
57
+ } else {
58
+ delimiterFollowingString = null;
59
+ }
60
+ }
48
61
  quote = task.getQuoteChar().or(CsvParserPlugin.QuoteCharacter.noQuote()).getCharacter();
49
62
  escape = task.getEscapeChar().or(CsvParserPlugin.EscapeCharacter.noEscape()).getCharacter();
50
63
  newline = task.getNewline().getString();
51
64
  trimIfNotQuoted = task.getTrimIfNotQuoted();
52
65
  maxQuotedSizeLimit = task.getMaxQuotedSizeLimit();
53
66
  commentLineMarker = task.getCommentLineMarker().orNull();
67
+ nullStringOrNull = task.getNullString().orNull();
54
68
  this.input = input;
55
69
  }
56
70
 
@@ -91,7 +105,11 @@ public class CsvTokenizer
91
105
 
92
106
  public boolean nextFile()
93
107
  {
94
- return input.nextFile();
108
+ boolean next = input.nextFile();
109
+ if (next) {
110
+ lineNumber = 0;
111
+ }
112
+ return next;
95
113
  }
96
114
 
97
115
  // used by guess-csv
@@ -169,9 +187,15 @@ public class CsvTokenizer
169
187
  // this block can be out of the looop.
170
188
  if (isDelimiter(c)) {
171
189
  // empty value
172
- return "";
173
-
174
- } else if (isEndOfLine(c)) {
190
+ if (delimiterFollowingString == null) {
191
+ return "";
192
+ } else if (isDelimiterFollowingFrom(linePos)) {
193
+ linePos += delimiterFollowingString.length();
194
+ return "";
195
+ }
196
+ // not a delimiter
197
+ }
198
+ if (isEndOfLine(c)) {
175
199
  // empty value
176
200
  recordState = RecordState.END;
177
201
  return "";
@@ -193,9 +217,15 @@ public class CsvTokenizer
193
217
  case FIRST_TRIM:
194
218
  if (isDelimiter(c)) {
195
219
  // empty value
196
- return "";
197
-
198
- } else if (isEndOfLine(c)) {
220
+ if (delimiterFollowingString == null) {
221
+ return "";
222
+ } else if (isDelimiterFollowingFrom(linePos)) {
223
+ linePos += delimiterFollowingString.length();
224
+ return "";
225
+ }
226
+ // not a delimiter
227
+ }
228
+ if (isEndOfLine(c)) {
199
229
  // empty value
200
230
  recordState = RecordState.END;
201
231
  return "";
@@ -218,9 +248,16 @@ public class CsvTokenizer
218
248
 
219
249
  case VALUE:
220
250
  if (isDelimiter(c)) {
221
- return line.substring(valueStartPos, linePos - 1);
222
-
223
- } else if (isEndOfLine(c)) {
251
+ if (delimiterFollowingString == null) {
252
+ return line.substring(valueStartPos, linePos - 1);
253
+ } else if (isDelimiterFollowingFrom(linePos)) {
254
+ String value = line.substring(valueStartPos, linePos - 1);
255
+ linePos += delimiterFollowingString.length();
256
+ return value;
257
+ }
258
+ // not a delimiter
259
+ }
260
+ if (isEndOfLine(c)) {
224
261
  recordState = RecordState.END;
225
262
  return line.substring(valueStartPos, linePos);
226
263
 
@@ -241,9 +278,16 @@ public class CsvTokenizer
241
278
 
242
279
  case LAST_TRIM_OR_VALUE:
243
280
  if (isDelimiter(c)) {
244
- return line.substring(valueStartPos, valueEndPos);
245
-
246
- } else if (isEndOfLine(c)) {
281
+ if (delimiterFollowingString == null) {
282
+ return line.substring(valueStartPos, valueEndPos);
283
+ } else if (isDelimiterFollowingFrom(linePos)) {
284
+ linePos += delimiterFollowingString.length();
285
+ return line.substring(valueStartPos, valueEndPos);
286
+ } else {
287
+ // not a delimiter
288
+ }
289
+ }
290
+ if (isEndOfLine(c)) {
247
291
  recordState = RecordState.END;
248
292
  return line.substring(valueStartPos, valueEndPos);
249
293
 
@@ -304,9 +348,15 @@ public class CsvTokenizer
304
348
 
305
349
  case AFTER_QUOTED_VALUE:
306
350
  if (isDelimiter(c)) {
307
- return quotedValue.toString();
308
-
309
- } else if (isEndOfLine(c)) {
351
+ if (delimiterFollowingString == null) {
352
+ return quotedValue.toString();
353
+ } else if (isDelimiterFollowingFrom(linePos)) {
354
+ linePos += delimiterFollowingString.length();
355
+ return quotedValue.toString();
356
+ }
357
+ // not a delimiter
358
+ }
359
+ if (isEndOfLine(c)) {
310
360
  recordState = RecordState.END;
311
361
  return quotedValue.toString();
312
362
 
@@ -324,6 +374,32 @@ public class CsvTokenizer
324
374
  }
325
375
  }
326
376
 
377
+ public String nextColumnOrNull()
378
+ {
379
+ String v = nextColumn();
380
+ if (nullStringOrNull == null) {
381
+ if (v.isEmpty()) {
382
+ if (wasQuotedColumn) {
383
+ return "";
384
+ }
385
+ else {
386
+ return null;
387
+ }
388
+ }
389
+ else {
390
+ return v;
391
+ }
392
+ }
393
+ else {
394
+ if (v.equals(nullStringOrNull)) {
395
+ return null;
396
+ }
397
+ else {
398
+ return v;
399
+ }
400
+ }
401
+ }
402
+
327
403
  public boolean wasQuotedColumn()
328
404
  {
329
405
  return wasQuotedColumn;
@@ -356,9 +432,22 @@ public class CsvTokenizer
356
432
  return c == ' ';
357
433
  }
358
434
 
435
+ private boolean isDelimiterFollowingFrom(int pos)
436
+ {
437
+ if (line.length() < pos + delimiterFollowingString.length()) {
438
+ return false;
439
+ }
440
+ for (int i = 0; i < delimiterFollowingString.length(); i++) {
441
+ if (delimiterFollowingString.charAt(i) != line.charAt(pos + i)) {
442
+ return false;
443
+ }
444
+ }
445
+ return true;
446
+ }
447
+
359
448
  private boolean isDelimiter(char c)
360
449
  {
361
- return c == delimiter;
450
+ return c == delimiterChar;
362
451
  }
363
452
 
364
453
  private boolean isEndOfLine(char c)
@@ -33,7 +33,7 @@ public class TestCsvParserPlugin
33
33
  assertEquals(Charset.forName("utf-8"), task.getCharset());
34
34
  assertEquals(Newline.CRLF, task.getNewline());
35
35
  assertEquals(false, task.getHeaderLine().or(false));
36
- assertEquals(',', task.getDelimiterChar());
36
+ assertEquals(",", task.getDelimiter());
37
37
  assertEquals(Optional.of(new CsvParserPlugin.QuoteCharacter('\"')), task.getQuoteChar());
38
38
  assertEquals(false, task.getAllowOptionalColumns());
39
39
  assertEquals(DateTimeZone.UTC, task.getDefaultTimeZone());
@@ -68,7 +68,7 @@ public class TestCsvParserPlugin
68
68
  assertEquals(Charset.forName("utf-16"), task.getCharset());
69
69
  assertEquals(Newline.LF, task.getNewline());
70
70
  assertEquals(true, task.getHeaderLine().or(false));
71
- assertEquals('\t', task.getDelimiterChar());
71
+ assertEquals("\t", task.getDelimiter());
72
72
  assertEquals(Optional.of(new CsvParserPlugin.QuoteCharacter('\\')), task.getQuoteChar());
73
73
  assertEquals(true, task.getAllowOptionalColumns());
74
74
  }
@@ -88,12 +88,8 @@ public class TestCsvTokenizer
88
88
  while (tokenizer.nextRecord()) {
89
89
  List<String> record = new ArrayList<>();
90
90
  for (Column c : schema.getColumns()) {
91
- String v = tokenizer.nextColumn();
92
- if (!v.isEmpty()) {
93
- record.add(v);
94
- } else {
95
- record.add(tokenizer.wasQuotedColumn() ? "" : null);
96
- }
91
+ String v = tokenizer.nextColumnOrNull();
92
+ record.add(v);
97
93
  }
98
94
  records.add(record);
99
95
  }
@@ -202,6 +198,31 @@ public class TestCsvTokenizer
202
198
  "ccc\tddd"));
203
199
  }
204
200
 
201
+ @Test
202
+ public void testDefaultNullString() throws Exception
203
+ {
204
+ reloadPluginTask();
205
+ assertEquals(expectedRecords(2,
206
+ null, "",
207
+ "NULL", "NULL"),
208
+ parse(task,
209
+ ",\"\"",
210
+ "NULL,\"NULL\""));
211
+ }
212
+
213
+ @Test
214
+ public void testChangeNullString() throws Exception
215
+ {
216
+ config.set("null_string", "NULL");
217
+ reloadPluginTask();
218
+ assertEquals(expectedRecords(2,
219
+ "", "",
220
+ null, null),
221
+ parse(task,
222
+ ",\"\"",
223
+ "NULL,\"NULL\""));
224
+ }
225
+
205
226
  @Test
206
227
  public void testQuotedValues() throws Exception
207
228
  {
@@ -30,7 +30,7 @@ Gem::Specification.new do |gem|
30
30
  gem.platform = 'java'
31
31
 
32
32
  else
33
- gem.add_dependency "jruby-jars", '= 9.0.4.0'
33
+ gem.add_dependency "jruby-jars", '= 9.1.2.0'
34
34
  end
35
35
 
36
36
  gem.add_development_dependency "rake", [">= 0.10.0"]
@@ -122,7 +122,7 @@ EOF
122
122
  # add rules...
123
123
  ##
124
124
 
125
- migrator.write(".ruby-version", "jruby-9.0.4.0")
125
+ migrator.write(".ruby-version", "jruby-9.1.2.0")
126
126
 
127
127
  # update version at the end
128
128
  if from_ver <= version("0.1.0")
@@ -1 +1 @@
1
- jruby-9.0.5.0
1
+ jruby-9.1.2.0
@@ -1 +1 @@
1
- jruby-9.0.4.0
1
+ jruby-9.1.2.0
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.8.9'
2
+ VERSION = '0.8.10'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.9
4
+ version: 0.8.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-12 00:00:00.000000000 Z
11
+ date: 2016-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jruby-jars
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 9.0.4.0
19
+ version: 9.1.2.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 9.0.4.0
26
+ version: 9.1.2.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -108,9 +108,9 @@ files:
108
108
  - classpath/commons-beanutils-core-1.8.3.jar
109
109
  - classpath/commons-compress-1.10.jar
110
110
  - classpath/commons-lang3-3.1.jar
111
- - classpath/embulk-cli-0.8.9.jar
112
- - classpath/embulk-core-0.8.9.jar
113
- - classpath/embulk-standards-0.8.9.jar
111
+ - classpath/embulk-cli-0.8.10.jar
112
+ - classpath/embulk-core-0.8.10.jar
113
+ - classpath/embulk-standards-0.8.10.jar
114
114
  - classpath/guava-18.0.jar
115
115
  - classpath/guice-4.0.jar
116
116
  - classpath/guice-bootstrap-0.1.1.jar
@@ -126,7 +126,7 @@ files:
126
126
  - classpath/joda-time-2.9.2.jar
127
127
  - classpath/logback-classic-1.1.3.jar
128
128
  - classpath/logback-core-1.1.3.jar
129
- - classpath/msgpack-core-0.8.7.jar
129
+ - classpath/msgpack-core-0.8.8.jar
130
130
  - classpath/netty-buffer-5.0.0.Alpha1.jar
131
131
  - classpath/netty-common-5.0.0.Alpha1.jar
132
132
  - classpath/slf4j-api-1.7.12.jar
@@ -167,6 +167,7 @@ files:
167
167
  - embulk-core/src/main/java/org/embulk/config/UserDataException.java
168
168
  - embulk-core/src/main/java/org/embulk/config/UserDataExceptions.java
169
169
  - embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java
170
+ - embulk-core/src/main/java/org/embulk/exec/BufferFileInputPlugin.java
170
171
  - embulk-core/src/main/java/org/embulk/exec/BulkLoader.java
171
172
  - embulk-core/src/main/java/org/embulk/exec/ConfigurableGuessInputPlugin.java
172
173
  - embulk-core/src/main/java/org/embulk/exec/ExecModule.java
@@ -422,6 +423,7 @@ files:
422
423
  - embulk-docs/src/release/release-0.7.9.rst
423
424
  - embulk-docs/src/release/release-0.8.0.rst
424
425
  - embulk-docs/src/release/release-0.8.1.rst
426
+ - embulk-docs/src/release/release-0.8.10.rst
425
427
  - embulk-docs/src/release/release-0.8.2.rst
426
428
  - embulk-docs/src/release/release-0.8.3.rst
427
429
  - embulk-docs/src/release/release-0.8.4.rst