embulk 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0cd34264ca6a673f948b77e42cb3049e184a4f40
4
- data.tar.gz: 8d56e898df4411110bc5c42c6817803d93a732fe
3
+ metadata.gz: e0be98e5dbe81e40c6562142d2cdf44cc4f8cf34
4
+ data.tar.gz: f907d431af0add753761547f09dec47113b3b236
5
5
  SHA512:
6
- metadata.gz: 13ad1c043fff795f33199985f955d67238d4d54297516cd3d3ac8abcd1ea90e9cc43569e2745918c6090ce4373ae01a92f2cd26f35fddd37a93aa5d70c8be273
7
- data.tar.gz: f02d113e3eddb344f9080d9dc5163c61e6718455b4d404607eb0a1bd62ac94da547697dd2be5796c6a44404a92ef01c845553f7da7a672b005dc434c5b3c5c13
6
+ metadata.gz: 2a1690e94a7622db588cc6511f1dec583320192960ecdf9851c74ac1f5feaf7bda478b8e6dbf16a7b58f870e38dd464ee949cc20c6f5cadb3473b04bf3cf23db
7
+ data.tar.gz: 0eb2a31661f7772cadee71642781d2ef30cfc5015ce5bf549a3cc36310fbe29283f80d272b91ee788a4513c52ae8c09ecad8e4dce28e459ad11c5370f68a6e22
data/README.md CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
30
30
  Following 4 commands install embulk to your home directory:
31
31
 
32
32
  ```
33
- curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.3.jar
33
+ curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar
34
34
  chmod +x ~/.embulk/bin/embulk
35
35
  echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
36
36
  source ~/.bashrc
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
45
45
  You can assume the jar file is a .bat file.
46
46
 
47
47
  ```
48
- PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.3.jar -OutFile embulk.bat}"
48
+ PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar -OutFile embulk.bat}"
49
49
  ```
50
50
 
51
51
  Next step: [Trying examples](#trying-examples)
data/build.gradle CHANGED
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
12
12
 
13
13
  allprojects {
14
14
  group = 'org.embulk'
15
- version = '0.5.3'
15
+ version = '0.5.4'
16
16
 
17
17
  apply plugin: 'java'
18
18
  apply plugin: 'maven-publish'
@@ -11,6 +11,9 @@ public class PooledBufferAllocator
11
11
  {
12
12
  private PooledByteBufAllocator nettyBuffer;
13
13
 
14
+ private int DEFAULT_BUFFER_SIZE = 32*1024;
15
+ private int MINIMUM_BUFFER_SIZE = 8*1024;
16
+
14
17
  public PooledBufferAllocator()
15
18
  {
16
19
  // TODO configure parameters
@@ -19,12 +22,12 @@ public class PooledBufferAllocator
19
22
 
20
23
  public Buffer allocate()
21
24
  {
22
- return new NettyByteBufBuffer(nettyBuffer.buffer());
25
+ return allocate(DEFAULT_BUFFER_SIZE);
23
26
  }
24
27
 
25
28
  public Buffer allocate(int minimumCapacity)
26
29
  {
27
- int size = 32*1024;
30
+ int size = MINIMUM_BUFFER_SIZE;
28
31
  while (size < minimumCapacity) {
29
32
  size *= 2;
30
33
  }
@@ -28,6 +28,10 @@ public class PluginManager
28
28
  throw new ConfigException("No PluginSource is installed");
29
29
  }
30
30
 
31
+ if (type == null) {
32
+ throw new ConfigException(String.format("%s type is not set (if you intend to use NullOutputPlugin, you should enclose null in quotes such as {type: \"null\"}.", iface.getSimpleName()));
33
+ }
34
+
31
35
  List<Throwable> causes = new ArrayList<Throwable>();
32
36
  for (PluginSource source : sources) {
33
37
  try {
@@ -36,6 +36,7 @@ public class FileOutputOutputStream
36
36
 
37
37
  public void finish()
38
38
  {
39
+ doFlush();
39
40
  out.finish();
40
41
  }
41
42
 
@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
56
56
 
57
57
  .. code-block:: console
58
58
 
59
- $ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.3.jar -O /usr/local/bin/embulk
59
+ $ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar -O /usr/local/bin/embulk
60
60
  $ sudo chmod +x /usr/local/bin/embulk
61
61
 
62
62
  Step 2. Install Elasticsearch plugin
@@ -25,4 +25,5 @@ Release Notes
25
25
  release/release-0.5.1
26
26
  release/release-0.5.2
27
27
  release/release-0.5.3
28
+ release/release-0.5.4
28
29
 
@@ -0,0 +1,24 @@
1
+ Release 0.5.4
2
+ ==================================
3
+
4
+ Built-in plugins
5
+ ------------------
6
+
7
+ * ``parser-csv`` supports the ``allow_optional_columns`` option. With this option set to ``true``, the parser sets missing columns to null rather than skipping the entire row (@kamatama41++)
8
+
9
+ * Fixed exception handling of ``parser-csv`` so that the transaction properly fails with underlying exceptions such as IOException
10
+
11
+
12
+ General Changes
13
+ ------------------
14
+
15
+ * Increased buffer size from 256 bytes to 32 KB. This improves performance significantly. (@hito4t++)
16
+
17
+ * If plugin type is null, the error message now suggests using ``{type: "null"}`` (@hito4t++)
18
+
19
+ * Embulk logo is available! See the orca: https://github.com/embulk/embulk/issues/12
20
+
21
+
22
+ Release Date
23
+ ------------------
24
+ 2015-03-23
@@ -1,6 +1,5 @@
1
1
  package org.embulk.standards;
2
2
 
3
- import com.google.common.base.Preconditions;
4
3
  import com.google.common.base.Optional;
5
4
  import com.google.common.collect.ImmutableSet;
6
5
  import org.embulk.config.Task;
@@ -21,7 +20,6 @@ import org.embulk.spi.ParserPlugin;
21
20
  import org.embulk.spi.Exec;
22
21
  import org.embulk.spi.FileInput;
23
22
  import org.embulk.spi.PageOutput;
24
- import org.embulk.spi.BufferAllocator;
25
23
  import org.embulk.spi.util.LineDecoder;
26
24
  import org.slf4j.Logger;
27
25
 
@@ -76,6 +74,10 @@ public class CsvParserPlugin
76
74
  @Config("max_quoted_size_limit")
77
75
  @ConfigDefault("131072") //128kB
78
76
  public long getMaxQuotedSizeLimit();
77
+
78
+ @Config("allow_optional_columns")
79
+ @ConfigDefault("false")
80
+ public boolean getAllowOptionalColumns();
79
81
  }
80
82
 
81
83
  private final Logger log;
@@ -127,6 +129,7 @@ public class CsvParserPlugin
127
129
  LineDecoder lineDecoder = new LineDecoder(input, task);
128
130
  final CsvTokenizer tokenizer = new CsvTokenizer(lineDecoder, task);
129
131
  final String nullStringOrNull = task.getNullString().orNull();
132
+ final boolean allowOptionalColumns = task.getAllowOptionalColumns();
130
133
  int skipHeaderLines = task.getSkipHeaderLines();
131
134
 
132
135
  try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
@@ -147,7 +150,7 @@ public class CsvParserPlugin
147
150
  schema.visitColumns(new ColumnVisitor() {
148
151
  public void booleanColumn(Column column)
149
152
  {
150
- String v = nextColumn(schema, tokenizer, nullStringOrNull);
153
+ String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
151
154
  if (v == null) {
152
155
  pageBuilder.setNull(column);
153
156
  } else {
@@ -157,7 +160,7 @@ public class CsvParserPlugin
157
160
 
158
161
  public void longColumn(Column column)
159
162
  {
160
- String v = nextColumn(schema, tokenizer, nullStringOrNull);
163
+ String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
161
164
  if (v == null) {
162
165
  pageBuilder.setNull(column);
163
166
  } else {
@@ -172,7 +175,7 @@ public class CsvParserPlugin
172
175
 
173
176
  public void doubleColumn(Column column)
174
177
  {
175
- String v = nextColumn(schema, tokenizer, nullStringOrNull);
178
+ String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
176
179
  if (v == null) {
177
180
  pageBuilder.setNull(column);
178
181
  } else {
@@ -187,7 +190,7 @@ public class CsvParserPlugin
187
190
 
188
191
  public void stringColumn(Column column)
189
192
  {
190
- String v = nextColumn(schema, tokenizer, nullStringOrNull);
193
+ String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
191
194
  if (v == null) {
192
195
  pageBuilder.setNull(column);
193
196
  } else {
@@ -197,7 +200,7 @@ public class CsvParserPlugin
197
200
 
198
201
  public void timestampColumn(Column column)
199
202
  {
200
- String v = nextColumn(schema, tokenizer, nullStringOrNull);
203
+ String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
201
204
  if (v == null) {
202
205
  pageBuilder.setNull(column);
203
206
  } else {
@@ -212,8 +215,7 @@ public class CsvParserPlugin
212
215
  });
213
216
  pageBuilder.addRecord();
214
217
 
215
- } catch (Exception e) {
216
- // TODO logging
218
+ } catch (CsvTokenizer.InvalidFormatException e) {
217
219
  long lineNumber = tokenizer.getCurrentLineNumber();
218
220
  String skippedLine = tokenizer.skipCurrentLine();
219
221
  log.warn(String.format("Skipped (line %d): %s", lineNumber, skippedLine), e);
@@ -226,8 +228,11 @@ public class CsvParserPlugin
226
228
  }
227
229
  }
228
230
 
229
- private static String nextColumn(Schema schema, CsvTokenizer tokenizer, String nullStringOrNull)
231
+ private static String nextColumn(Schema schema, CsvTokenizer tokenizer, String nullStringOrNull, boolean allowOptionalColumns)
230
232
  {
233
+ if(allowOptionalColumns && !tokenizer.hasNextColumn()) {
234
+ return null;
235
+ }
231
236
  String v = tokenizer.nextColumn();
232
237
  if (!v.isEmpty()) {
233
238
  if (v.equals(nullStringOrNull)) {
@@ -20,7 +20,6 @@ public class CsvTokenizer
20
20
  }
21
21
 
22
22
  private static final char END_OF_LINE = '\0';
23
- private static final boolean TRACE = false;
24
23
 
25
24
  private final char delimiter;
26
25
  private final char quote;
@@ -81,7 +80,10 @@ public class CsvTokenizer
81
80
  public boolean nextRecord()
82
81
  {
83
82
  // If at the end of record, read the next line and initialize the state
84
- Preconditions.checkState(recordState == RecordState.END, "too many columns"); // TODO exception class
83
+ if (recordState != RecordState.END) {
84
+ throw new TooManyColumnsException("Too many columns");
85
+ }
86
+
85
87
  boolean hasNext = nextLine(true);
86
88
  if (hasNext) {
87
89
  recordState = RecordState.NOT_END;
@@ -105,10 +107,6 @@ public class CsvTokenizer
105
107
  linePos = 0;
106
108
  lineNumber++;
107
109
 
108
- if (TRACE) {
109
- System.out.println("#MN line: " + line + " (" + lineNumber + ")");
110
- }
111
-
112
110
  if (!line.isEmpty() || !ignoreEmptyLine) {
113
111
  return true;
114
112
  }
@@ -122,7 +120,9 @@ public class CsvTokenizer
122
120
 
123
121
  public String nextColumn()
124
122
  {
125
- Preconditions.checkState(hasNextColumn(), "doesn't have enough columns"); // TODO exception class
123
+ if (!hasNextColumn()) {
124
+ throw new TooFewColumnsException("Too few columns");
125
+ }
126
126
 
127
127
  // reset last state
128
128
  wasQuotedColumn = false;
@@ -136,10 +136,6 @@ public class CsvTokenizer
136
136
 
137
137
  while (true) {
138
138
  final char c = nextChar();
139
- if (TRACE) {
140
- System.out.println("#MN c: " + c + " (" + columnState + "," + recordState + ")");
141
- try { Thread.sleep(100); } catch (Exception e) {}
142
- }
143
139
 
144
140
  switch (columnState) {
145
141
  case BEGIN:
@@ -241,15 +237,12 @@ public class CsvTokenizer
241
237
  quotedValue.append(newline);
242
238
  quotedValueLines.add(line);
243
239
  if (!nextLine(false)) {
244
- throw new RuntimeException("Unexpected end of line during parsing a quoted value"); // TODO exception class
240
+ throw new InvalidValueException("Unexpected end of line during parsing a quoted value");
245
241
  }
246
242
  valueStartPos = 0;
247
243
 
248
244
  } else if (isQuote(c)) {
249
245
  char next = peekNextChar();
250
- if (TRACE) {
251
- System.out.println("#MN peeked c: " + next + " (" + columnState + "," + recordState + ")");
252
- }
253
246
  if (isQuote(next)) { // escaped quote
254
247
  quotedValue.append(line.substring(valueStartPos, linePos));
255
248
  valueStartPos = ++linePos;
@@ -261,15 +254,12 @@ public class CsvTokenizer
261
254
  } else if (isEscape(c)) { // isQuote must be checked first in case of quote == escape
262
255
  // In RFC 4180, CSV's escape char is '\"'. But '\\' is often used.
263
256
  char next = peekNextChar();
264
- if (TRACE) {
265
- System.out.println("#MN peeked c: " + next + " (" + columnState + "," + recordState + ")");
266
- }
267
257
  if (isEndOfLine(c)) {
268
258
  // escape end of line. TODO assuming multi-line quoted value without newline?
269
259
  quotedValue.append(line.substring(valueStartPos, linePos));
270
260
  quotedValueLines.add(line);
271
261
  if (!nextLine(false)) {
272
- throw new RuntimeException("Unexpected end of line during parsing a quoted value"); // TODO exception class
262
+ throw new InvalidValueException("Unexpected end of line during parsing a quoted value");
273
263
  }
274
264
  valueStartPos = 0;
275
265
  } else if (isQuote(next) || isEscape(next)) { // escaped quote
@@ -298,7 +288,7 @@ public class CsvTokenizer
298
288
  // column has trailing spaces and quoted. TODO should this be rejected?
299
289
 
300
290
  } else {
301
- throw new RuntimeException("Unexpected extra character after quoted value"); // TODO exception class
291
+ throw new InvalidValueException("Unexpected extra character after quoted value");
302
292
  }
303
293
  break;
304
294
 
@@ -360,10 +350,46 @@ public class CsvTokenizer
360
350
  return c == escape;
361
351
  }
362
352
 
363
- static class QuotedSizeLimitExceededException
353
+ public static class InvalidFormatException
364
354
  extends RuntimeException
365
355
  {
366
- QuotedSizeLimitExceededException(String message)
356
+ public InvalidFormatException(String message)
357
+ {
358
+ super(message);
359
+ }
360
+ }
361
+
362
+ public static class InvalidValueException
363
+ extends RuntimeException
364
+ {
365
+ public InvalidValueException(String message)
366
+ {
367
+ super(message);
368
+ }
369
+ }
370
+
371
+ public static class QuotedSizeLimitExceededException
372
+ extends InvalidValueException
373
+ {
374
+ public QuotedSizeLimitExceededException(String message)
375
+ {
376
+ super(message);
377
+ }
378
+ }
379
+
380
+ public class TooManyColumnsException
381
+ extends InvalidFormatException
382
+ {
383
+ public TooManyColumnsException(String message)
384
+ {
385
+ super(message);
386
+ }
387
+ }
388
+
389
+ public class TooFewColumnsException
390
+ extends InvalidFormatException
391
+ {
392
+ public TooFewColumnsException(String message)
367
393
  {
368
394
  super(message);
369
395
  }
@@ -1,7 +1,6 @@
1
1
  package org.embulk.standards;
2
2
 
3
3
  import org.junit.Rule;
4
- import org.junit.Before;
5
4
  import org.junit.Test;
6
5
  import static org.junit.Assert.assertEquals;
7
6
  import java.nio.charset.Charset;
@@ -34,6 +33,7 @@ public class TestCsvParserPlugin
34
33
  assertEquals(false, task.getHeaderLine().or(false));
35
34
  assertEquals(',', task.getDelimiterChar());
36
35
  assertEquals('\"', task.getQuoteChar());
36
+ assertEquals(false, task.getAllowOptionalColumns());
37
37
  }
38
38
 
39
39
  @Test(expected = ConfigException.class)
@@ -53,6 +53,7 @@ public class TestCsvParserPlugin
53
53
  .set("header_line", true)
54
54
  .set("delimiter", "\t")
55
55
  .set("quote", "\\")
56
+ .set("allow_optional_columns", true)
56
57
  .set("columns", ImmutableList.of(
57
58
  ImmutableMap.of(
58
59
  "name", "date_code",
@@ -65,5 +66,6 @@ public class TestCsvParserPlugin
65
66
  assertEquals(true, task.getHeaderLine().or(false));
66
67
  assertEquals('\t', task.getDelimiterChar());
67
68
  assertEquals('\\', task.getQuoteChar());
69
+ assertEquals(true, task.getAllowOptionalColumns());
68
70
  }
69
71
  }
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.5.3'
2
+ VERSION = '0.5.4'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-17 00:00:00.000000000 Z
11
+ date: 2015-03-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -275,6 +275,7 @@ files:
275
275
  - embulk-docs/src/release/release-0.5.1.rst
276
276
  - embulk-docs/src/release/release-0.5.2.rst
277
277
  - embulk-docs/src/release/release-0.5.3.rst
278
+ - embulk-docs/src/release/release-0.5.4.rst
278
279
  - embulk-standards/build.gradle
279
280
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
280
281
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -379,8 +380,8 @@ files:
379
380
  - classpath/bval-jsr303-0.5.jar
380
381
  - classpath/commons-beanutils-core-1.8.3.jar
381
382
  - classpath/commons-lang3-3.1.jar
382
- - classpath/embulk-core-0.5.3.jar
383
- - classpath/embulk-standards-0.5.3.jar
383
+ - classpath/embulk-core-0.5.4.jar
384
+ - classpath/embulk-standards-0.5.4.jar
384
385
  - classpath/guava-18.0.jar
385
386
  - classpath/guice-3.0.jar
386
387
  - classpath/guice-multibindings-3.0.jar