embulk 0.5.3 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0cd34264ca6a673f948b77e42cb3049e184a4f40
4
- data.tar.gz: 8d56e898df4411110bc5c42c6817803d93a732fe
3
+ metadata.gz: e0be98e5dbe81e40c6562142d2cdf44cc4f8cf34
4
+ data.tar.gz: f907d431af0add753761547f09dec47113b3b236
5
5
  SHA512:
6
- metadata.gz: 13ad1c043fff795f33199985f955d67238d4d54297516cd3d3ac8abcd1ea90e9cc43569e2745918c6090ce4373ae01a92f2cd26f35fddd37a93aa5d70c8be273
7
- data.tar.gz: f02d113e3eddb344f9080d9dc5163c61e6718455b4d404607eb0a1bd62ac94da547697dd2be5796c6a44404a92ef01c845553f7da7a672b005dc434c5b3c5c13
6
+ metadata.gz: 2a1690e94a7622db588cc6511f1dec583320192960ecdf9851c74ac1f5feaf7bda478b8e6dbf16a7b58f870e38dd464ee949cc20c6f5cadb3473b04bf3cf23db
7
+ data.tar.gz: 0eb2a31661f7772cadee71642781d2ef30cfc5015ce5bf549a3cc36310fbe29283f80d272b91ee788a4513c52ae8c09ecad8e4dce28e459ad11c5370f68a6e22
data/README.md CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
30
30
  Following 4 commands install embulk to your home directory:
31
31
 
32
32
  ```
33
- curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.3.jar
33
+ curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar
34
34
  chmod +x ~/.embulk/bin/embulk
35
35
  echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
36
36
  source ~/.bashrc
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
45
45
  You can assume the jar file is a .bat file.
46
46
 
47
47
  ```
48
- PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.3.jar -OutFile embulk.bat}"
48
+ PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar -OutFile embulk.bat}"
49
49
  ```
50
50
 
51
51
  Next step: [Trying examples](#trying-examples)
data/build.gradle CHANGED
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
12
12
 
13
13
  allprojects {
14
14
  group = 'org.embulk'
15
- version = '0.5.3'
15
+ version = '0.5.4'
16
16
 
17
17
  apply plugin: 'java'
18
18
  apply plugin: 'maven-publish'
@@ -11,6 +11,9 @@ public class PooledBufferAllocator
11
11
  {
12
12
  private PooledByteBufAllocator nettyBuffer;
13
13
 
14
+ private int DEFAULT_BUFFER_SIZE = 32*1024;
15
+ private int MINIMUM_BUFFER_SIZE = 8*1024;
16
+
14
17
  public PooledBufferAllocator()
15
18
  {
16
19
  // TODO configure parameters
@@ -19,12 +22,12 @@ public class PooledBufferAllocator
19
22
 
20
23
  public Buffer allocate()
21
24
  {
22
- return new NettyByteBufBuffer(nettyBuffer.buffer());
25
+ return allocate(DEFAULT_BUFFER_SIZE);
23
26
  }
24
27
 
25
28
  public Buffer allocate(int minimumCapacity)
26
29
  {
27
- int size = 32*1024;
30
+ int size = MINIMUM_BUFFER_SIZE;
28
31
  while (size < minimumCapacity) {
29
32
  size *= 2;
30
33
  }
@@ -28,6 +28,10 @@ public class PluginManager
28
28
  throw new ConfigException("No PluginSource is installed");
29
29
  }
30
30
 
31
+ if (type == null) {
32
+ throw new ConfigException(String.format("%s type is not set (if you intend to use NullOutputPlugin, you should enclose null in quotes such as {type: \"null\"}.", iface.getSimpleName()));
33
+ }
34
+
31
35
  List<Throwable> causes = new ArrayList<Throwable>();
32
36
  for (PluginSource source : sources) {
33
37
  try {
@@ -36,6 +36,7 @@ public class FileOutputOutputStream
36
36
 
37
37
  public void finish()
38
38
  {
39
+ doFlush();
39
40
  out.finish();
40
41
  }
41
42
 
@@ -56,7 +56,7 @@ You can find the latest embulk binary from the `releases <https://bintray.com/em
56
56
 
57
57
  .. code-block:: console
58
58
 
59
- $ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.3.jar -O /usr/local/bin/embulk
59
+ $ sudo wget https://bintray.com/artifact/download/embulk/maven/embulk-0.5.4.jar -O /usr/local/bin/embulk
60
60
  $ sudo chmod +x /usr/local/bin/embulk
61
61
 
62
62
  Step 2. Install Elasticsearch plugin
@@ -25,4 +25,5 @@ Release Notes
25
25
  release/release-0.5.1
26
26
  release/release-0.5.2
27
27
  release/release-0.5.3
28
+ release/release-0.5.4
28
29
 
@@ -0,0 +1,24 @@
1
+ Release 0.5.4
2
+ ==================================
3
+
4
+ Built-in plugins
5
+ ------------------
6
+
7
+ * ``parser-csv`` supports the ``allow_optional_columns`` option. With this option set to ``true``, the parser sets missing columns to null rather than skipping the entire row (@kamatama41++)
8
+
9
+ * Fixed exception handling of ``parser-csv`` so that the transaction properly fails with underlying exceptions such as IOException
10
+
11
+
12
+ General Changes
13
+ ------------------
14
+
15
+ * Increased buffer size from 256 bytes to 32 KB. This improves performance significantly. (@hito4t++)
16
+
17
+ * If plugin type is null, the error message now suggests using ``{type: "null"}`` (@hito4t++)
18
+
19
+ * Embulk logo is available! See the orca: https://github.com/embulk/embulk/issues/12
20
+
21
+
22
+ Release Date
23
+ ------------------
24
+ 2015-03-23
@@ -1,6 +1,5 @@
1
1
  package org.embulk.standards;
2
2
 
3
- import com.google.common.base.Preconditions;
4
3
  import com.google.common.base.Optional;
5
4
  import com.google.common.collect.ImmutableSet;
6
5
  import org.embulk.config.Task;
@@ -21,7 +20,6 @@ import org.embulk.spi.ParserPlugin;
21
20
  import org.embulk.spi.Exec;
22
21
  import org.embulk.spi.FileInput;
23
22
  import org.embulk.spi.PageOutput;
24
- import org.embulk.spi.BufferAllocator;
25
23
  import org.embulk.spi.util.LineDecoder;
26
24
  import org.slf4j.Logger;
27
25
 
@@ -76,6 +74,10 @@ public class CsvParserPlugin
76
74
  @Config("max_quoted_size_limit")
77
75
  @ConfigDefault("131072") //128kB
78
76
  public long getMaxQuotedSizeLimit();
77
+
78
+ @Config("allow_optional_columns")
79
+ @ConfigDefault("false")
80
+ public boolean getAllowOptionalColumns();
79
81
  }
80
82
 
81
83
  private final Logger log;
@@ -127,6 +129,7 @@ public class CsvParserPlugin
127
129
  LineDecoder lineDecoder = new LineDecoder(input, task);
128
130
  final CsvTokenizer tokenizer = new CsvTokenizer(lineDecoder, task);
129
131
  final String nullStringOrNull = task.getNullString().orNull();
132
+ final boolean allowOptionalColumns = task.getAllowOptionalColumns();
130
133
  int skipHeaderLines = task.getSkipHeaderLines();
131
134
 
132
135
  try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
@@ -147,7 +150,7 @@ public class CsvParserPlugin
147
150
  schema.visitColumns(new ColumnVisitor() {
148
151
  public void booleanColumn(Column column)
149
152
  {
150
- String v = nextColumn(schema, tokenizer, nullStringOrNull);
153
+ String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
151
154
  if (v == null) {
152
155
  pageBuilder.setNull(column);
153
156
  } else {
@@ -157,7 +160,7 @@ public class CsvParserPlugin
157
160
 
158
161
  public void longColumn(Column column)
159
162
  {
160
- String v = nextColumn(schema, tokenizer, nullStringOrNull);
163
+ String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
161
164
  if (v == null) {
162
165
  pageBuilder.setNull(column);
163
166
  } else {
@@ -172,7 +175,7 @@ public class CsvParserPlugin
172
175
 
173
176
  public void doubleColumn(Column column)
174
177
  {
175
- String v = nextColumn(schema, tokenizer, nullStringOrNull);
178
+ String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
176
179
  if (v == null) {
177
180
  pageBuilder.setNull(column);
178
181
  } else {
@@ -187,7 +190,7 @@ public class CsvParserPlugin
187
190
 
188
191
  public void stringColumn(Column column)
189
192
  {
190
- String v = nextColumn(schema, tokenizer, nullStringOrNull);
193
+ String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
191
194
  if (v == null) {
192
195
  pageBuilder.setNull(column);
193
196
  } else {
@@ -197,7 +200,7 @@ public class CsvParserPlugin
197
200
 
198
201
  public void timestampColumn(Column column)
199
202
  {
200
- String v = nextColumn(schema, tokenizer, nullStringOrNull);
203
+ String v = nextColumn(schema, tokenizer, nullStringOrNull, allowOptionalColumns);
201
204
  if (v == null) {
202
205
  pageBuilder.setNull(column);
203
206
  } else {
@@ -212,8 +215,7 @@ public class CsvParserPlugin
212
215
  });
213
216
  pageBuilder.addRecord();
214
217
 
215
- } catch (Exception e) {
216
- // TODO logging
218
+ } catch (CsvTokenizer.InvalidFormatException e) {
217
219
  long lineNumber = tokenizer.getCurrentLineNumber();
218
220
  String skippedLine = tokenizer.skipCurrentLine();
219
221
  log.warn(String.format("Skipped (line %d): %s", lineNumber, skippedLine), e);
@@ -226,8 +228,11 @@ public class CsvParserPlugin
226
228
  }
227
229
  }
228
230
 
229
- private static String nextColumn(Schema schema, CsvTokenizer tokenizer, String nullStringOrNull)
231
+ private static String nextColumn(Schema schema, CsvTokenizer tokenizer, String nullStringOrNull, boolean allowOptionalColumns)
230
232
  {
233
+ if(allowOptionalColumns && !tokenizer.hasNextColumn()) {
234
+ return null;
235
+ }
231
236
  String v = tokenizer.nextColumn();
232
237
  if (!v.isEmpty()) {
233
238
  if (v.equals(nullStringOrNull)) {
@@ -20,7 +20,6 @@ public class CsvTokenizer
20
20
  }
21
21
 
22
22
  private static final char END_OF_LINE = '\0';
23
- private static final boolean TRACE = false;
24
23
 
25
24
  private final char delimiter;
26
25
  private final char quote;
@@ -81,7 +80,10 @@ public class CsvTokenizer
81
80
  public boolean nextRecord()
82
81
  {
83
82
  // If at the end of record, read the next line and initialize the state
84
- Preconditions.checkState(recordState == RecordState.END, "too many columns"); // TODO exception class
83
+ if (recordState != RecordState.END) {
84
+ throw new TooManyColumnsException("Too many columns");
85
+ }
86
+
85
87
  boolean hasNext = nextLine(true);
86
88
  if (hasNext) {
87
89
  recordState = RecordState.NOT_END;
@@ -105,10 +107,6 @@ public class CsvTokenizer
105
107
  linePos = 0;
106
108
  lineNumber++;
107
109
 
108
- if (TRACE) {
109
- System.out.println("#MN line: " + line + " (" + lineNumber + ")");
110
- }
111
-
112
110
  if (!line.isEmpty() || !ignoreEmptyLine) {
113
111
  return true;
114
112
  }
@@ -122,7 +120,9 @@ public class CsvTokenizer
122
120
 
123
121
  public String nextColumn()
124
122
  {
125
- Preconditions.checkState(hasNextColumn(), "doesn't have enough columns"); // TODO exception class
123
+ if (!hasNextColumn()) {
124
+ throw new TooFewColumnsException("Too few columns");
125
+ }
126
126
 
127
127
  // reset last state
128
128
  wasQuotedColumn = false;
@@ -136,10 +136,6 @@ public class CsvTokenizer
136
136
 
137
137
  while (true) {
138
138
  final char c = nextChar();
139
- if (TRACE) {
140
- System.out.println("#MN c: " + c + " (" + columnState + "," + recordState + ")");
141
- try { Thread.sleep(100); } catch (Exception e) {}
142
- }
143
139
 
144
140
  switch (columnState) {
145
141
  case BEGIN:
@@ -241,15 +237,12 @@ public class CsvTokenizer
241
237
  quotedValue.append(newline);
242
238
  quotedValueLines.add(line);
243
239
  if (!nextLine(false)) {
244
- throw new RuntimeException("Unexpected end of line during parsing a quoted value"); // TODO exception class
240
+ throw new InvalidValueException("Unexpected end of line during parsing a quoted value");
245
241
  }
246
242
  valueStartPos = 0;
247
243
 
248
244
  } else if (isQuote(c)) {
249
245
  char next = peekNextChar();
250
- if (TRACE) {
251
- System.out.println("#MN peeked c: " + next + " (" + columnState + "," + recordState + ")");
252
- }
253
246
  if (isQuote(next)) { // escaped quote
254
247
  quotedValue.append(line.substring(valueStartPos, linePos));
255
248
  valueStartPos = ++linePos;
@@ -261,15 +254,12 @@ public class CsvTokenizer
261
254
  } else if (isEscape(c)) { // isQuote must be checked first in case of quote == escape
262
255
  // In RFC 4180, CSV's escape char is '\"'. But '\\' is often used.
263
256
  char next = peekNextChar();
264
- if (TRACE) {
265
- System.out.println("#MN peeked c: " + next + " (" + columnState + "," + recordState + ")");
266
- }
267
257
  if (isEndOfLine(c)) {
268
258
  // escape end of line. TODO assuming multi-line quoted value without newline?
269
259
  quotedValue.append(line.substring(valueStartPos, linePos));
270
260
  quotedValueLines.add(line);
271
261
  if (!nextLine(false)) {
272
- throw new RuntimeException("Unexpected end of line during parsing a quoted value"); // TODO exception class
262
+ throw new InvalidValueException("Unexpected end of line during parsing a quoted value");
273
263
  }
274
264
  valueStartPos = 0;
275
265
  } else if (isQuote(next) || isEscape(next)) { // escaped quote
@@ -298,7 +288,7 @@ public class CsvTokenizer
298
288
  // column has trailing spaces and quoted. TODO should this be rejected?
299
289
 
300
290
  } else {
301
- throw new RuntimeException("Unexpected extra character after quoted value"); // TODO exception class
291
+ throw new InvalidValueException("Unexpected extra character after quoted value");
302
292
  }
303
293
  break;
304
294
 
@@ -360,10 +350,46 @@ public class CsvTokenizer
360
350
  return c == escape;
361
351
  }
362
352
 
363
- static class QuotedSizeLimitExceededException
353
+ public static class InvalidFormatException
364
354
  extends RuntimeException
365
355
  {
366
- QuotedSizeLimitExceededException(String message)
356
+ public InvalidFormatException(String message)
357
+ {
358
+ super(message);
359
+ }
360
+ }
361
+
362
+ public static class InvalidValueException
363
+ extends RuntimeException
364
+ {
365
+ public InvalidValueException(String message)
366
+ {
367
+ super(message);
368
+ }
369
+ }
370
+
371
+ public static class QuotedSizeLimitExceededException
372
+ extends InvalidValueException
373
+ {
374
+ public QuotedSizeLimitExceededException(String message)
375
+ {
376
+ super(message);
377
+ }
378
+ }
379
+
380
+ public class TooManyColumnsException
381
+ extends InvalidFormatException
382
+ {
383
+ public TooManyColumnsException(String message)
384
+ {
385
+ super(message);
386
+ }
387
+ }
388
+
389
+ public class TooFewColumnsException
390
+ extends InvalidFormatException
391
+ {
392
+ public TooFewColumnsException(String message)
367
393
  {
368
394
  super(message);
369
395
  }
@@ -1,7 +1,6 @@
1
1
  package org.embulk.standards;
2
2
 
3
3
  import org.junit.Rule;
4
- import org.junit.Before;
5
4
  import org.junit.Test;
6
5
  import static org.junit.Assert.assertEquals;
7
6
  import java.nio.charset.Charset;
@@ -34,6 +33,7 @@ public class TestCsvParserPlugin
34
33
  assertEquals(false, task.getHeaderLine().or(false));
35
34
  assertEquals(',', task.getDelimiterChar());
36
35
  assertEquals('\"', task.getQuoteChar());
36
+ assertEquals(false, task.getAllowOptionalColumns());
37
37
  }
38
38
 
39
39
  @Test(expected = ConfigException.class)
@@ -53,6 +53,7 @@ public class TestCsvParserPlugin
53
53
  .set("header_line", true)
54
54
  .set("delimiter", "\t")
55
55
  .set("quote", "\\")
56
+ .set("allow_optional_columns", true)
56
57
  .set("columns", ImmutableList.of(
57
58
  ImmutableMap.of(
58
59
  "name", "date_code",
@@ -65,5 +66,6 @@ public class TestCsvParserPlugin
65
66
  assertEquals(true, task.getHeaderLine().or(false));
66
67
  assertEquals('\t', task.getDelimiterChar());
67
68
  assertEquals('\\', task.getQuoteChar());
69
+ assertEquals(true, task.getAllowOptionalColumns());
68
70
  }
69
71
  }
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.5.3'
2
+ VERSION = '0.5.4'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-17 00:00:00.000000000 Z
11
+ date: 2015-03-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -275,6 +275,7 @@ files:
275
275
  - embulk-docs/src/release/release-0.5.1.rst
276
276
  - embulk-docs/src/release/release-0.5.2.rst
277
277
  - embulk-docs/src/release/release-0.5.3.rst
278
+ - embulk-docs/src/release/release-0.5.4.rst
278
279
  - embulk-standards/build.gradle
279
280
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
280
281
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -379,8 +380,8 @@ files:
379
380
  - classpath/bval-jsr303-0.5.jar
380
381
  - classpath/commons-beanutils-core-1.8.3.jar
381
382
  - classpath/commons-lang3-3.1.jar
382
- - classpath/embulk-core-0.5.3.jar
383
- - classpath/embulk-standards-0.5.3.jar
383
+ - classpath/embulk-core-0.5.4.jar
384
+ - classpath/embulk-standards-0.5.4.jar
384
385
  - classpath/guava-18.0.jar
385
386
  - classpath/guice-3.0.jar
386
387
  - classpath/guice-multibindings-3.0.jar