embulk 0.6.8 → 0.6.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a9e8a1f4e6ada76c6388248ae5ac9a2cdc6876ea
4
- data.tar.gz: b21c680b792319509aa443b4e01809af8d3dcb56
3
+ metadata.gz: 19a42567d7e841a769ba329935d942926a062f66
4
+ data.tar.gz: 0bac8733cc4f5028e62865c08193520fce7f9d79
5
5
  SHA512:
6
- metadata.gz: 7990996b97d6ae238dd20c8f997af9cb68f2d557b94b3d27388b8ede4e675c1c0f044072310d76ce37c4e4625497510ddecc9dee594a783ac91875f398c0137b
7
- data.tar.gz: c2421ec9edd3f3e404302699526b1e226af19755186962b3b27074e53aebbfdb45dabf9e832c0b5a285bb40aad3e5a7aac7000724d735f43cae009b53150e2c9
6
+ metadata.gz: b74b82f6d740d513cce0eadc365824a78737a2cd40bcac32082ff51f03259cd720140eb7463530b3ffe8d8a7d8601a4d91de889a891760888c4b8ddfe9b4bede
7
+ data.tar.gz: 5a77c5eba35a06074289aab700f1351ced771f1f181cf5831d4ccf747692444063be0c32133a8d5bbf5d30fe6e636db398f591898135b351e81aeba7b90bb3db
data/build.gradle CHANGED
@@ -11,7 +11,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
11
11
 
12
12
  allprojects {
13
13
  group = 'org.embulk'
14
- version = '0.6.8'
14
+ version = '0.6.9'
15
15
 
16
16
  ext {
17
17
  jrubyVersion = '1.7.19'
@@ -1,6 +1,7 @@
1
1
  package org.embulk.spi.util;
2
2
 
3
3
  import java.io.Writer;
4
+ import java.io.BufferedWriter;
4
5
  import java.io.OutputStreamWriter;
5
6
  import java.io.IOException;
6
7
  import java.nio.charset.Charset;
@@ -47,7 +48,7 @@ public class LineEncoder
47
48
  this.newline = task.getNewline().getString();
48
49
  this.underlyingFileOutput = out;
49
50
  this.outputStream = new FileOutputOutputStream(underlyingFileOutput, task.getBufferAllocator(), FileOutputOutputStream.CloseMode.FLUSH_FINISH);
50
- this.writer = new OutputStreamWriter(outputStream, encoder);
51
+ this.writer = new BufferedWriter(new OutputStreamWriter(outputStream, encoder), 32*1024);
51
52
  }
52
53
 
53
54
  public void addNewLine()
@@ -285,17 +285,39 @@ The ``csv`` formatter plugin formats records using CSV or TSV format.
285
285
  Options
286
286
  ~~~~~~~~~~~~~~~~~~
287
287
 
288
- +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
289
- | name | type | description | required? |
290
- +================+==========+=======================================================================================================+========================+
291
- | delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
292
- +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
293
- | header\_line | boolean | If true, write the header line with column name at the first line | |
294
- +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
295
- | newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
296
- +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
297
- | charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
298
- +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
288
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
289
+ | name | type | description | required? |
290
+ +======================+=========+=======================================================================================================+========================+
291
+ | delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
292
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
293
+ | quote | string | The character surrounding a quoted value | ``\"`` by default |
294
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
295
+ | quote\_policy | enum | Policy for quote (ALL, MINIMAL, NONE) (see below) | ``MINIMAL`` by default |
296
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
297
+ | escape | string | Escape character to escape a quote character when quote\_policy is ALL or MINIMAL | ``\"`` by default |
298
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
299
+ | header\_line | boolean | If true, write the header line with column name at the first line | ``true`` by default |
300
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
301
+ | null_string | string | Expression of NULL values | empty by default |
302
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
303
+ | newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
304
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
305
+ | newline\_in\_field | enum | Newline character in each field (CRLF, LF, CR) | ``LF`` by default |
306
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
307
+ | charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
308
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
309
+
310
+ The ``quote_policy`` option determines which fields are quoted.
311
+
312
+ +------------+--------------------------------------------------------------------------------------------------------+
313
+ | name | description |
314
+ +============+========================================================================================================+
315
+ | ALL | Quote all fields |
316
+ +------------+--------------------------------------------------------------------------------------------------------+
317
+ | MINIMAL | Only quote those fields which contain delimiter, quote or any of the characters in lineterminator |
318
+ +------------+--------------------------------------------------------------------------------------------------------+
319
+ | NONE | Never quote fields. When the delimiter occurs in field, escape with escape char |
320
+ +------------+--------------------------------------------------------------------------------------------------------+
299
321
 
300
322
  Example
301
323
  ~~~~~~~~~~~~~~~~~~
@@ -306,9 +328,14 @@ Example
306
328
  ...
307
329
  formatter:
308
330
  - type: csv
309
- delimiter: "\t"
310
- newline: LF
331
+ delimiter: '\t'
332
+ newline: CRLF
333
+ newline_in_field: LF
311
334
  charset: UTF-8
335
+ quote_policy: MINIMAL
336
+ quote: '"'
337
+ escape: '\\'
338
+ null_string: '\\N'
312
339
 
313
340
  Gzip encoder plugin
314
341
  ------------------
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.6.9
7
8
  release/release-0.6.8
8
9
  release/release-0.6.7
9
10
  release/release-0.6.6
@@ -0,0 +1,24 @@
1
+ Release 0.6.9
2
+ ==================================
3
+
4
+ Built-in plugins
5
+ ------------------
6
+
7
+ * ``formatter-csv`` supports ``quote``, ``quote_policy``, ``escape``, ``newline_in_field``, and ``null_string`` options (@sakama++)
8
+
9
+ * ``quote_policy`` controls how to quote values. It can be one of ``ALL`` (quote all values), ``MINIMAL`` (quote only if a value includes the delimiter, the quote character, or a newline), or ``NONE`` (never quote).
10
+
11
+ * ``escape`` controls how to escape a quote character in a quoted string. The default is ``"`` (``"`` will be ``""``). Some applications may set it to ``\`` (``"`` will be ``\"``).
12
+
13
+ * ``null_string`` controls how to write NULL values. The default is ``""`` (empty string). You can use any strings such as ``\N`` or ``#N/A``.
14
+
15
+ * ``guess-csv`` guesses columns which contain only 0 and 1 in the first 32KB as long type rather than boolean type.
16
+
17
+ General Changes
18
+ ------------------
19
+
20
+ * ``spi.util.LineEncoder`` uses a buffered writer. This improves performance of ``formatter-csv`` by up to 10%.
21
+
22
+ Release Date
23
+ ------------------
24
+ 2015-05-14
@@ -1,5 +1,6 @@
1
1
  package org.embulk.standards;
2
2
 
3
+ import com.google.common.base.Optional;
3
4
  import com.google.common.collect.ImmutableBiMap;
4
5
  import com.google.common.collect.ImmutableMap;
5
6
  import org.embulk.config.Config;
@@ -20,11 +21,31 @@ import org.embulk.spi.Exec;
20
21
  import org.embulk.spi.FileOutput;
21
22
  import org.embulk.spi.util.LineEncoder;
22
23
 
24
+ import org.embulk.spi.util.Newline;
23
25
  import java.util.Map;
24
26
 
25
27
  public class CsvFormatterPlugin
26
28
  implements FormatterPlugin
27
29
  {
30
+ public enum QuotePolicy
31
+ {
32
+ ALL("ALL"),
33
+ MINIMAL("MINIMAL"),
34
+ NONE("NONE");
35
+
36
+ private final String string;
37
+
38
+ private QuotePolicy(String string)
39
+ {
40
+ this.string = string;
41
+ }
42
+
43
+ public String getString()
44
+ {
45
+ return string;
46
+ }
47
+ }
48
+
28
49
  public interface PluginTask
29
50
  extends LineEncoder.EncoderTask, TimestampFormatter.FormatterTask
30
51
  {
@@ -34,7 +55,27 @@ public class CsvFormatterPlugin
34
55
 
35
56
  @Config("delimiter")
36
57
  @ConfigDefault("\",\"")
37
- public String getDelimiterChar();
58
+ public char getDelimiterChar();
59
+
60
+ @Config("quote")
61
+ @ConfigDefault("\"\\\"\"")
62
+ public char getQuoteChar();
63
+
64
+ @Config("quote_policy")
65
+ @ConfigDefault("\"MINIMAL\"")
66
+ public QuotePolicy getQuotePolicy();
67
+
68
+ @Config("escape")
69
+ @ConfigDefault("null")
70
+ public Optional<Character> getEscapeChar();
71
+
72
+ @Config("null_string")
73
+ @ConfigDefault("\"\"")
74
+ public String getNullString();
75
+
76
+ @Config("newline_in_field")
77
+ @ConfigDefault("\"LF\"")
78
+ public Newline getNewlineInField();
38
79
  }
39
80
 
40
81
  @Override
@@ -66,18 +107,24 @@ public class CsvFormatterPlugin
66
107
  final LineEncoder encoder = new LineEncoder(output, task);
67
108
  final Map<Integer, TimestampFormatter> timestampFormatters =
68
109
  newTimestampFormatters(task, schema);
69
- final String delimiter = task.getDelimiterChar();
110
+ final char delimiter = task.getDelimiterChar();
111
+ final QuotePolicy quotePolicy = task.getQuotePolicy();
112
+ final char quote = task.getQuoteChar() != '\0' ? task.getQuoteChar() : '"';
113
+ final char escape = task.getEscapeChar().or(quotePolicy == QuotePolicy.NONE ? '\\' : '\"');
114
+ final String newlineInField = task.getNewlineInField().getString();
115
+ final String nullString = task.getNullString();
70
116
 
71
117
  // create a file
72
118
  encoder.nextFile();
73
119
 
74
120
  // write header
75
121
  if (task.getHeaderLine()) {
76
- writeHeader(schema, encoder, delimiter);
122
+ writeHeader(schema, encoder, delimiter, quotePolicy, quote, escape, newlineInField, nullString);
77
123
  }
78
124
 
79
125
  return new PageOutput() {
80
126
  private final PageReader pageReader = new PageReader(schema);
127
+ private final String delimiterString = String.valueOf(delimiter);
81
128
 
82
129
  public void add(Page page)
83
130
  {
@@ -88,7 +135,9 @@ public class CsvFormatterPlugin
88
135
  {
89
136
  addDelimiter(column);
90
137
  if (!pageReader.isNull(column)) {
91
- encoder.addText(Boolean.toString(pageReader.getBoolean(column)));
138
+ addValue(Boolean.toString(pageReader.getBoolean(column)));
139
+ } else {
140
+ addNullString();
92
141
  }
93
142
  }
94
143
 
@@ -96,7 +145,9 @@ public class CsvFormatterPlugin
96
145
  {
97
146
  addDelimiter(column);
98
147
  if (!pageReader.isNull(column)) {
99
- encoder.addText(Long.toString(pageReader.getLong(column)));
148
+ addValue(Long.toString(pageReader.getLong(column)));
149
+ } else {
150
+ addNullString();
100
151
  }
101
152
  }
102
153
 
@@ -104,7 +155,9 @@ public class CsvFormatterPlugin
104
155
  {
105
156
  addDelimiter(column);
106
157
  if (!pageReader.isNull(column)) {
107
- encoder.addText(Double.toString(pageReader.getDouble(column)));
158
+ addValue(Double.toString(pageReader.getDouble(column)));
159
+ } else {
160
+ addNullString();
108
161
  }
109
162
  }
110
163
 
@@ -112,8 +165,9 @@ public class CsvFormatterPlugin
112
165
  {
113
166
  addDelimiter(column);
114
167
  if (!pageReader.isNull(column)) {
115
- // TODO escape and quoting
116
- encoder.addText(pageReader.getString(column));
168
+ addValue(pageReader.getString(column));
169
+ } else {
170
+ addNullString();
117
171
  }
118
172
  }
119
173
 
@@ -122,18 +176,29 @@ public class CsvFormatterPlugin
122
176
  addDelimiter(column);
123
177
  if (!pageReader.isNull(column)) {
124
178
  Timestamp value = pageReader.getTimestamp(column);
125
- encoder.addText(timestampFormatters.get(column.getIndex()).format(value));
179
+ addValue(timestampFormatters.get(column.getIndex()).format(value));
180
+ } else {
181
+ addNullString();
126
182
  }
127
183
  }
128
184
 
129
185
  private void addDelimiter(Column column)
130
186
  {
131
187
  if (column.getIndex() != 0) {
132
- encoder.addText(delimiter);
188
+ encoder.addText(delimiterString);
133
189
  }
134
190
  }
135
- });
136
191
 
192
+ private void addValue(String v)
193
+ {
194
+ encoder.addText(setEscapeAndQuoteValue(v, delimiter, quotePolicy, quote, escape, newlineInField, nullString));
195
+ }
196
+
197
+ private void addNullString()
198
+ {
199
+ encoder.addText(nullString);
200
+ }
201
+ });
137
202
  encoder.addNewLine();
138
203
  }
139
204
  }
@@ -150,14 +215,72 @@ public class CsvFormatterPlugin
150
215
  };
151
216
  }
152
217
 
153
- private void writeHeader(Schema schema, LineEncoder encoder, String delimiter)
218
+ private void writeHeader(Schema schema, LineEncoder encoder, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString)
154
219
  {
220
+ String delimiterString = String.valueOf(delimiter);
155
221
  for (Column column : schema.getColumns()) {
156
222
  if (column.getIndex() != 0) {
157
- encoder.addText(delimiter);
223
+ encoder.addText(delimiterString);
158
224
  }
159
- encoder.addText(column.getName());
225
+ encoder.addText(setEscapeAndQuoteValue(column.getName(), delimiter, policy, quote, escape, newline, nullString));
160
226
  }
161
227
  encoder.addNewLine();
162
228
  }
229
+
230
+ private String setEscapeAndQuoteValue(String v, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString)
231
+ {
232
+ StringBuilder escapedValue = new StringBuilder();
233
+ char previousChar = ' ';
234
+
235
+ boolean isRequireQuote = (policy == QuotePolicy.ALL || policy == QuotePolicy.MINIMAL && v.equals(nullString)) ? true : false;
236
+
237
+ for (int i = 0; i < v.length(); i++) {
238
+ char c = v.charAt(i);
239
+
240
+ if (c == quote) {
241
+ escapedValue.append(escape);
242
+ escapedValue.append(c);
243
+ isRequireQuote = true;
244
+ } else if (c == '\r') {
245
+ if (policy == QuotePolicy.NONE) {
246
+ escapedValue.append(escape);
247
+ }
248
+ escapedValue.append(newline);
249
+ isRequireQuote = true;
250
+ } else if (c == '\n') {
251
+ if (previousChar != '\r') {
252
+ if (policy == QuotePolicy.NONE) {
253
+ escapedValue.append(escape);
254
+ }
255
+ escapedValue.append(newline);
256
+ isRequireQuote = true;
257
+ }
258
+ } else if (c == delimiter) {
259
+ if (policy == QuotePolicy.NONE) {
260
+ escapedValue.append(escape);
261
+ }
262
+ escapedValue.append(c);
263
+ isRequireQuote = true;
264
+ } else {
265
+ escapedValue.append(c);
266
+ }
267
+ previousChar = c;
268
+ }
269
+
270
+ if (policy != QuotePolicy.NONE && isRequireQuote) {
271
+ return setQuoteValue(escapedValue.toString(), quote);
272
+ } else {
273
+ return escapedValue.toString();
274
+ }
275
+ }
276
+
277
+ private String setQuoteValue(String v, char quote)
278
+ {
279
+ StringBuilder sb = new StringBuilder();
280
+ sb.append(quote);
281
+ sb.append(v);
282
+ sb.append(quote);
283
+
284
+ return sb.toString();
285
+ }
163
286
  }
@@ -0,0 +1,309 @@
1
+ package org.embulk.standards;
2
+
3
+ import com.google.common.collect.ImmutableList;
4
+ import com.google.common.collect.ImmutableMap;
5
+ import org.junit.Rule;
6
+ import org.junit.Test;
7
+ import java.lang.reflect.InvocationTargetException;
8
+ import java.lang.reflect.Method;
9
+ import static org.junit.Assert.assertEquals;
10
+ import java.nio.charset.Charset;
11
+ import org.embulk.EmbulkTestRuntime;
12
+ import org.embulk.config.ConfigSource;
13
+ import org.embulk.spi.Exec;
14
+ import org.embulk.spi.util.Newline;
15
+
16
+ public class TestCsvFormatterPlugin
17
+ {
18
+ @Rule
19
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
20
+
21
+ @Test
22
+ public void checkDefaultValues()
23
+ {
24
+ ConfigSource config = Exec.newConfigSource();
25
+
26
+ CsvFormatterPlugin.PluginTask task = config.loadConfig(CsvFormatterPlugin.PluginTask.class);
27
+ assertEquals(Charset.forName("utf-8"), task.getCharset());
28
+ assertEquals(Newline.CRLF, task.getNewline());
29
+ assertEquals(true, task.getHeaderLine());
30
+ assertEquals(',', task.getDelimiterChar());
31
+ assertEquals('\"', task.getQuoteChar());
32
+ assertEquals(CsvFormatterPlugin.QuotePolicy.MINIMAL, task.getQuotePolicy());
33
+ assertEquals(false, task.getEscapeChar().isPresent());
34
+ assertEquals("", task.getNullString());
35
+ assertEquals(Newline.LF, task.getNewlineInField());
36
+ }
37
+
38
+ @Test
39
+ public void checkLoadConfig()
40
+ {
41
+ ConfigSource config = Exec.newConfigSource()
42
+ .set("charset", "utf-16")
43
+ .set("newline", "LF")
44
+ .set("header_line", false)
45
+ .set("delimiter", "\t")
46
+ .set("quote", "\\")
47
+ .set("quote_policy", "ALL")
48
+ .set("escape", "\"")
49
+ .set("null_string", "\\N")
50
+ .set("newline_in_field", "CRLF");
51
+
52
+ CsvFormatterPlugin.PluginTask task = config.loadConfig(CsvFormatterPlugin.PluginTask.class);
53
+ assertEquals(Charset.forName("utf-16"), task.getCharset());
54
+ assertEquals(Newline.LF, task.getNewline());
55
+ assertEquals(false, task.getHeaderLine());
56
+ assertEquals('\t', task.getDelimiterChar());
57
+ assertEquals('\\', task.getQuoteChar());
58
+ assertEquals(CsvFormatterPlugin.QuotePolicy.ALL, task.getQuotePolicy());
59
+ assertEquals('\"', (char) task.getEscapeChar().get());
60
+ assertEquals("\\N", task.getNullString());
61
+ assertEquals(Newline.CRLF, task.getNewlineInField());
62
+ }
63
+
64
+ @Test
65
+ public void testQuoteValue()
66
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
67
+ {
68
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setQuoteValue", String.class, char.class);
69
+ method.setAccessible(true);
70
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
71
+
72
+ assertEquals("\"ABCD\"", method.invoke(formatter, "ABCD", '"'));
73
+ assertEquals("\"\"", method.invoke(formatter, "", '"'));
74
+ assertEquals("'ABCD'", method.invoke(formatter, "ABCD", '\''));
75
+ assertEquals("''", method.invoke(formatter, "", '\''));
76
+ }
77
+
78
+ @Test
79
+ public void testEscapeQuote()
80
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
81
+ {
82
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
83
+ CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
84
+ method.setAccessible(true);
85
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
86
+
87
+ char delimiter = ',';
88
+ CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
89
+ String newline = Newline.LF.getString();
90
+
91
+ assertEquals("\"AB\\\"CD\"", method.invoke(formatter, "AB\"CD", delimiter, policy, '"', '\\', newline, ""));
92
+ assertEquals("\"AB\"\"CD\"", method.invoke(formatter, "AB\"CD", delimiter, policy, '"', '"', newline, ""));
93
+ }
94
+
95
+ @Test
96
+ public void testQuotePolicyAll()
97
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
98
+ {
99
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
100
+ CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
101
+ method.setAccessible(true);
102
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
103
+
104
+ char delimiter = ',';
105
+ char quote = '"';
106
+ char escape = '"';
107
+ CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.ALL;
108
+ String newline = Newline.LF.getString();
109
+ String nullString = "";
110
+
111
+ @SuppressWarnings("unchecked")
112
+ ImmutableList<ImmutableMap<String, String>> testCases = ImmutableList.of(
113
+ ImmutableMap.of("expected", "\"true\"", "actual", "true"),
114
+ ImmutableMap.of("expected", "\"false\"", "actual", "false"),
115
+ ImmutableMap.of("expected", "\"0\"", "actual", "0"),
116
+ ImmutableMap.of("expected", "\"1\"", "actual", "1"),
117
+ ImmutableMap.of("expected", "\"1234\"", "actual", "1234"),
118
+ ImmutableMap.of("expected", "\"-1234\"", "actual", "-1234"),
119
+ ImmutableMap.of("expected", "\"+1234\"", "actual", "+1234"),
120
+ ImmutableMap.of("expected", "\"0x4d2\"", "actual", "0x4d2"),
121
+ ImmutableMap.of("expected", "\"123L\"", "actual", "123L"),
122
+ ImmutableMap.of("expected", "\"3.141592\"", "actual", "3.141592"),
123
+ ImmutableMap.of("expected", "\"1,000\"", "actual", "1,000"),
124
+ ImmutableMap.of("expected", "\"ABC\"", "actual", "ABC"),
125
+ ImmutableMap.of("expected", "\"ABC\"\"DEF\"", "actual", "ABC\"DEF"),
126
+ ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\nDEF"),
127
+ ImmutableMap.of("expected", "\"\"", "actual", ""),
128
+ ImmutableMap.of("expected", "\"NULL\"", "actual", "NULL"),
129
+ ImmutableMap.of("expected", "\"2015-01-01 12:01:01\"", "actual", "2015-01-01 12:01:01"),
130
+ ImmutableMap.of("expected", "\"20150101\"", "actual", "20150101"));
131
+
132
+ for (ImmutableMap testCase : testCases) {
133
+ String expected = (String) testCase.get("expected");
134
+ String actual = (String) testCase.get("actual");
135
+ assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
136
+ }
137
+ }
138
+
139
+ @Test
140
+ public void testQuotePolicyMinimal()
141
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
142
+ {
143
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
144
+ CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
145
+ method.setAccessible(true);
146
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
147
+
148
+ char delimiter = ',';
149
+ char quote = '"';
150
+ char escape = '"';
151
+ CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
152
+ String newline = Newline.LF.getString();
153
+ String nullString = "";
154
+
155
+ @SuppressWarnings("unchecked")
156
+ ImmutableList<ImmutableMap<String, String>> testCases = ImmutableList.of(
157
+ ImmutableMap.of("expected", "true", "actual", "true"),
158
+ ImmutableMap.of("expected", "false", "actual", "false"),
159
+ ImmutableMap.of("expected", "0", "actual", "0"),
160
+ ImmutableMap.of("expected", "1", "actual", "1"),
161
+ ImmutableMap.of("expected", "1234", "actual", "1234"),
162
+ ImmutableMap.of("expected", "-1234", "actual", "-1234"),
163
+ ImmutableMap.of("expected", "+1234", "actual", "+1234"),
164
+ ImmutableMap.of("expected", "0x4d2", "actual", "0x4d2"),
165
+ ImmutableMap.of("expected", "123L", "actual", "123L"),
166
+ ImmutableMap.of("expected", "3.141592", "actual", "3.141592"),
167
+ ImmutableMap.of("expected", "\"1,000\"", "actual", "1,000"),
168
+ ImmutableMap.of("expected", "ABC", "actual", "ABC"),
169
+ ImmutableMap.of("expected", "\"ABC\"\"DEF\"", "actual", "ABC\"DEF"),
170
+ ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\nDEF"),
171
+ ImmutableMap.of("expected", "\"\"", "actual", ""),
172
+ ImmutableMap.of("expected", "NULL", "actual", "NULL"),
173
+ ImmutableMap.of("expected", "2015-01-01 12:01:01", "actual", "2015-01-01 12:01:01"),
174
+ ImmutableMap.of("expected", "20150101", "actual", "20150101"));
175
+
176
+ for (ImmutableMap testCase : testCases) {
177
+ String expected = (String) testCase.get("expected");
178
+ String actual = (String) testCase.get("actual");
179
+ assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
180
+ }
181
+ }
182
+
183
+ @Test
184
+ public void testQuotePolicyNone()
185
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
186
+ {
187
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
188
+ CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
189
+ method.setAccessible(true);
190
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
191
+
192
+ char delimiter = ',';
193
+ char quote = '"';
194
+ char escape = '"';
195
+ CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.NONE;
196
+ String newline = Newline.LF.getString();
197
+ String nullString = "";
198
+
199
+ @SuppressWarnings("unchecked")
200
+ ImmutableList<ImmutableMap<String, String>> testCases = ImmutableList.of(
201
+ ImmutableMap.of("expected", "true", "actual", "true"),
202
+ ImmutableMap.of("expected", "false", "actual", "false"),
203
+ ImmutableMap.of("expected", "0", "actual", "0"),
204
+ ImmutableMap.of("expected", "1", "actual", "1"),
205
+ ImmutableMap.of("expected", "1234", "actual", "1234"),
206
+ ImmutableMap.of("expected", "-1234", "actual", "-1234"),
207
+ ImmutableMap.of("expected", "+1234", "actual", "+1234"),
208
+ ImmutableMap.of("expected", "0x4d2", "actual", "0x4d2"),
209
+ ImmutableMap.of("expected", "123L", "actual", "123L"),
210
+ ImmutableMap.of("expected", "3.141592", "actual", "3.141592"),
211
+ ImmutableMap.of("expected", "1\",000", "actual", "1,000"),
212
+ ImmutableMap.of("expected", "ABC", "actual", "ABC"),
213
+ ImmutableMap.of("expected", "ABC\"\"DEF", "actual", "ABC\"DEF"),
214
+ ImmutableMap.of("expected", "ABC\"\nDEF", "actual", "ABC\nDEF"),
215
+ ImmutableMap.of("expected", "", "actual", ""),
216
+ ImmutableMap.of("expected", "NULL", "actual", "NULL"),
217
+ ImmutableMap.of("expected", "2015-01-01 12:01:01", "actual", "2015-01-01 12:01:01"),
218
+ ImmutableMap.of("expected", "20150101", "actual", "20150101"));
219
+
220
+ for (ImmutableMap testCase : testCases) {
221
+ String expected = (String) testCase.get("expected");
222
+ String actual = (String) testCase.get("actual");
223
+ assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
224
+ }
225
+ }
226
+
227
+ @Test
228
+ public void testNewlineInField()
229
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
230
+ {
231
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
232
+ CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
233
+ method.setAccessible(true);
234
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
235
+
236
+ char delimiter = ',';
237
+ char quote = '"';
238
+ char escape = '"';
239
+ String newline;
240
+ CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
241
+ String nullString = "";
242
+
243
+ ImmutableList<ImmutableMap<String, String>> testCases;
244
+
245
+ newline = Newline.LF.getString();
246
+ testCases = ImmutableList.of(
247
+ ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\r\nDEF"),
248
+ ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\rDEF"),
249
+ ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\nDEF"));
250
+
251
+ for (ImmutableMap testCase : testCases) {
252
+ String expected = (String) testCase.get("expected");
253
+ String actual = (String) testCase.get("actual");
254
+ assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
255
+ }
256
+
257
+
258
+ newline = Newline.CRLF.getString();
259
+ testCases = ImmutableList.of(
260
+ ImmutableMap.of("expected", "\"ABC\r\nDEF\"", "actual", "ABC\r\nDEF"),
261
+ ImmutableMap.of("expected", "\"ABC\r\nDEF\"", "actual", "ABC\rDEF"),
262
+ ImmutableMap.of("expected", "\"ABC\r\nDEF\"", "actual", "ABC\nDEF"));
263
+
264
+ for (ImmutableMap testCase : testCases) {
265
+ String expected = (String) testCase.get("expected");
266
+ String actual = (String) testCase.get("actual");
267
+ assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
268
+ }
269
+
270
+
271
+ newline = Newline.CR.getString();
272
+ testCases = ImmutableList.of(
273
+ ImmutableMap.of("expected", "\"ABC\rDEF\"", "actual", "ABC\r\nDEF"),
274
+ ImmutableMap.of("expected", "\"ABC\rDEF\"", "actual", "ABC\rDEF"),
275
+ ImmutableMap.of("expected", "\"ABC\rDEF\"", "actual", "ABC\nDEF"));
276
+
277
+ for (ImmutableMap testCase : testCases) {
278
+ String expected = (String) testCase.get("expected");
279
+ String actual = (String) testCase.get("actual");
280
+ assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
281
+ }
282
+ }
283
+
284
+ @Test
285
+ public void testNullString()
286
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
287
+ {
288
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
289
+ CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
290
+ method.setAccessible(true);
291
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
292
+
293
+ char delimiter = ',';
294
+ char quote = '"';
295
+ char escape = '"';
296
+ CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
297
+ String newline = Newline.LF.getString();
298
+
299
+ assertEquals("\"\"", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, ""));
300
+ assertEquals("N/A", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, ""));
301
+ assertEquals("", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, ""));
302
+ assertEquals("N/A", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, ""));
303
+
304
+ assertEquals("", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, "N/A"));
305
+ assertEquals("\"N/A\"", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, "N/A"));
306
+ assertEquals("", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, "N/A"));
307
+ assertEquals("N/A", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, "N/A"));
308
+ }
309
+ }
@@ -87,7 +87,6 @@ module Embulk::Guess
87
87
  yes Yes YES
88
88
  t T y Y
89
89
  on On ON
90
- 1
91
90
  ].map {|k| [k, true] }]
92
91
 
93
92
  # When matching to false string, then return 'true'
@@ -96,7 +95,6 @@ module Embulk::Guess
96
95
  no No NO
97
96
  f N n N
98
97
  off Off OFF
99
- 0
100
98
  ].map {|k| [k, true] }]
101
99
 
102
100
  TYPE_COALESCE = Hash[{
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.6.8'
2
+ VERSION = '0.6.9'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.8
4
+ version: 0.6.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-13 00:00:00.000000000 Z
11
+ date: 2015-05-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -300,6 +300,7 @@ files:
300
300
  - embulk-docs/src/release/release-0.6.6.rst
301
301
  - embulk-docs/src/release/release-0.6.7.rst
302
302
  - embulk-docs/src/release/release-0.6.8.rst
303
+ - embulk-docs/src/release/release-0.6.9.rst
303
304
  - embulk-standards/build.gradle
304
305
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
305
306
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -313,6 +314,7 @@ files:
313
314
  - embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java
314
315
  - embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java
315
316
  - embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension
317
+ - embulk-standards/src/test/java/org/embulk/standards/TestCsvFormatterPlugin.java
316
318
  - embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java
317
319
  - embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java
318
320
  - embulk.gemspec
@@ -406,8 +408,8 @@ files:
406
408
  - classpath/bval-jsr303-0.5.jar
407
409
  - classpath/commons-beanutils-core-1.8.3.jar
408
410
  - classpath/commons-lang3-3.1.jar
409
- - classpath/embulk-core-0.6.8.jar
410
- - classpath/embulk-standards-0.6.8.jar
411
+ - classpath/embulk-core-0.6.9.jar
412
+ - classpath/embulk-standards-0.6.9.jar
411
413
  - classpath/guava-18.0.jar
412
414
  - classpath/guice-4.0.jar
413
415
  - classpath/guice-multibindings-4.0.jar