embulk 0.6.8 → 0.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a9e8a1f4e6ada76c6388248ae5ac9a2cdc6876ea
4
- data.tar.gz: b21c680b792319509aa443b4e01809af8d3dcb56
3
+ metadata.gz: 19a42567d7e841a769ba329935d942926a062f66
4
+ data.tar.gz: 0bac8733cc4f5028e62865c08193520fce7f9d79
5
5
  SHA512:
6
- metadata.gz: 7990996b97d6ae238dd20c8f997af9cb68f2d557b94b3d27388b8ede4e675c1c0f044072310d76ce37c4e4625497510ddecc9dee594a783ac91875f398c0137b
7
- data.tar.gz: c2421ec9edd3f3e404302699526b1e226af19755186962b3b27074e53aebbfdb45dabf9e832c0b5a285bb40aad3e5a7aac7000724d735f43cae009b53150e2c9
6
+ metadata.gz: b74b82f6d740d513cce0eadc365824a78737a2cd40bcac32082ff51f03259cd720140eb7463530b3ffe8d8a7d8601a4d91de889a891760888c4b8ddfe9b4bede
7
+ data.tar.gz: 5a77c5eba35a06074289aab700f1351ced771f1f181cf5831d4ccf747692444063be0c32133a8d5bbf5d30fe6e636db398f591898135b351e81aeba7b90bb3db
data/build.gradle CHANGED
@@ -11,7 +11,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
11
11
 
12
12
  allprojects {
13
13
  group = 'org.embulk'
14
- version = '0.6.8'
14
+ version = '0.6.9'
15
15
 
16
16
  ext {
17
17
  jrubyVersion = '1.7.19'
@@ -1,6 +1,7 @@
1
1
  package org.embulk.spi.util;
2
2
 
3
3
  import java.io.Writer;
4
+ import java.io.BufferedWriter;
4
5
  import java.io.OutputStreamWriter;
5
6
  import java.io.IOException;
6
7
  import java.nio.charset.Charset;
@@ -47,7 +48,7 @@ public class LineEncoder
47
48
  this.newline = task.getNewline().getString();
48
49
  this.underlyingFileOutput = out;
49
50
  this.outputStream = new FileOutputOutputStream(underlyingFileOutput, task.getBufferAllocator(), FileOutputOutputStream.CloseMode.FLUSH_FINISH);
50
- this.writer = new OutputStreamWriter(outputStream, encoder);
51
+ this.writer = new BufferedWriter(new OutputStreamWriter(outputStream, encoder), 32*1024);
51
52
  }
52
53
 
53
54
  public void addNewLine()
@@ -285,17 +285,39 @@ The ``csv`` formatter plugin formats records using CSV or TSV format.
285
285
  Options
286
286
  ~~~~~~~~~~~~~~~~~~
287
287
 
288
- +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
289
- | name | type | description | required? |
290
- +================+==========+=======================================================================================================+========================+
291
- | delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
292
- +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
293
- | header\_line | boolean | If true, write the header line with column name at the first line | |
294
- +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
295
- | newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
296
- +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
297
- | charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
298
- +----------------+----------+-------------------------------------------------------------------------------------------------------+------------------------+
288
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
289
+ | name | type | description | required? |
290
+ +======================+=========+=======================================================================================================+========================+
291
+ | delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
292
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
293
+ | quote | string | The character surrounding a quoted value | ``\"`` by default |
294
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
295
+ | quote\_policy | enum | Policy for quote (ALL, MINIMAL, NONE) (see below) | ``MINIMAL`` by default |
296
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
297
+ | escape | string | Escape character to escape a quote character when quote\_policy is ALL or MINIMAL | ``\"`` by default |
298
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
299
+ | header\_line | boolean | If true, write the header line with column name at the first line | ``true`` by default |
300
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
301
+ | null_string | string | Expression of NULL values | empty by default |
302
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
303
+ | newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
304
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
305
+ | newline\_in\_field | enum | Newline character in each field (CRLF, LF, CR) | ``LF`` by default |
306
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
307
+ | charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
308
+ +----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
309
+
310
+ The ``quote_policy`` option determines which fields are quoted.
311
+
312
+ +------------+--------------------------------------------------------------------------------------------------------+
313
+ | name | description |
314
+ +============+========================================================================================================+
315
+ | ALL | Quote all fields |
316
+ +------------+--------------------------------------------------------------------------------------------------------+
317
+ | MINIMAL | Only quote those fields which contain delimiter, quote or any of the characters in lineterminator |
318
+ +------------+--------------------------------------------------------------------------------------------------------+
319
+ | NONE | Never quote fields. When the delimiter occurs in field, escape with escape char |
320
+ +------------+--------------------------------------------------------------------------------------------------------+
299
321
 
300
322
  Example
301
323
  ~~~~~~~~~~~~~~~~~~
@@ -306,9 +328,14 @@ Example
306
328
  ...
307
329
  formatter:
308
330
  - type: csv
309
- delimiter: "\t"
310
- newline: LF
331
+ delimiter: '\t'
332
+ newline: CRLF
333
+ newline_in_field: LF
311
334
  charset: UTF-8
335
+ quote_policy: MINIMAL
336
+ quote: '"'
337
+ escape: '\\'
338
+ null_string: '\\N'
312
339
 
313
340
  Gzip encoder plugin
314
341
  ------------------
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.6.9
7
8
  release/release-0.6.8
8
9
  release/release-0.6.7
9
10
  release/release-0.6.6
@@ -0,0 +1,24 @@
1
+ Release 0.6.9
2
+ ==================================
3
+
4
+ Built-in plugins
5
+ ------------------
6
+
7
+ * ``formatter-csv`` supports ``quote``, ``quote_policy``, ``escape``, ``newline_in_field``, and ``null_string`` options (@sakama++)
8
+
9
+ * ``quote_policy`` controls how to quote values. It can be one of ``ALL`` (quote all values), ``MINIMAL`` (quote a value only if it includes a delimiter, quote, or newline character), or ``NONE`` (never quote).
10
+
11
+ * ``escape`` controls how to escape quote character in a quoted string. The default is ``"`` (``"`` will be ``""``). Some applications may set it to ``\`` (``"`` will be ``\"``)
12
+
13
+ * ``null_string`` controls how to write NULL values. The default is ``""`` (empty string). You can use any strings such as ``\N`` or ``#N/A``.
14
+
15
+ * ``guess-csv`` guesses columns which contain only 0 and 1 in the first 32KB as long type rather than boolean type.
16
+
17
+ General Changes
18
+ ------------------
19
+
20
+ * ``spi.util.LineEncoder`` uses a buffered writer. This improves performance of ``formatter-csv`` by up to 10%.
21
+
22
+ Release Date
23
+ ------------------
24
+ 2015-05-14
@@ -1,5 +1,6 @@
1
1
  package org.embulk.standards;
2
2
 
3
+ import com.google.common.base.Optional;
3
4
  import com.google.common.collect.ImmutableBiMap;
4
5
  import com.google.common.collect.ImmutableMap;
5
6
  import org.embulk.config.Config;
@@ -20,11 +21,31 @@ import org.embulk.spi.Exec;
20
21
  import org.embulk.spi.FileOutput;
21
22
  import org.embulk.spi.util.LineEncoder;
22
23
 
24
+ import org.embulk.spi.util.Newline;
23
25
  import java.util.Map;
24
26
 
25
27
  public class CsvFormatterPlugin
26
28
  implements FormatterPlugin
27
29
  {
30
+ public enum QuotePolicy
31
+ {
32
+ ALL("ALL"),
33
+ MINIMAL("MINIMAL"),
34
+ NONE("NONE");
35
+
36
+ private final String string;
37
+
38
+ private QuotePolicy(String string)
39
+ {
40
+ this.string = string;
41
+ }
42
+
43
+ public String getString()
44
+ {
45
+ return string;
46
+ }
47
+ }
48
+
28
49
  public interface PluginTask
29
50
  extends LineEncoder.EncoderTask, TimestampFormatter.FormatterTask
30
51
  {
@@ -34,7 +55,27 @@ public class CsvFormatterPlugin
34
55
 
35
56
  @Config("delimiter")
36
57
  @ConfigDefault("\",\"")
37
- public String getDelimiterChar();
58
+ public char getDelimiterChar();
59
+
60
+ @Config("quote")
61
+ @ConfigDefault("\"\\\"\"")
62
+ public char getQuoteChar();
63
+
64
+ @Config("quote_policy")
65
+ @ConfigDefault("\"MINIMAL\"")
66
+ public QuotePolicy getQuotePolicy();
67
+
68
+ @Config("escape")
69
+ @ConfigDefault("null")
70
+ public Optional<Character> getEscapeChar();
71
+
72
+ @Config("null_string")
73
+ @ConfigDefault("\"\"")
74
+ public String getNullString();
75
+
76
+ @Config("newline_in_field")
77
+ @ConfigDefault("\"LF\"")
78
+ public Newline getNewlineInField();
38
79
  }
39
80
 
40
81
  @Override
@@ -66,18 +107,24 @@ public class CsvFormatterPlugin
66
107
  final LineEncoder encoder = new LineEncoder(output, task);
67
108
  final Map<Integer, TimestampFormatter> timestampFormatters =
68
109
  newTimestampFormatters(task, schema);
69
- final String delimiter = task.getDelimiterChar();
110
+ final char delimiter = task.getDelimiterChar();
111
+ final QuotePolicy quotePolicy = task.getQuotePolicy();
112
+ final char quote = task.getQuoteChar() != '\0' ? task.getQuoteChar() : '"';
113
+ final char escape = task.getEscapeChar().or(quotePolicy == QuotePolicy.NONE ? '\\' : '\"');
114
+ final String newlineInField = task.getNewlineInField().getString();
115
+ final String nullString = task.getNullString();
70
116
 
71
117
  // create a file
72
118
  encoder.nextFile();
73
119
 
74
120
  // write header
75
121
  if (task.getHeaderLine()) {
76
- writeHeader(schema, encoder, delimiter);
122
+ writeHeader(schema, encoder, delimiter, quotePolicy, quote, escape, newlineInField, nullString);
77
123
  }
78
124
 
79
125
  return new PageOutput() {
80
126
  private final PageReader pageReader = new PageReader(schema);
127
+ private final String delimiterString = String.valueOf(delimiter);
81
128
 
82
129
  public void add(Page page)
83
130
  {
@@ -88,7 +135,9 @@ public class CsvFormatterPlugin
88
135
  {
89
136
  addDelimiter(column);
90
137
  if (!pageReader.isNull(column)) {
91
- encoder.addText(Boolean.toString(pageReader.getBoolean(column)));
138
+ addValue(Boolean.toString(pageReader.getBoolean(column)));
139
+ } else {
140
+ addNullString();
92
141
  }
93
142
  }
94
143
 
@@ -96,7 +145,9 @@ public class CsvFormatterPlugin
96
145
  {
97
146
  addDelimiter(column);
98
147
  if (!pageReader.isNull(column)) {
99
- encoder.addText(Long.toString(pageReader.getLong(column)));
148
+ addValue(Long.toString(pageReader.getLong(column)));
149
+ } else {
150
+ addNullString();
100
151
  }
101
152
  }
102
153
 
@@ -104,7 +155,9 @@ public class CsvFormatterPlugin
104
155
  {
105
156
  addDelimiter(column);
106
157
  if (!pageReader.isNull(column)) {
107
- encoder.addText(Double.toString(pageReader.getDouble(column)));
158
+ addValue(Double.toString(pageReader.getDouble(column)));
159
+ } else {
160
+ addNullString();
108
161
  }
109
162
  }
110
163
 
@@ -112,8 +165,9 @@ public class CsvFormatterPlugin
112
165
  {
113
166
  addDelimiter(column);
114
167
  if (!pageReader.isNull(column)) {
115
- // TODO escape and quoting
116
- encoder.addText(pageReader.getString(column));
168
+ addValue(pageReader.getString(column));
169
+ } else {
170
+ addNullString();
117
171
  }
118
172
  }
119
173
 
@@ -122,18 +176,29 @@ public class CsvFormatterPlugin
122
176
  addDelimiter(column);
123
177
  if (!pageReader.isNull(column)) {
124
178
  Timestamp value = pageReader.getTimestamp(column);
125
- encoder.addText(timestampFormatters.get(column.getIndex()).format(value));
179
+ addValue(timestampFormatters.get(column.getIndex()).format(value));
180
+ } else {
181
+ addNullString();
126
182
  }
127
183
  }
128
184
 
129
185
  private void addDelimiter(Column column)
130
186
  {
131
187
  if (column.getIndex() != 0) {
132
- encoder.addText(delimiter);
188
+ encoder.addText(delimiterString);
133
189
  }
134
190
  }
135
- });
136
191
 
192
+ private void addValue(String v)
193
+ {
194
+ encoder.addText(setEscapeAndQuoteValue(v, delimiter, quotePolicy, quote, escape, newlineInField, nullString));
195
+ }
196
+
197
+ private void addNullString()
198
+ {
199
+ encoder.addText(nullString);
200
+ }
201
+ });
137
202
  encoder.addNewLine();
138
203
  }
139
204
  }
@@ -150,14 +215,72 @@ public class CsvFormatterPlugin
150
215
  };
151
216
  }
152
217
 
153
- private void writeHeader(Schema schema, LineEncoder encoder, String delimiter)
218
+ private void writeHeader(Schema schema, LineEncoder encoder, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString)
154
219
  {
220
+ String delimiterString = String.valueOf(delimiter);
155
221
  for (Column column : schema.getColumns()) {
156
222
  if (column.getIndex() != 0) {
157
- encoder.addText(delimiter);
223
+ encoder.addText(delimiterString);
158
224
  }
159
- encoder.addText(column.getName());
225
+ encoder.addText(setEscapeAndQuoteValue(column.getName(), delimiter, policy, quote, escape, newline, nullString));
160
226
  }
161
227
  encoder.addNewLine();
162
228
  }
229
+
230
+ private String setEscapeAndQuoteValue(String v, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString)
231
+ {
232
+ StringBuilder escapedValue = new StringBuilder();
233
+ char previousChar = ' ';
234
+
235
+ boolean isRequireQuote = (policy == QuotePolicy.ALL || policy == QuotePolicy.MINIMAL && v.equals(nullString)) ? true : false;
236
+
237
+ for (int i = 0; i < v.length(); i++) {
238
+ char c = v.charAt(i);
239
+
240
+ if (c == quote) {
241
+ escapedValue.append(escape);
242
+ escapedValue.append(c);
243
+ isRequireQuote = true;
244
+ } else if (c == '\r') {
245
+ if (policy == QuotePolicy.NONE) {
246
+ escapedValue.append(escape);
247
+ }
248
+ escapedValue.append(newline);
249
+ isRequireQuote = true;
250
+ } else if (c == '\n') {
251
+ if (previousChar != '\r') {
252
+ if (policy == QuotePolicy.NONE) {
253
+ escapedValue.append(escape);
254
+ }
255
+ escapedValue.append(newline);
256
+ isRequireQuote = true;
257
+ }
258
+ } else if (c == delimiter) {
259
+ if (policy == QuotePolicy.NONE) {
260
+ escapedValue.append(escape);
261
+ }
262
+ escapedValue.append(c);
263
+ isRequireQuote = true;
264
+ } else {
265
+ escapedValue.append(c);
266
+ }
267
+ previousChar = c;
268
+ }
269
+
270
+ if (policy != QuotePolicy.NONE && isRequireQuote) {
271
+ return setQuoteValue(escapedValue.toString(), quote);
272
+ } else {
273
+ return escapedValue.toString();
274
+ }
275
+ }
276
+
277
+ private String setQuoteValue(String v, char quote)
278
+ {
279
+ StringBuilder sb = new StringBuilder();
280
+ sb.append(quote);
281
+ sb.append(v);
282
+ sb.append(quote);
283
+
284
+ return sb.toString();
285
+ }
163
286
  }
@@ -0,0 +1,309 @@
1
+ package org.embulk.standards;
2
+
3
+ import com.google.common.collect.ImmutableList;
4
+ import com.google.common.collect.ImmutableMap;
5
+ import org.junit.Rule;
6
+ import org.junit.Test;
7
+ import java.lang.reflect.InvocationTargetException;
8
+ import java.lang.reflect.Method;
9
+ import static org.junit.Assert.assertEquals;
10
+ import java.nio.charset.Charset;
11
+ import org.embulk.EmbulkTestRuntime;
12
+ import org.embulk.config.ConfigSource;
13
+ import org.embulk.spi.Exec;
14
+ import org.embulk.spi.util.Newline;
15
+
16
+ public class TestCsvFormatterPlugin
17
+ {
18
+ @Rule
19
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
20
+
21
+ @Test
22
+ public void checkDefaultValues()
23
+ {
24
+ ConfigSource config = Exec.newConfigSource();
25
+
26
+ CsvFormatterPlugin.PluginTask task = config.loadConfig(CsvFormatterPlugin.PluginTask.class);
27
+ assertEquals(Charset.forName("utf-8"), task.getCharset());
28
+ assertEquals(Newline.CRLF, task.getNewline());
29
+ assertEquals(true, task.getHeaderLine());
30
+ assertEquals(',', task.getDelimiterChar());
31
+ assertEquals('\"', task.getQuoteChar());
32
+ assertEquals(CsvFormatterPlugin.QuotePolicy.MINIMAL, task.getQuotePolicy());
33
+ assertEquals(false, task.getEscapeChar().isPresent());
34
+ assertEquals("", task.getNullString());
35
+ assertEquals(Newline.LF, task.getNewlineInField());
36
+ }
37
+
38
+ @Test
39
+ public void checkLoadConfig()
40
+ {
41
+ ConfigSource config = Exec.newConfigSource()
42
+ .set("charset", "utf-16")
43
+ .set("newline", "LF")
44
+ .set("header_line", false)
45
+ .set("delimiter", "\t")
46
+ .set("quote", "\\")
47
+ .set("quote_policy", "ALL")
48
+ .set("escape", "\"")
49
+ .set("null_string", "\\N")
50
+ .set("newline_in_field", "CRLF");
51
+
52
+ CsvFormatterPlugin.PluginTask task = config.loadConfig(CsvFormatterPlugin.PluginTask.class);
53
+ assertEquals(Charset.forName("utf-16"), task.getCharset());
54
+ assertEquals(Newline.LF, task.getNewline());
55
+ assertEquals(false, task.getHeaderLine());
56
+ assertEquals('\t', task.getDelimiterChar());
57
+ assertEquals('\\', task.getQuoteChar());
58
+ assertEquals(CsvFormatterPlugin.QuotePolicy.ALL, task.getQuotePolicy());
59
+ assertEquals('\"', (char) task.getEscapeChar().get());
60
+ assertEquals("\\N", task.getNullString());
61
+ assertEquals(Newline.CRLF, task.getNewlineInField());
62
+ }
63
+
64
+ @Test
65
+ public void testQuoteValue()
66
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
67
+ {
68
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setQuoteValue", String.class, char.class);
69
+ method.setAccessible(true);
70
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
71
+
72
+ assertEquals("\"ABCD\"", method.invoke(formatter, "ABCD", '"'));
73
+ assertEquals("\"\"", method.invoke(formatter, "", '"'));
74
+ assertEquals("'ABCD'", method.invoke(formatter, "ABCD", '\''));
75
+ assertEquals("''", method.invoke(formatter, "", '\''));
76
+ }
77
+
78
+ @Test
79
+ public void testEscapeQuote()
80
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
81
+ {
82
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
83
+ CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
84
+ method.setAccessible(true);
85
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
86
+
87
+ char delimiter = ',';
88
+ CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
89
+ String newline = Newline.LF.getString();
90
+
91
+ assertEquals("\"AB\\\"CD\"", method.invoke(formatter, "AB\"CD", delimiter, policy, '"', '\\', newline, ""));
92
+ assertEquals("\"AB\"\"CD\"", method.invoke(formatter, "AB\"CD", delimiter, policy, '"', '"', newline, ""));
93
+ }
94
+
95
+ @Test
96
+ public void testQuotePolicyAll()
97
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
98
+ {
99
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
100
+ CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
101
+ method.setAccessible(true);
102
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
103
+
104
+ char delimiter = ',';
105
+ char quote = '"';
106
+ char escape = '"';
107
+ CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.ALL;
108
+ String newline = Newline.LF.getString();
109
+ String nullString = "";
110
+
111
+ @SuppressWarnings("unchecked")
112
+ ImmutableList<ImmutableMap<String, String>> testCases = ImmutableList.of(
113
+ ImmutableMap.of("expected", "\"true\"", "actual", "true"),
114
+ ImmutableMap.of("expected", "\"false\"", "actual", "false"),
115
+ ImmutableMap.of("expected", "\"0\"", "actual", "0"),
116
+ ImmutableMap.of("expected", "\"1\"", "actual", "1"),
117
+ ImmutableMap.of("expected", "\"1234\"", "actual", "1234"),
118
+ ImmutableMap.of("expected", "\"-1234\"", "actual", "-1234"),
119
+ ImmutableMap.of("expected", "\"+1234\"", "actual", "+1234"),
120
+ ImmutableMap.of("expected", "\"0x4d2\"", "actual", "0x4d2"),
121
+ ImmutableMap.of("expected", "\"123L\"", "actual", "123L"),
122
+ ImmutableMap.of("expected", "\"3.141592\"", "actual", "3.141592"),
123
+ ImmutableMap.of("expected", "\"1,000\"", "actual", "1,000"),
124
+ ImmutableMap.of("expected", "\"ABC\"", "actual", "ABC"),
125
+ ImmutableMap.of("expected", "\"ABC\"\"DEF\"", "actual", "ABC\"DEF"),
126
+ ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\nDEF"),
127
+ ImmutableMap.of("expected", "\"\"", "actual", ""),
128
+ ImmutableMap.of("expected", "\"NULL\"", "actual", "NULL"),
129
+ ImmutableMap.of("expected", "\"2015-01-01 12:01:01\"", "actual", "2015-01-01 12:01:01"),
130
+ ImmutableMap.of("expected", "\"20150101\"", "actual", "20150101"));
131
+
132
+ for (ImmutableMap testCase : testCases) {
133
+ String expected = (String) testCase.get("expected");
134
+ String actual = (String) testCase.get("actual");
135
+ assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
136
+ }
137
+ }
138
+
139
+ @Test
140
+ public void testQuotePolicyMinimal()
141
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
142
+ {
143
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
144
+ CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
145
+ method.setAccessible(true);
146
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
147
+
148
+ char delimiter = ',';
149
+ char quote = '"';
150
+ char escape = '"';
151
+ CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
152
+ String newline = Newline.LF.getString();
153
+ String nullString = "";
154
+
155
+ @SuppressWarnings("unchecked")
156
+ ImmutableList<ImmutableMap<String, String>> testCases = ImmutableList.of(
157
+ ImmutableMap.of("expected", "true", "actual", "true"),
158
+ ImmutableMap.of("expected", "false", "actual", "false"),
159
+ ImmutableMap.of("expected", "0", "actual", "0"),
160
+ ImmutableMap.of("expected", "1", "actual", "1"),
161
+ ImmutableMap.of("expected", "1234", "actual", "1234"),
162
+ ImmutableMap.of("expected", "-1234", "actual", "-1234"),
163
+ ImmutableMap.of("expected", "+1234", "actual", "+1234"),
164
+ ImmutableMap.of("expected", "0x4d2", "actual", "0x4d2"),
165
+ ImmutableMap.of("expected", "123L", "actual", "123L"),
166
+ ImmutableMap.of("expected", "3.141592", "actual", "3.141592"),
167
+ ImmutableMap.of("expected", "\"1,000\"", "actual", "1,000"),
168
+ ImmutableMap.of("expected", "ABC", "actual", "ABC"),
169
+ ImmutableMap.of("expected", "\"ABC\"\"DEF\"", "actual", "ABC\"DEF"),
170
+ ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\nDEF"),
171
+ ImmutableMap.of("expected", "\"\"", "actual", ""),
172
+ ImmutableMap.of("expected", "NULL", "actual", "NULL"),
173
+ ImmutableMap.of("expected", "2015-01-01 12:01:01", "actual", "2015-01-01 12:01:01"),
174
+ ImmutableMap.of("expected", "20150101", "actual", "20150101"));
175
+
176
+ for (ImmutableMap testCase : testCases) {
177
+ String expected = (String) testCase.get("expected");
178
+ String actual = (String) testCase.get("actual");
179
+ assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
180
+ }
181
+ }
182
+
183
+ @Test
184
+ public void testQuotePolicyNone()
185
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
186
+ {
187
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
188
+ CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
189
+ method.setAccessible(true);
190
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
191
+
192
+ char delimiter = ',';
193
+ char quote = '"';
194
+ char escape = '"';
195
+ CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.NONE;
196
+ String newline = Newline.LF.getString();
197
+ String nullString = "";
198
+
199
+ @SuppressWarnings("unchecked")
200
+ ImmutableList<ImmutableMap<String, String>> testCases = ImmutableList.of(
201
+ ImmutableMap.of("expected", "true", "actual", "true"),
202
+ ImmutableMap.of("expected", "false", "actual", "false"),
203
+ ImmutableMap.of("expected", "0", "actual", "0"),
204
+ ImmutableMap.of("expected", "1", "actual", "1"),
205
+ ImmutableMap.of("expected", "1234", "actual", "1234"),
206
+ ImmutableMap.of("expected", "-1234", "actual", "-1234"),
207
+ ImmutableMap.of("expected", "+1234", "actual", "+1234"),
208
+ ImmutableMap.of("expected", "0x4d2", "actual", "0x4d2"),
209
+ ImmutableMap.of("expected", "123L", "actual", "123L"),
210
+ ImmutableMap.of("expected", "3.141592", "actual", "3.141592"),
211
+ ImmutableMap.of("expected", "1\",000", "actual", "1,000"),
212
+ ImmutableMap.of("expected", "ABC", "actual", "ABC"),
213
+ ImmutableMap.of("expected", "ABC\"\"DEF", "actual", "ABC\"DEF"),
214
+ ImmutableMap.of("expected", "ABC\"\nDEF", "actual", "ABC\nDEF"),
215
+ ImmutableMap.of("expected", "", "actual", ""),
216
+ ImmutableMap.of("expected", "NULL", "actual", "NULL"),
217
+ ImmutableMap.of("expected", "2015-01-01 12:01:01", "actual", "2015-01-01 12:01:01"),
218
+ ImmutableMap.of("expected", "20150101", "actual", "20150101"));
219
+
220
+ for (ImmutableMap testCase : testCases) {
221
+ String expected = (String) testCase.get("expected");
222
+ String actual = (String) testCase.get("actual");
223
+ assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
224
+ }
225
+ }
226
+
227
+ @Test
228
+ public void testNewlineInField()
229
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
230
+ {
231
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
232
+ CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
233
+ method.setAccessible(true);
234
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
235
+
236
+ char delimiter = ',';
237
+ char quote = '"';
238
+ char escape = '"';
239
+ String newline;
240
+ CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
241
+ String nullString = "";
242
+
243
+ ImmutableList<ImmutableMap<String, String>> testCases;
244
+
245
+ newline = Newline.LF.getString();
246
+ testCases = ImmutableList.of(
247
+ ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\r\nDEF"),
248
+ ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\rDEF"),
249
+ ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\nDEF"));
250
+
251
+ for (ImmutableMap testCase : testCases) {
252
+ String expected = (String) testCase.get("expected");
253
+ String actual = (String) testCase.get("actual");
254
+ assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
255
+ }
256
+
257
+
258
+ newline = Newline.CRLF.getString();
259
+ testCases = ImmutableList.of(
260
+ ImmutableMap.of("expected", "\"ABC\r\nDEF\"", "actual", "ABC\r\nDEF"),
261
+ ImmutableMap.of("expected", "\"ABC\r\nDEF\"", "actual", "ABC\rDEF"),
262
+ ImmutableMap.of("expected", "\"ABC\r\nDEF\"", "actual", "ABC\nDEF"));
263
+
264
+ for (ImmutableMap testCase : testCases) {
265
+ String expected = (String) testCase.get("expected");
266
+ String actual = (String) testCase.get("actual");
267
+ assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
268
+ }
269
+
270
+
271
+ newline = Newline.CR.getString();
272
+ testCases = ImmutableList.of(
273
+ ImmutableMap.of("expected", "\"ABC\rDEF\"", "actual", "ABC\r\nDEF"),
274
+ ImmutableMap.of("expected", "\"ABC\rDEF\"", "actual", "ABC\rDEF"),
275
+ ImmutableMap.of("expected", "\"ABC\rDEF\"", "actual", "ABC\nDEF"));
276
+
277
+ for (ImmutableMap testCase : testCases) {
278
+ String expected = (String) testCase.get("expected");
279
+ String actual = (String) testCase.get("actual");
280
+ assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
281
+ }
282
+ }
283
+
284
+ @Test
285
+ public void testNullString()
286
+ throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
287
+ {
288
+ Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
289
+ CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
290
+ method.setAccessible(true);
291
+ CsvFormatterPlugin formatter = new CsvFormatterPlugin();
292
+
293
+ char delimiter = ',';
294
+ char quote = '"';
295
+ char escape = '"';
296
+ CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
297
+ String newline = Newline.LF.getString();
298
+
299
+ assertEquals("\"\"", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, ""));
300
+ assertEquals("N/A", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, ""));
301
+ assertEquals("", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, ""));
302
+ assertEquals("N/A", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, ""));
303
+
304
+ assertEquals("", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, "N/A"));
305
+ assertEquals("\"N/A\"", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, "N/A"));
306
+ assertEquals("", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, "N/A"));
307
+ assertEquals("N/A", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, "N/A"));
308
+ }
309
+ }
@@ -87,7 +87,6 @@ module Embulk::Guess
87
87
  yes Yes YES
88
88
  t T y Y
89
89
  on On ON
90
- 1
91
90
  ].map {|k| [k, true] }]
92
91
 
93
92
  # When matching to false string, then return 'true'
@@ -96,7 +95,6 @@ module Embulk::Guess
96
95
  no No NO
97
96
  f N n N
98
97
  off Off OFF
99
- 0
100
98
  ].map {|k| [k, true] }]
101
99
 
102
100
  TYPE_COALESCE = Hash[{
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.6.8'
2
+ VERSION = '0.6.9'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.8
4
+ version: 0.6.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-13 00:00:00.000000000 Z
11
+ date: 2015-05-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -300,6 +300,7 @@ files:
300
300
  - embulk-docs/src/release/release-0.6.6.rst
301
301
  - embulk-docs/src/release/release-0.6.7.rst
302
302
  - embulk-docs/src/release/release-0.6.8.rst
303
+ - embulk-docs/src/release/release-0.6.9.rst
303
304
  - embulk-standards/build.gradle
304
305
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
305
306
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -313,6 +314,7 @@ files:
313
314
  - embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java
314
315
  - embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java
315
316
  - embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension
317
+ - embulk-standards/src/test/java/org/embulk/standards/TestCsvFormatterPlugin.java
316
318
  - embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java
317
319
  - embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java
318
320
  - embulk.gemspec
@@ -406,8 +408,8 @@ files:
406
408
  - classpath/bval-jsr303-0.5.jar
407
409
  - classpath/commons-beanutils-core-1.8.3.jar
408
410
  - classpath/commons-lang3-3.1.jar
409
- - classpath/embulk-core-0.6.8.jar
410
- - classpath/embulk-standards-0.6.8.jar
411
+ - classpath/embulk-core-0.6.9.jar
412
+ - classpath/embulk-standards-0.6.9.jar
411
413
  - classpath/guava-18.0.jar
412
414
  - classpath/guice-4.0.jar
413
415
  - classpath/guice-multibindings-4.0.jar