embulk 0.6.8 → 0.6.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +2 -1
- data/embulk-docs/src/built-in.rst +40 -13
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.9.rst +24 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +137 -14
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvFormatterPlugin.java +309 -0
- data/lib/embulk/guess/schema_guess.rb +0 -2
- data/lib/embulk/version.rb +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 19a42567d7e841a769ba329935d942926a062f66
|
4
|
+
data.tar.gz: 0bac8733cc4f5028e62865c08193520fce7f9d79
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b74b82f6d740d513cce0eadc365824a78737a2cd40bcac32082ff51f03259cd720140eb7463530b3ffe8d8a7d8601a4d91de889a891760888c4b8ddfe9b4bede
|
7
|
+
data.tar.gz: 5a77c5eba35a06074289aab700f1351ced771f1f181cf5831d4ccf747692444063be0c32133a8d5bbf5d30fe6e636db398f591898135b351e81aeba7b90bb3db
|
data/build.gradle
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
package org.embulk.spi.util;
|
2
2
|
|
3
3
|
import java.io.Writer;
|
4
|
+
import java.io.BufferedWriter;
|
4
5
|
import java.io.OutputStreamWriter;
|
5
6
|
import java.io.IOException;
|
6
7
|
import java.nio.charset.Charset;
|
@@ -47,7 +48,7 @@ public class LineEncoder
|
|
47
48
|
this.newline = task.getNewline().getString();
|
48
49
|
this.underlyingFileOutput = out;
|
49
50
|
this.outputStream = new FileOutputOutputStream(underlyingFileOutput, task.getBufferAllocator(), FileOutputOutputStream.CloseMode.FLUSH_FINISH);
|
50
|
-
this.writer = new OutputStreamWriter(outputStream, encoder);
|
51
|
+
this.writer = new BufferedWriter(new OutputStreamWriter(outputStream, encoder), 32*1024);
|
51
52
|
}
|
52
53
|
|
53
54
|
public void addNewLine()
|
@@ -285,17 +285,39 @@ The ``csv`` formatter plugin formats records using CSV or TSV format.
|
|
285
285
|
Options
|
286
286
|
~~~~~~~~~~~~~~~~~~
|
287
287
|
|
288
|
-
|
289
|
-
| name
|
290
|
-
|
291
|
-
| delimiter
|
292
|
-
|
293
|
-
|
|
294
|
-
|
295
|
-
|
|
296
|
-
|
297
|
-
|
|
298
|
-
|
288
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
289
|
+
| name | type | description | required? |
|
290
|
+
+======================+=========+=======================================================================================================+========================+
|
291
|
+
| delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
|
292
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
293
|
+
| quote | string | The character surrounding a quoted value | ``\"`` by default |
|
294
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
295
|
+
| quote\_policy | enum | Policy for quote (ALL, MINIMAL, NONE) (see below) | ``MINIMAL`` by default |
|
296
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
297
|
+
| escape | string | Escape character to escape a quote character when quote\_policy is ALL or MINIMAL | ``\"`` by default |
|
298
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
299
|
+
| header\_line | boolean | If true, write the header line with column name at the first line | ``true`` by default |
|
300
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
301
|
+
| null_string | string | Expression of NULL values | empty by default |
|
302
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
303
|
+
| newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
|
304
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
305
|
+
| newline\_in\_field | enum | Newline character in each field (CRLF, LF, CR) | ``LF`` by default |
|
306
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
307
|
+
| charset | enum | Character encoding (e.g. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
|
308
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
309
|
+
|
310
|
+
The ``quote_policy`` option determines which fields are quoted.
|
311
|
+
|
312
|
+
+------------+--------------------------------------------------------------------------------------------------------+
|
313
|
+
| name | description |
|
314
|
+
+============+========================================================================================================+
|
315
|
+
| ALL | Quote all fields |
|
316
|
+
+------------+--------------------------------------------------------------------------------------------------------+
|
317
|
+
| MINIMAL | Only quote fields that contain the delimiter, the quote character, or a newline character (CR or LF) |
|
318
|
+
+------------+--------------------------------------------------------------------------------------------------------+
|
319
|
+
| NONE | Never quote fields. When the delimiter occurs in a field, it is escaped with the escape character |
|
320
|
+
+------------+--------------------------------------------------------------------------------------------------------+
|
299
321
|
|
300
322
|
Example
|
301
323
|
~~~~~~~~~~~~~~~~~~
|
@@ -306,9 +328,14 @@ Example
|
|
306
328
|
...
|
307
329
|
formatter:
|
308
330
|
- type: csv
|
309
|
-
delimiter:
|
310
|
-
newline:
|
331
|
+
delimiter: '\t'
|
332
|
+
newline: CRLF
|
333
|
+
newline_in_field: LF
|
311
334
|
charset: UTF-8
|
335
|
+
quote_policy: MINIMAL
|
336
|
+
quote: '"'
|
337
|
+
escape: '\\'
|
338
|
+
null_string: '\\N'
|
312
339
|
|
313
340
|
Gzip encoder plugin
|
314
341
|
------------------
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,24 @@
|
|
1
|
+
Release 0.6.9
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Built-in plugins
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* ``formatter-csv`` supports ``quote``, ``quote_policy``, ``escape``, ``newline_in_field``, and ``null_string`` options (@sakama++)
|
8
|
+
|
9
|
+
* ``quote_policy`` controls how to quote values. It can be one of ``ALL`` (quote all values), ``MINIMAL`` (quote a value only if it includes the delimiter or the quote character), or ``NONE`` (never quote).
|
10
|
+
|
11
|
+
* ``escape`` controls how to escape the quote character in a quoted string. The default is ``"`` (``"`` will be ``""``). Some applications may set it to ``\`` (``"`` will be ``\"``).
|
12
|
+
|
13
|
+
* ``null_string`` controls how to write NULL values. The default is ``""`` (empty string). You can use any string such as ``\N`` or ``#N/A``.
|
14
|
+
|
15
|
+
* ``guess-csv`` guesses columns which contain only 0 and 1 in the first 32KB as long type rather than boolean type.
|
16
|
+
|
17
|
+
General Changes
|
18
|
+
------------------
|
19
|
+
|
20
|
+
* ``spi.util.LineEncoder`` uses a buffered writer. This improves the performance of ``formatter-csv`` by up to 10%.
|
21
|
+
|
22
|
+
Release Date
|
23
|
+
------------------
|
24
|
+
2015-05-14
|
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.standards;
|
2
2
|
|
3
|
+
import com.google.common.base.Optional;
|
3
4
|
import com.google.common.collect.ImmutableBiMap;
|
4
5
|
import com.google.common.collect.ImmutableMap;
|
5
6
|
import org.embulk.config.Config;
|
@@ -20,11 +21,31 @@ import org.embulk.spi.Exec;
|
|
20
21
|
import org.embulk.spi.FileOutput;
|
21
22
|
import org.embulk.spi.util.LineEncoder;
|
22
23
|
|
24
|
+
import org.embulk.spi.util.Newline;
|
23
25
|
import java.util.Map;
|
24
26
|
|
25
27
|
public class CsvFormatterPlugin
|
26
28
|
implements FormatterPlugin
|
27
29
|
{
|
30
|
+
public enum QuotePolicy
|
31
|
+
{
|
32
|
+
ALL("ALL"),
|
33
|
+
MINIMAL("MINIMAL"),
|
34
|
+
NONE("NONE");
|
35
|
+
|
36
|
+
private final String string;
|
37
|
+
|
38
|
+
private QuotePolicy(String string)
|
39
|
+
{
|
40
|
+
this.string = string;
|
41
|
+
}
|
42
|
+
|
43
|
+
public String getString()
|
44
|
+
{
|
45
|
+
return string;
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
28
49
|
public interface PluginTask
|
29
50
|
extends LineEncoder.EncoderTask, TimestampFormatter.FormatterTask
|
30
51
|
{
|
@@ -34,7 +55,27 @@ public class CsvFormatterPlugin
|
|
34
55
|
|
35
56
|
@Config("delimiter")
|
36
57
|
@ConfigDefault("\",\"")
|
37
|
-
public
|
58
|
+
public char getDelimiterChar();
|
59
|
+
|
60
|
+
@Config("quote")
|
61
|
+
@ConfigDefault("\"\\\"\"")
|
62
|
+
public char getQuoteChar();
|
63
|
+
|
64
|
+
@Config("quote_policy")
|
65
|
+
@ConfigDefault("\"MINIMAL\"")
|
66
|
+
public QuotePolicy getQuotePolicy();
|
67
|
+
|
68
|
+
@Config("escape")
|
69
|
+
@ConfigDefault("null")
|
70
|
+
public Optional<Character> getEscapeChar();
|
71
|
+
|
72
|
+
@Config("null_string")
|
73
|
+
@ConfigDefault("\"\"")
|
74
|
+
public String getNullString();
|
75
|
+
|
76
|
+
@Config("newline_in_field")
|
77
|
+
@ConfigDefault("\"LF\"")
|
78
|
+
public Newline getNewlineInField();
|
38
79
|
}
|
39
80
|
|
40
81
|
@Override
|
@@ -66,18 +107,24 @@ public class CsvFormatterPlugin
|
|
66
107
|
final LineEncoder encoder = new LineEncoder(output, task);
|
67
108
|
final Map<Integer, TimestampFormatter> timestampFormatters =
|
68
109
|
newTimestampFormatters(task, schema);
|
69
|
-
final
|
110
|
+
final char delimiter = task.getDelimiterChar();
|
111
|
+
final QuotePolicy quotePolicy = task.getQuotePolicy();
|
112
|
+
final char quote = task.getQuoteChar() != '\0' ? task.getQuoteChar() : '"';
|
113
|
+
final char escape = task.getEscapeChar().or(quotePolicy == QuotePolicy.NONE ? '\\' : '\"');
|
114
|
+
final String newlineInField = task.getNewlineInField().getString();
|
115
|
+
final String nullString = task.getNullString();
|
70
116
|
|
71
117
|
// create a file
|
72
118
|
encoder.nextFile();
|
73
119
|
|
74
120
|
// write header
|
75
121
|
if (task.getHeaderLine()) {
|
76
|
-
writeHeader(schema, encoder, delimiter);
|
122
|
+
writeHeader(schema, encoder, delimiter, quotePolicy, quote, escape, newlineInField, nullString);
|
77
123
|
}
|
78
124
|
|
79
125
|
return new PageOutput() {
|
80
126
|
private final PageReader pageReader = new PageReader(schema);
|
127
|
+
private final String delimiterString = String.valueOf(delimiter);
|
81
128
|
|
82
129
|
public void add(Page page)
|
83
130
|
{
|
@@ -88,7 +135,9 @@ public class CsvFormatterPlugin
|
|
88
135
|
{
|
89
136
|
addDelimiter(column);
|
90
137
|
if (!pageReader.isNull(column)) {
|
91
|
-
|
138
|
+
addValue(Boolean.toString(pageReader.getBoolean(column)));
|
139
|
+
} else {
|
140
|
+
addNullString();
|
92
141
|
}
|
93
142
|
}
|
94
143
|
|
@@ -96,7 +145,9 @@ public class CsvFormatterPlugin
|
|
96
145
|
{
|
97
146
|
addDelimiter(column);
|
98
147
|
if (!pageReader.isNull(column)) {
|
99
|
-
|
148
|
+
addValue(Long.toString(pageReader.getLong(column)));
|
149
|
+
} else {
|
150
|
+
addNullString();
|
100
151
|
}
|
101
152
|
}
|
102
153
|
|
@@ -104,7 +155,9 @@ public class CsvFormatterPlugin
|
|
104
155
|
{
|
105
156
|
addDelimiter(column);
|
106
157
|
if (!pageReader.isNull(column)) {
|
107
|
-
|
158
|
+
addValue(Double.toString(pageReader.getDouble(column)));
|
159
|
+
} else {
|
160
|
+
addNullString();
|
108
161
|
}
|
109
162
|
}
|
110
163
|
|
@@ -112,8 +165,9 @@ public class CsvFormatterPlugin
|
|
112
165
|
{
|
113
166
|
addDelimiter(column);
|
114
167
|
if (!pageReader.isNull(column)) {
|
115
|
-
|
116
|
-
|
168
|
+
addValue(pageReader.getString(column));
|
169
|
+
} else {
|
170
|
+
addNullString();
|
117
171
|
}
|
118
172
|
}
|
119
173
|
|
@@ -122,18 +176,29 @@ public class CsvFormatterPlugin
|
|
122
176
|
addDelimiter(column);
|
123
177
|
if (!pageReader.isNull(column)) {
|
124
178
|
Timestamp value = pageReader.getTimestamp(column);
|
125
|
-
|
179
|
+
addValue(timestampFormatters.get(column.getIndex()).format(value));
|
180
|
+
} else {
|
181
|
+
addNullString();
|
126
182
|
}
|
127
183
|
}
|
128
184
|
|
129
185
|
private void addDelimiter(Column column)
|
130
186
|
{
|
131
187
|
if (column.getIndex() != 0) {
|
132
|
-
encoder.addText(
|
188
|
+
encoder.addText(delimiterString);
|
133
189
|
}
|
134
190
|
}
|
135
|
-
});
|
136
191
|
|
192
|
+
private void addValue(String v)
|
193
|
+
{
|
194
|
+
encoder.addText(setEscapeAndQuoteValue(v, delimiter, quotePolicy, quote, escape, newlineInField, nullString));
|
195
|
+
}
|
196
|
+
|
197
|
+
private void addNullString()
|
198
|
+
{
|
199
|
+
encoder.addText(nullString);
|
200
|
+
}
|
201
|
+
});
|
137
202
|
encoder.addNewLine();
|
138
203
|
}
|
139
204
|
}
|
@@ -150,14 +215,72 @@ public class CsvFormatterPlugin
|
|
150
215
|
};
|
151
216
|
}
|
152
217
|
|
153
|
-
private void writeHeader(Schema schema, LineEncoder encoder, String
|
218
|
+
private void writeHeader(Schema schema, LineEncoder encoder, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString)
|
154
219
|
{
|
220
|
+
String delimiterString = String.valueOf(delimiter);
|
155
221
|
for (Column column : schema.getColumns()) {
|
156
222
|
if (column.getIndex() != 0) {
|
157
|
-
encoder.addText(
|
223
|
+
encoder.addText(delimiterString);
|
158
224
|
}
|
159
|
-
encoder.addText(column.getName());
|
225
|
+
encoder.addText(setEscapeAndQuoteValue(column.getName(), delimiter, policy, quote, escape, newline, nullString));
|
160
226
|
}
|
161
227
|
encoder.addNewLine();
|
162
228
|
}
|
229
|
+
|
230
|
+
private String setEscapeAndQuoteValue(String v, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString)
|
231
|
+
{
|
232
|
+
StringBuilder escapedValue = new StringBuilder();
|
233
|
+
char previousChar = ' ';
|
234
|
+
|
235
|
+
boolean isRequireQuote = (policy == QuotePolicy.ALL || policy == QuotePolicy.MINIMAL && v.equals(nullString)) ? true : false;
|
236
|
+
|
237
|
+
for (int i = 0; i < v.length(); i++) {
|
238
|
+
char c = v.charAt(i);
|
239
|
+
|
240
|
+
if (c == quote) {
|
241
|
+
escapedValue.append(escape);
|
242
|
+
escapedValue.append(c);
|
243
|
+
isRequireQuote = true;
|
244
|
+
} else if (c == '\r') {
|
245
|
+
if (policy == QuotePolicy.NONE) {
|
246
|
+
escapedValue.append(escape);
|
247
|
+
}
|
248
|
+
escapedValue.append(newline);
|
249
|
+
isRequireQuote = true;
|
250
|
+
} else if (c == '\n') {
|
251
|
+
if (previousChar != '\r') {
|
252
|
+
if (policy == QuotePolicy.NONE) {
|
253
|
+
escapedValue.append(escape);
|
254
|
+
}
|
255
|
+
escapedValue.append(newline);
|
256
|
+
isRequireQuote = true;
|
257
|
+
}
|
258
|
+
} else if (c == delimiter) {
|
259
|
+
if (policy == QuotePolicy.NONE) {
|
260
|
+
escapedValue.append(escape);
|
261
|
+
}
|
262
|
+
escapedValue.append(c);
|
263
|
+
isRequireQuote = true;
|
264
|
+
} else {
|
265
|
+
escapedValue.append(c);
|
266
|
+
}
|
267
|
+
previousChar = c;
|
268
|
+
}
|
269
|
+
|
270
|
+
if (policy != QuotePolicy.NONE && isRequireQuote) {
|
271
|
+
return setQuoteValue(escapedValue.toString(), quote);
|
272
|
+
} else {
|
273
|
+
return escapedValue.toString();
|
274
|
+
}
|
275
|
+
}
|
276
|
+
|
277
|
+
private String setQuoteValue(String v, char quote)
|
278
|
+
{
|
279
|
+
StringBuilder sb = new StringBuilder();
|
280
|
+
sb.append(quote);
|
281
|
+
sb.append(v);
|
282
|
+
sb.append(quote);
|
283
|
+
|
284
|
+
return sb.toString();
|
285
|
+
}
|
163
286
|
}
|
@@ -0,0 +1,309 @@
|
|
1
|
+
package org.embulk.standards;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableList;
|
4
|
+
import com.google.common.collect.ImmutableMap;
|
5
|
+
import org.junit.Rule;
|
6
|
+
import org.junit.Test;
|
7
|
+
import java.lang.reflect.InvocationTargetException;
|
8
|
+
import java.lang.reflect.Method;
|
9
|
+
import static org.junit.Assert.assertEquals;
|
10
|
+
import java.nio.charset.Charset;
|
11
|
+
import org.embulk.EmbulkTestRuntime;
|
12
|
+
import org.embulk.config.ConfigSource;
|
13
|
+
import org.embulk.spi.Exec;
|
14
|
+
import org.embulk.spi.util.Newline;
|
15
|
+
|
16
|
+
public class TestCsvFormatterPlugin
|
17
|
+
{
|
18
|
+
@Rule
|
19
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
20
|
+
|
21
|
+
@Test
|
22
|
+
public void checkDefaultValues()
|
23
|
+
{
|
24
|
+
ConfigSource config = Exec.newConfigSource();
|
25
|
+
|
26
|
+
CsvFormatterPlugin.PluginTask task = config.loadConfig(CsvFormatterPlugin.PluginTask.class);
|
27
|
+
assertEquals(Charset.forName("utf-8"), task.getCharset());
|
28
|
+
assertEquals(Newline.CRLF, task.getNewline());
|
29
|
+
assertEquals(true, task.getHeaderLine());
|
30
|
+
assertEquals(',', task.getDelimiterChar());
|
31
|
+
assertEquals('\"', task.getQuoteChar());
|
32
|
+
assertEquals(CsvFormatterPlugin.QuotePolicy.MINIMAL, task.getQuotePolicy());
|
33
|
+
assertEquals(false, task.getEscapeChar().isPresent());
|
34
|
+
assertEquals("", task.getNullString());
|
35
|
+
assertEquals(Newline.LF, task.getNewlineInField());
|
36
|
+
}
|
37
|
+
|
38
|
+
@Test
|
39
|
+
public void checkLoadConfig()
|
40
|
+
{
|
41
|
+
ConfigSource config = Exec.newConfigSource()
|
42
|
+
.set("charset", "utf-16")
|
43
|
+
.set("newline", "LF")
|
44
|
+
.set("header_line", false)
|
45
|
+
.set("delimiter", "\t")
|
46
|
+
.set("quote", "\\")
|
47
|
+
.set("quote_policy", "ALL")
|
48
|
+
.set("escape", "\"")
|
49
|
+
.set("null_string", "\\N")
|
50
|
+
.set("newline_in_field", "CRLF");
|
51
|
+
|
52
|
+
CsvFormatterPlugin.PluginTask task = config.loadConfig(CsvFormatterPlugin.PluginTask.class);
|
53
|
+
assertEquals(Charset.forName("utf-16"), task.getCharset());
|
54
|
+
assertEquals(Newline.LF, task.getNewline());
|
55
|
+
assertEquals(false, task.getHeaderLine());
|
56
|
+
assertEquals('\t', task.getDelimiterChar());
|
57
|
+
assertEquals('\\', task.getQuoteChar());
|
58
|
+
assertEquals(CsvFormatterPlugin.QuotePolicy.ALL, task.getQuotePolicy());
|
59
|
+
assertEquals('\"', (char) task.getEscapeChar().get());
|
60
|
+
assertEquals("\\N", task.getNullString());
|
61
|
+
assertEquals(Newline.CRLF, task.getNewlineInField());
|
62
|
+
}
|
63
|
+
|
64
|
+
@Test
|
65
|
+
public void testQuoteValue()
|
66
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
67
|
+
{
|
68
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setQuoteValue", String.class, char.class);
|
69
|
+
method.setAccessible(true);
|
70
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
71
|
+
|
72
|
+
assertEquals("\"ABCD\"", method.invoke(formatter, "ABCD", '"'));
|
73
|
+
assertEquals("\"\"", method.invoke(formatter, "", '"'));
|
74
|
+
assertEquals("'ABCD'", method.invoke(formatter, "ABCD", '\''));
|
75
|
+
assertEquals("''", method.invoke(formatter, "", '\''));
|
76
|
+
}
|
77
|
+
|
78
|
+
@Test
|
79
|
+
public void testEscapeQuote()
|
80
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
81
|
+
{
|
82
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
|
83
|
+
CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
|
84
|
+
method.setAccessible(true);
|
85
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
86
|
+
|
87
|
+
char delimiter = ',';
|
88
|
+
CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
|
89
|
+
String newline = Newline.LF.getString();
|
90
|
+
|
91
|
+
assertEquals("\"AB\\\"CD\"", method.invoke(formatter, "AB\"CD", delimiter, policy, '"', '\\', newline, ""));
|
92
|
+
assertEquals("\"AB\"\"CD\"", method.invoke(formatter, "AB\"CD", delimiter, policy, '"', '"', newline, ""));
|
93
|
+
}
|
94
|
+
|
95
|
+
@Test
|
96
|
+
public void testQuotePolicyAll()
|
97
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
98
|
+
{
|
99
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
|
100
|
+
CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
|
101
|
+
method.setAccessible(true);
|
102
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
103
|
+
|
104
|
+
char delimiter = ',';
|
105
|
+
char quote = '"';
|
106
|
+
char escape = '"';
|
107
|
+
CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.ALL;
|
108
|
+
String newline = Newline.LF.getString();
|
109
|
+
String nullString = "";
|
110
|
+
|
111
|
+
@SuppressWarnings("unchecked")
|
112
|
+
ImmutableList<ImmutableMap<String, String>> testCases = ImmutableList.of(
|
113
|
+
ImmutableMap.of("expected", "\"true\"", "actual", "true"),
|
114
|
+
ImmutableMap.of("expected", "\"false\"", "actual", "false"),
|
115
|
+
ImmutableMap.of("expected", "\"0\"", "actual", "0"),
|
116
|
+
ImmutableMap.of("expected", "\"1\"", "actual", "1"),
|
117
|
+
ImmutableMap.of("expected", "\"1234\"", "actual", "1234"),
|
118
|
+
ImmutableMap.of("expected", "\"-1234\"", "actual", "-1234"),
|
119
|
+
ImmutableMap.of("expected", "\"+1234\"", "actual", "+1234"),
|
120
|
+
ImmutableMap.of("expected", "\"0x4d2\"", "actual", "0x4d2"),
|
121
|
+
ImmutableMap.of("expected", "\"123L\"", "actual", "123L"),
|
122
|
+
ImmutableMap.of("expected", "\"3.141592\"", "actual", "3.141592"),
|
123
|
+
ImmutableMap.of("expected", "\"1,000\"", "actual", "1,000"),
|
124
|
+
ImmutableMap.of("expected", "\"ABC\"", "actual", "ABC"),
|
125
|
+
ImmutableMap.of("expected", "\"ABC\"\"DEF\"", "actual", "ABC\"DEF"),
|
126
|
+
ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\nDEF"),
|
127
|
+
ImmutableMap.of("expected", "\"\"", "actual", ""),
|
128
|
+
ImmutableMap.of("expected", "\"NULL\"", "actual", "NULL"),
|
129
|
+
ImmutableMap.of("expected", "\"2015-01-01 12:01:01\"", "actual", "2015-01-01 12:01:01"),
|
130
|
+
ImmutableMap.of("expected", "\"20150101\"", "actual", "20150101"));
|
131
|
+
|
132
|
+
for (ImmutableMap testCase : testCases) {
|
133
|
+
String expected = (String) testCase.get("expected");
|
134
|
+
String actual = (String) testCase.get("actual");
|
135
|
+
assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
|
136
|
+
}
|
137
|
+
}
|
138
|
+
|
139
|
+
@Test
|
140
|
+
public void testQuotePolicyMinimal()
|
141
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
142
|
+
{
|
143
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
|
144
|
+
CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
|
145
|
+
method.setAccessible(true);
|
146
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
147
|
+
|
148
|
+
char delimiter = ',';
|
149
|
+
char quote = '"';
|
150
|
+
char escape = '"';
|
151
|
+
CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
|
152
|
+
String newline = Newline.LF.getString();
|
153
|
+
String nullString = "";
|
154
|
+
|
155
|
+
@SuppressWarnings("unchecked")
|
156
|
+
ImmutableList<ImmutableMap<String, String>> testCases = ImmutableList.of(
|
157
|
+
ImmutableMap.of("expected", "true", "actual", "true"),
|
158
|
+
ImmutableMap.of("expected", "false", "actual", "false"),
|
159
|
+
ImmutableMap.of("expected", "0", "actual", "0"),
|
160
|
+
ImmutableMap.of("expected", "1", "actual", "1"),
|
161
|
+
ImmutableMap.of("expected", "1234", "actual", "1234"),
|
162
|
+
ImmutableMap.of("expected", "-1234", "actual", "-1234"),
|
163
|
+
ImmutableMap.of("expected", "+1234", "actual", "+1234"),
|
164
|
+
ImmutableMap.of("expected", "0x4d2", "actual", "0x4d2"),
|
165
|
+
ImmutableMap.of("expected", "123L", "actual", "123L"),
|
166
|
+
ImmutableMap.of("expected", "3.141592", "actual", "3.141592"),
|
167
|
+
ImmutableMap.of("expected", "\"1,000\"", "actual", "1,000"),
|
168
|
+
ImmutableMap.of("expected", "ABC", "actual", "ABC"),
|
169
|
+
ImmutableMap.of("expected", "\"ABC\"\"DEF\"", "actual", "ABC\"DEF"),
|
170
|
+
ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\nDEF"),
|
171
|
+
ImmutableMap.of("expected", "\"\"", "actual", ""),
|
172
|
+
ImmutableMap.of("expected", "NULL", "actual", "NULL"),
|
173
|
+
ImmutableMap.of("expected", "2015-01-01 12:01:01", "actual", "2015-01-01 12:01:01"),
|
174
|
+
ImmutableMap.of("expected", "20150101", "actual", "20150101"));
|
175
|
+
|
176
|
+
for (ImmutableMap testCase : testCases) {
|
177
|
+
String expected = (String) testCase.get("expected");
|
178
|
+
String actual = (String) testCase.get("actual");
|
179
|
+
assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
@Test
|
184
|
+
public void testQuotePolicyNone()
|
185
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
186
|
+
{
|
187
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
|
188
|
+
CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
|
189
|
+
method.setAccessible(true);
|
190
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
191
|
+
|
192
|
+
char delimiter = ',';
|
193
|
+
char quote = '"';
|
194
|
+
char escape = '"';
|
195
|
+
CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.NONE;
|
196
|
+
String newline = Newline.LF.getString();
|
197
|
+
String nullString = "";
|
198
|
+
|
199
|
+
@SuppressWarnings("unchecked")
|
200
|
+
ImmutableList<ImmutableMap<String, String>> testCases = ImmutableList.of(
|
201
|
+
ImmutableMap.of("expected", "true", "actual", "true"),
|
202
|
+
ImmutableMap.of("expected", "false", "actual", "false"),
|
203
|
+
ImmutableMap.of("expected", "0", "actual", "0"),
|
204
|
+
ImmutableMap.of("expected", "1", "actual", "1"),
|
205
|
+
ImmutableMap.of("expected", "1234", "actual", "1234"),
|
206
|
+
ImmutableMap.of("expected", "-1234", "actual", "-1234"),
|
207
|
+
ImmutableMap.of("expected", "+1234", "actual", "+1234"),
|
208
|
+
ImmutableMap.of("expected", "0x4d2", "actual", "0x4d2"),
|
209
|
+
ImmutableMap.of("expected", "123L", "actual", "123L"),
|
210
|
+
ImmutableMap.of("expected", "3.141592", "actual", "3.141592"),
|
211
|
+
ImmutableMap.of("expected", "1\",000", "actual", "1,000"),
|
212
|
+
ImmutableMap.of("expected", "ABC", "actual", "ABC"),
|
213
|
+
ImmutableMap.of("expected", "ABC\"\"DEF", "actual", "ABC\"DEF"),
|
214
|
+
ImmutableMap.of("expected", "ABC\"\nDEF", "actual", "ABC\nDEF"),
|
215
|
+
ImmutableMap.of("expected", "", "actual", ""),
|
216
|
+
ImmutableMap.of("expected", "NULL", "actual", "NULL"),
|
217
|
+
ImmutableMap.of("expected", "2015-01-01 12:01:01", "actual", "2015-01-01 12:01:01"),
|
218
|
+
ImmutableMap.of("expected", "20150101", "actual", "20150101"));
|
219
|
+
|
220
|
+
for (ImmutableMap testCase : testCases) {
|
221
|
+
String expected = (String) testCase.get("expected");
|
222
|
+
String actual = (String) testCase.get("actual");
|
223
|
+
assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
|
224
|
+
}
|
225
|
+
}
|
226
|
+
|
227
|
+
@Test
|
228
|
+
public void testNewlineInField()
|
229
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
230
|
+
{
|
231
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
|
232
|
+
CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
|
233
|
+
method.setAccessible(true);
|
234
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
235
|
+
|
236
|
+
char delimiter = ',';
|
237
|
+
char quote = '"';
|
238
|
+
char escape = '"';
|
239
|
+
String newline;
|
240
|
+
CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
|
241
|
+
String nullString = "";
|
242
|
+
|
243
|
+
ImmutableList<ImmutableMap<String, String>> testCases;
|
244
|
+
|
245
|
+
newline = Newline.LF.getString();
|
246
|
+
testCases = ImmutableList.of(
|
247
|
+
ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\r\nDEF"),
|
248
|
+
ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\rDEF"),
|
249
|
+
ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\nDEF"));
|
250
|
+
|
251
|
+
for (ImmutableMap testCase : testCases) {
|
252
|
+
String expected = (String) testCase.get("expected");
|
253
|
+
String actual = (String) testCase.get("actual");
|
254
|
+
assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
|
255
|
+
}
|
256
|
+
|
257
|
+
|
258
|
+
newline = Newline.CRLF.getString();
|
259
|
+
testCases = ImmutableList.of(
|
260
|
+
ImmutableMap.of("expected", "\"ABC\r\nDEF\"", "actual", "ABC\r\nDEF"),
|
261
|
+
ImmutableMap.of("expected", "\"ABC\r\nDEF\"", "actual", "ABC\rDEF"),
|
262
|
+
ImmutableMap.of("expected", "\"ABC\r\nDEF\"", "actual", "ABC\nDEF"));
|
263
|
+
|
264
|
+
for (ImmutableMap testCase : testCases) {
|
265
|
+
String expected = (String) testCase.get("expected");
|
266
|
+
String actual = (String) testCase.get("actual");
|
267
|
+
assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
|
268
|
+
}
|
269
|
+
|
270
|
+
|
271
|
+
newline = Newline.CR.getString();
|
272
|
+
testCases = ImmutableList.of(
|
273
|
+
ImmutableMap.of("expected", "\"ABC\rDEF\"", "actual", "ABC\r\nDEF"),
|
274
|
+
ImmutableMap.of("expected", "\"ABC\rDEF\"", "actual", "ABC\rDEF"),
|
275
|
+
ImmutableMap.of("expected", "\"ABC\rDEF\"", "actual", "ABC\nDEF"));
|
276
|
+
|
277
|
+
for (ImmutableMap testCase : testCases) {
|
278
|
+
String expected = (String) testCase.get("expected");
|
279
|
+
String actual = (String) testCase.get("actual");
|
280
|
+
assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
|
281
|
+
}
|
282
|
+
}
|
283
|
+
|
284
|
+
@Test
|
285
|
+
public void testNullString()
|
286
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
287
|
+
{
|
288
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
|
289
|
+
CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
|
290
|
+
method.setAccessible(true);
|
291
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
292
|
+
|
293
|
+
char delimiter = ',';
|
294
|
+
char quote = '"';
|
295
|
+
char escape = '"';
|
296
|
+
CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
|
297
|
+
String newline = Newline.LF.getString();
|
298
|
+
|
299
|
+
assertEquals("\"\"", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, ""));
|
300
|
+
assertEquals("N/A", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, ""));
|
301
|
+
assertEquals("", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, ""));
|
302
|
+
assertEquals("N/A", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, ""));
|
303
|
+
|
304
|
+
assertEquals("", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, "N/A"));
|
305
|
+
assertEquals("\"N/A\"", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, "N/A"));
|
306
|
+
assertEquals("", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, "N/A"));
|
307
|
+
assertEquals("N/A", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, "N/A"));
|
308
|
+
}
|
309
|
+
}
|
@@ -87,7 +87,6 @@ module Embulk::Guess
|
|
87
87
|
yes Yes YES
|
88
88
|
t T y Y
|
89
89
|
on On ON
|
90
|
-
1
|
91
90
|
].map {|k| [k, true] }]
|
92
91
|
|
93
92
|
# When matching to false string, then return 'true'
|
@@ -96,7 +95,6 @@ module Embulk::Guess
|
|
96
95
|
no No NO
|
97
96
|
f N n N
|
98
97
|
off Off OFF
|
99
|
-
0
|
100
98
|
].map {|k| [k, true] }]
|
101
99
|
|
102
100
|
TYPE_COALESCE = Hash[{
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05-
|
11
|
+
date: 2015-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -300,6 +300,7 @@ files:
|
|
300
300
|
- embulk-docs/src/release/release-0.6.6.rst
|
301
301
|
- embulk-docs/src/release/release-0.6.7.rst
|
302
302
|
- embulk-docs/src/release/release-0.6.8.rst
|
303
|
+
- embulk-docs/src/release/release-0.6.9.rst
|
303
304
|
- embulk-standards/build.gradle
|
304
305
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
305
306
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -313,6 +314,7 @@ files:
|
|
313
314
|
- embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java
|
314
315
|
- embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java
|
315
316
|
- embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension
|
317
|
+
- embulk-standards/src/test/java/org/embulk/standards/TestCsvFormatterPlugin.java
|
316
318
|
- embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java
|
317
319
|
- embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java
|
318
320
|
- embulk.gemspec
|
@@ -406,8 +408,8 @@ files:
|
|
406
408
|
- classpath/bval-jsr303-0.5.jar
|
407
409
|
- classpath/commons-beanutils-core-1.8.3.jar
|
408
410
|
- classpath/commons-lang3-3.1.jar
|
409
|
-
- classpath/embulk-core-0.6.
|
410
|
-
- classpath/embulk-standards-0.6.
|
411
|
+
- classpath/embulk-core-0.6.9.jar
|
412
|
+
- classpath/embulk-standards-0.6.9.jar
|
411
413
|
- classpath/guava-18.0.jar
|
412
414
|
- classpath/guice-4.0.jar
|
413
415
|
- classpath/guice-multibindings-4.0.jar
|