embulk 0.6.8 → 0.6.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +2 -1
- data/embulk-docs/src/built-in.rst +40 -13
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.9.rst +24 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +137 -14
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvFormatterPlugin.java +309 -0
- data/lib/embulk/guess/schema_guess.rb +0 -2
- data/lib/embulk/version.rb +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 19a42567d7e841a769ba329935d942926a062f66
|
4
|
+
data.tar.gz: 0bac8733cc4f5028e62865c08193520fce7f9d79
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b74b82f6d740d513cce0eadc365824a78737a2cd40bcac32082ff51f03259cd720140eb7463530b3ffe8d8a7d8601a4d91de889a891760888c4b8ddfe9b4bede
|
7
|
+
data.tar.gz: 5a77c5eba35a06074289aab700f1351ced771f1f181cf5831d4ccf747692444063be0c32133a8d5bbf5d30fe6e636db398f591898135b351e81aeba7b90bb3db
|
data/build.gradle
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
package org.embulk.spi.util;
|
2
2
|
|
3
3
|
import java.io.Writer;
|
4
|
+
import java.io.BufferedWriter;
|
4
5
|
import java.io.OutputStreamWriter;
|
5
6
|
import java.io.IOException;
|
6
7
|
import java.nio.charset.Charset;
|
@@ -47,7 +48,7 @@ public class LineEncoder
|
|
47
48
|
this.newline = task.getNewline().getString();
|
48
49
|
this.underlyingFileOutput = out;
|
49
50
|
this.outputStream = new FileOutputOutputStream(underlyingFileOutput, task.getBufferAllocator(), FileOutputOutputStream.CloseMode.FLUSH_FINISH);
|
50
|
-
this.writer = new OutputStreamWriter(outputStream, encoder);
|
51
|
+
this.writer = new BufferedWriter(new OutputStreamWriter(outputStream, encoder), 32*1024);
|
51
52
|
}
|
52
53
|
|
53
54
|
public void addNewLine()
|
@@ -285,17 +285,39 @@ The ``csv`` formatter plugin formats records using CSV or TSV format.
|
|
285
285
|
Options
|
286
286
|
~~~~~~~~~~~~~~~~~~
|
287
287
|
|
288
|
-
|
289
|
-
| name
|
290
|
-
|
291
|
-
| delimiter
|
292
|
-
|
293
|
-
|
|
294
|
-
|
295
|
-
|
|
296
|
-
|
297
|
-
|
|
298
|
-
|
288
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
289
|
+
| name | type | description | required? |
|
290
|
+
+======================+=========+=======================================================================================================+========================+
|
291
|
+
| delimiter | string | Delimiter character such as ``,`` for CSV, ``"\t"`` for TSV, ``"|"`` or any single-byte character | ``,`` by default |
|
292
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
293
|
+
| quote | string | The character surrounding a quoted value | ``\"`` by default |
|
294
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
295
|
+
| quote\_policy | enum | Policy for quote (ALL, MINIMAL, NONE) (see below) | ``MINIMAL`` by default |
|
296
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
297
|
+
| escape | string | Escape character to escape a quote character when quote\_policy is ALL or MINIMAL | ``\"`` by default |
|
298
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
299
|
+
| header\_line | boolean | If true, write the header line with column name at the first line | ``true`` by default |
|
300
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
301
|
+
| null_string | string | Expression of NULL values | empty by default |
|
302
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
303
|
+
| newline | enum | Newline character (CRLF, LF or CR) | ``CRLF`` by default |
|
304
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
305
|
+
| newline\_in\_field | enum | Newline character in each field (CRLF, LF, CR) | ``LF`` by default |
|
306
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
307
|
+
| charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
|
308
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
309
|
+
|
310
|
+
The ``quote_policy`` option determines which fields are quoted.
|
311
|
+
|
312
|
+
+------------+--------------------------------------------------------------------------------------------------------+
|
313
|
+
| name | description |
|
314
|
+
+============+========================================================================================================+
|
315
|
+
| ALL | Quote all fields |
|
316
|
+
+------------+--------------------------------------------------------------------------------------------------------+
|
317
|
+
| MINIMAL | Only quote those fields which contain delimiter, quote or any of the characters in lineterminator |
|
318
|
+
+------------+--------------------------------------------------------------------------------------------------------+
|
319
|
+
| NONE | Never quote fields. When the delimiter occurs in field, escape with escape char |
|
320
|
+
+------------+--------------------------------------------------------------------------------------------------------+
|
299
321
|
|
300
322
|
Example
|
301
323
|
~~~~~~~~~~~~~~~~~~
|
@@ -306,9 +328,14 @@ Example
|
|
306
328
|
...
|
307
329
|
formatter:
|
308
330
|
- type: csv
|
309
|
-
delimiter:
|
310
|
-
newline:
|
331
|
+
delimiter: '\t'
|
332
|
+
newline: CRLF
|
333
|
+
newline_in_field: LF
|
311
334
|
charset: UTF-8
|
335
|
+
quote_policy: MINIMAL
|
336
|
+
quote: '"'
|
337
|
+
escape: '\\'
|
338
|
+
null_string: '\\N'
|
312
339
|
|
313
340
|
Gzip encoder plugin
|
314
341
|
------------------
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,24 @@
|
|
1
|
+
Release 0.6.9
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Built-in plugins
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* ``formatter-csv`` supports ``quote``, ``quote_policy``, ``escape``, ``newline_in_field``, and ``null_string`` options (@sakama++)
|
8
|
+
|
9
|
+
* ``quote_policy`` controls how to quote values. It can be one of ``ALL`` (quote all values), ``MINIMAL`` (quote only if a value includes the delimiter, the quote character, or a newline), or ``NONE`` (never quote).
|
10
|
+
|
11
|
+
* ``escape`` controls how to escape quote character in a quoted string. The default is ``"`` (``"`` will be ``""``). Some applications may set it to ``\`` (``"`` will be ``\"``)
|
12
|
+
|
13
|
+
* ``null_string`` controls how to write NULL values. The default is ``""`` (empty string). You can use any strings such as ``\N`` or ``#N/A``.
|
14
|
+
|
15
|
+
* ``guess-csv`` guesses columns which contain only 0 and 1 in first 32KB as long type rather than boolean type.
|
16
|
+
|
17
|
+
General Changes
|
18
|
+
------------------
|
19
|
+
|
20
|
+
* ``spi.util.LineEncoder`` uses a buffered writer. This improves performance of ``formatter-csv`` by up to 10%.
|
21
|
+
|
22
|
+
Release Date
|
23
|
+
------------------
|
24
|
+
2015-05-14
|
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.standards;
|
2
2
|
|
3
|
+
import com.google.common.base.Optional;
|
3
4
|
import com.google.common.collect.ImmutableBiMap;
|
4
5
|
import com.google.common.collect.ImmutableMap;
|
5
6
|
import org.embulk.config.Config;
|
@@ -20,11 +21,31 @@ import org.embulk.spi.Exec;
|
|
20
21
|
import org.embulk.spi.FileOutput;
|
21
22
|
import org.embulk.spi.util.LineEncoder;
|
22
23
|
|
24
|
+
import org.embulk.spi.util.Newline;
|
23
25
|
import java.util.Map;
|
24
26
|
|
25
27
|
public class CsvFormatterPlugin
|
26
28
|
implements FormatterPlugin
|
27
29
|
{
|
30
|
+
public enum QuotePolicy
|
31
|
+
{
|
32
|
+
ALL("ALL"),
|
33
|
+
MINIMAL("MINIMAL"),
|
34
|
+
NONE("NONE");
|
35
|
+
|
36
|
+
private final String string;
|
37
|
+
|
38
|
+
private QuotePolicy(String string)
|
39
|
+
{
|
40
|
+
this.string = string;
|
41
|
+
}
|
42
|
+
|
43
|
+
public String getString()
|
44
|
+
{
|
45
|
+
return string;
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
28
49
|
public interface PluginTask
|
29
50
|
extends LineEncoder.EncoderTask, TimestampFormatter.FormatterTask
|
30
51
|
{
|
@@ -34,7 +55,27 @@ public class CsvFormatterPlugin
|
|
34
55
|
|
35
56
|
@Config("delimiter")
|
36
57
|
@ConfigDefault("\",\"")
|
37
|
-
public
|
58
|
+
public char getDelimiterChar();
|
59
|
+
|
60
|
+
@Config("quote")
|
61
|
+
@ConfigDefault("\"\\\"\"")
|
62
|
+
public char getQuoteChar();
|
63
|
+
|
64
|
+
@Config("quote_policy")
|
65
|
+
@ConfigDefault("\"MINIMAL\"")
|
66
|
+
public QuotePolicy getQuotePolicy();
|
67
|
+
|
68
|
+
@Config("escape")
|
69
|
+
@ConfigDefault("null")
|
70
|
+
public Optional<Character> getEscapeChar();
|
71
|
+
|
72
|
+
@Config("null_string")
|
73
|
+
@ConfigDefault("\"\"")
|
74
|
+
public String getNullString();
|
75
|
+
|
76
|
+
@Config("newline_in_field")
|
77
|
+
@ConfigDefault("\"LF\"")
|
78
|
+
public Newline getNewlineInField();
|
38
79
|
}
|
39
80
|
|
40
81
|
@Override
|
@@ -66,18 +107,24 @@ public class CsvFormatterPlugin
|
|
66
107
|
final LineEncoder encoder = new LineEncoder(output, task);
|
67
108
|
final Map<Integer, TimestampFormatter> timestampFormatters =
|
68
109
|
newTimestampFormatters(task, schema);
|
69
|
-
final
|
110
|
+
final char delimiter = task.getDelimiterChar();
|
111
|
+
final QuotePolicy quotePolicy = task.getQuotePolicy();
|
112
|
+
final char quote = task.getQuoteChar() != '\0' ? task.getQuoteChar() : '"';
|
113
|
+
final char escape = task.getEscapeChar().or(quotePolicy == QuotePolicy.NONE ? '\\' : '\"');
|
114
|
+
final String newlineInField = task.getNewlineInField().getString();
|
115
|
+
final String nullString = task.getNullString();
|
70
116
|
|
71
117
|
// create a file
|
72
118
|
encoder.nextFile();
|
73
119
|
|
74
120
|
// write header
|
75
121
|
if (task.getHeaderLine()) {
|
76
|
-
writeHeader(schema, encoder, delimiter);
|
122
|
+
writeHeader(schema, encoder, delimiter, quotePolicy, quote, escape, newlineInField, nullString);
|
77
123
|
}
|
78
124
|
|
79
125
|
return new PageOutput() {
|
80
126
|
private final PageReader pageReader = new PageReader(schema);
|
127
|
+
private final String delimiterString = String.valueOf(delimiter);
|
81
128
|
|
82
129
|
public void add(Page page)
|
83
130
|
{
|
@@ -88,7 +135,9 @@ public class CsvFormatterPlugin
|
|
88
135
|
{
|
89
136
|
addDelimiter(column);
|
90
137
|
if (!pageReader.isNull(column)) {
|
91
|
-
|
138
|
+
addValue(Boolean.toString(pageReader.getBoolean(column)));
|
139
|
+
} else {
|
140
|
+
addNullString();
|
92
141
|
}
|
93
142
|
}
|
94
143
|
|
@@ -96,7 +145,9 @@ public class CsvFormatterPlugin
|
|
96
145
|
{
|
97
146
|
addDelimiter(column);
|
98
147
|
if (!pageReader.isNull(column)) {
|
99
|
-
|
148
|
+
addValue(Long.toString(pageReader.getLong(column)));
|
149
|
+
} else {
|
150
|
+
addNullString();
|
100
151
|
}
|
101
152
|
}
|
102
153
|
|
@@ -104,7 +155,9 @@ public class CsvFormatterPlugin
|
|
104
155
|
{
|
105
156
|
addDelimiter(column);
|
106
157
|
if (!pageReader.isNull(column)) {
|
107
|
-
|
158
|
+
addValue(Double.toString(pageReader.getDouble(column)));
|
159
|
+
} else {
|
160
|
+
addNullString();
|
108
161
|
}
|
109
162
|
}
|
110
163
|
|
@@ -112,8 +165,9 @@ public class CsvFormatterPlugin
|
|
112
165
|
{
|
113
166
|
addDelimiter(column);
|
114
167
|
if (!pageReader.isNull(column)) {
|
115
|
-
|
116
|
-
|
168
|
+
addValue(pageReader.getString(column));
|
169
|
+
} else {
|
170
|
+
addNullString();
|
117
171
|
}
|
118
172
|
}
|
119
173
|
|
@@ -122,18 +176,29 @@ public class CsvFormatterPlugin
|
|
122
176
|
addDelimiter(column);
|
123
177
|
if (!pageReader.isNull(column)) {
|
124
178
|
Timestamp value = pageReader.getTimestamp(column);
|
125
|
-
|
179
|
+
addValue(timestampFormatters.get(column.getIndex()).format(value));
|
180
|
+
} else {
|
181
|
+
addNullString();
|
126
182
|
}
|
127
183
|
}
|
128
184
|
|
129
185
|
private void addDelimiter(Column column)
|
130
186
|
{
|
131
187
|
if (column.getIndex() != 0) {
|
132
|
-
encoder.addText(
|
188
|
+
encoder.addText(delimiterString);
|
133
189
|
}
|
134
190
|
}
|
135
|
-
});
|
136
191
|
|
192
|
+
private void addValue(String v)
|
193
|
+
{
|
194
|
+
encoder.addText(setEscapeAndQuoteValue(v, delimiter, quotePolicy, quote, escape, newlineInField, nullString));
|
195
|
+
}
|
196
|
+
|
197
|
+
private void addNullString()
|
198
|
+
{
|
199
|
+
encoder.addText(nullString);
|
200
|
+
}
|
201
|
+
});
|
137
202
|
encoder.addNewLine();
|
138
203
|
}
|
139
204
|
}
|
@@ -150,14 +215,72 @@ public class CsvFormatterPlugin
|
|
150
215
|
};
|
151
216
|
}
|
152
217
|
|
153
|
-
private void writeHeader(Schema schema, LineEncoder encoder, String
|
218
|
+
private void writeHeader(Schema schema, LineEncoder encoder, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString)
|
154
219
|
{
|
220
|
+
String delimiterString = String.valueOf(delimiter);
|
155
221
|
for (Column column : schema.getColumns()) {
|
156
222
|
if (column.getIndex() != 0) {
|
157
|
-
encoder.addText(
|
223
|
+
encoder.addText(delimiterString);
|
158
224
|
}
|
159
|
-
encoder.addText(column.getName());
|
225
|
+
encoder.addText(setEscapeAndQuoteValue(column.getName(), delimiter, policy, quote, escape, newline, nullString));
|
160
226
|
}
|
161
227
|
encoder.addNewLine();
|
162
228
|
}
|
229
|
+
|
230
|
+
private String setEscapeAndQuoteValue(String v, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString)
|
231
|
+
{
|
232
|
+
StringBuilder escapedValue = new StringBuilder();
|
233
|
+
char previousChar = ' ';
|
234
|
+
|
235
|
+
boolean isRequireQuote = (policy == QuotePolicy.ALL || policy == QuotePolicy.MINIMAL && v.equals(nullString)) ? true : false;
|
236
|
+
|
237
|
+
for (int i = 0; i < v.length(); i++) {
|
238
|
+
char c = v.charAt(i);
|
239
|
+
|
240
|
+
if (c == quote) {
|
241
|
+
escapedValue.append(escape);
|
242
|
+
escapedValue.append(c);
|
243
|
+
isRequireQuote = true;
|
244
|
+
} else if (c == '\r') {
|
245
|
+
if (policy == QuotePolicy.NONE) {
|
246
|
+
escapedValue.append(escape);
|
247
|
+
}
|
248
|
+
escapedValue.append(newline);
|
249
|
+
isRequireQuote = true;
|
250
|
+
} else if (c == '\n') {
|
251
|
+
if (previousChar != '\r') {
|
252
|
+
if (policy == QuotePolicy.NONE) {
|
253
|
+
escapedValue.append(escape);
|
254
|
+
}
|
255
|
+
escapedValue.append(newline);
|
256
|
+
isRequireQuote = true;
|
257
|
+
}
|
258
|
+
} else if (c == delimiter) {
|
259
|
+
if (policy == QuotePolicy.NONE) {
|
260
|
+
escapedValue.append(escape);
|
261
|
+
}
|
262
|
+
escapedValue.append(c);
|
263
|
+
isRequireQuote = true;
|
264
|
+
} else {
|
265
|
+
escapedValue.append(c);
|
266
|
+
}
|
267
|
+
previousChar = c;
|
268
|
+
}
|
269
|
+
|
270
|
+
if (policy != QuotePolicy.NONE && isRequireQuote) {
|
271
|
+
return setQuoteValue(escapedValue.toString(), quote);
|
272
|
+
} else {
|
273
|
+
return escapedValue.toString();
|
274
|
+
}
|
275
|
+
}
|
276
|
+
|
277
|
+
private String setQuoteValue(String v, char quote)
|
278
|
+
{
|
279
|
+
StringBuilder sb = new StringBuilder();
|
280
|
+
sb.append(quote);
|
281
|
+
sb.append(v);
|
282
|
+
sb.append(quote);
|
283
|
+
|
284
|
+
return sb.toString();
|
285
|
+
}
|
163
286
|
}
|
@@ -0,0 +1,309 @@
|
|
1
|
+
package org.embulk.standards;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableList;
|
4
|
+
import com.google.common.collect.ImmutableMap;
|
5
|
+
import org.junit.Rule;
|
6
|
+
import org.junit.Test;
|
7
|
+
import java.lang.reflect.InvocationTargetException;
|
8
|
+
import java.lang.reflect.Method;
|
9
|
+
import static org.junit.Assert.assertEquals;
|
10
|
+
import java.nio.charset.Charset;
|
11
|
+
import org.embulk.EmbulkTestRuntime;
|
12
|
+
import org.embulk.config.ConfigSource;
|
13
|
+
import org.embulk.spi.Exec;
|
14
|
+
import org.embulk.spi.util.Newline;
|
15
|
+
|
16
|
+
public class TestCsvFormatterPlugin
|
17
|
+
{
|
18
|
+
@Rule
|
19
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
20
|
+
|
21
|
+
@Test
|
22
|
+
public void checkDefaultValues()
|
23
|
+
{
|
24
|
+
ConfigSource config = Exec.newConfigSource();
|
25
|
+
|
26
|
+
CsvFormatterPlugin.PluginTask task = config.loadConfig(CsvFormatterPlugin.PluginTask.class);
|
27
|
+
assertEquals(Charset.forName("utf-8"), task.getCharset());
|
28
|
+
assertEquals(Newline.CRLF, task.getNewline());
|
29
|
+
assertEquals(true, task.getHeaderLine());
|
30
|
+
assertEquals(',', task.getDelimiterChar());
|
31
|
+
assertEquals('\"', task.getQuoteChar());
|
32
|
+
assertEquals(CsvFormatterPlugin.QuotePolicy.MINIMAL, task.getQuotePolicy());
|
33
|
+
assertEquals(false, task.getEscapeChar().isPresent());
|
34
|
+
assertEquals("", task.getNullString());
|
35
|
+
assertEquals(Newline.LF, task.getNewlineInField());
|
36
|
+
}
|
37
|
+
|
38
|
+
@Test
|
39
|
+
public void checkLoadConfig()
|
40
|
+
{
|
41
|
+
ConfigSource config = Exec.newConfigSource()
|
42
|
+
.set("charset", "utf-16")
|
43
|
+
.set("newline", "LF")
|
44
|
+
.set("header_line", false)
|
45
|
+
.set("delimiter", "\t")
|
46
|
+
.set("quote", "\\")
|
47
|
+
.set("quote_policy", "ALL")
|
48
|
+
.set("escape", "\"")
|
49
|
+
.set("null_string", "\\N")
|
50
|
+
.set("newline_in_field", "CRLF");
|
51
|
+
|
52
|
+
CsvFormatterPlugin.PluginTask task = config.loadConfig(CsvFormatterPlugin.PluginTask.class);
|
53
|
+
assertEquals(Charset.forName("utf-16"), task.getCharset());
|
54
|
+
assertEquals(Newline.LF, task.getNewline());
|
55
|
+
assertEquals(false, task.getHeaderLine());
|
56
|
+
assertEquals('\t', task.getDelimiterChar());
|
57
|
+
assertEquals('\\', task.getQuoteChar());
|
58
|
+
assertEquals(CsvFormatterPlugin.QuotePolicy.ALL, task.getQuotePolicy());
|
59
|
+
assertEquals('\"', (char) task.getEscapeChar().get());
|
60
|
+
assertEquals("\\N", task.getNullString());
|
61
|
+
assertEquals(Newline.CRLF, task.getNewlineInField());
|
62
|
+
}
|
63
|
+
|
64
|
+
@Test
|
65
|
+
public void testQuoteValue()
|
66
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
67
|
+
{
|
68
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setQuoteValue", String.class, char.class);
|
69
|
+
method.setAccessible(true);
|
70
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
71
|
+
|
72
|
+
assertEquals("\"ABCD\"", method.invoke(formatter, "ABCD", '"'));
|
73
|
+
assertEquals("\"\"", method.invoke(formatter, "", '"'));
|
74
|
+
assertEquals("'ABCD'", method.invoke(formatter, "ABCD", '\''));
|
75
|
+
assertEquals("''", method.invoke(formatter, "", '\''));
|
76
|
+
}
|
77
|
+
|
78
|
+
@Test
|
79
|
+
public void testEscapeQuote()
|
80
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
81
|
+
{
|
82
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
|
83
|
+
CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
|
84
|
+
method.setAccessible(true);
|
85
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
86
|
+
|
87
|
+
char delimiter = ',';
|
88
|
+
CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
|
89
|
+
String newline = Newline.LF.getString();
|
90
|
+
|
91
|
+
assertEquals("\"AB\\\"CD\"", method.invoke(formatter, "AB\"CD", delimiter, policy, '"', '\\', newline, ""));
|
92
|
+
assertEquals("\"AB\"\"CD\"", method.invoke(formatter, "AB\"CD", delimiter, policy, '"', '"', newline, ""));
|
93
|
+
}
|
94
|
+
|
95
|
+
@Test
|
96
|
+
public void testQuotePolicyAll()
|
97
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
98
|
+
{
|
99
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
|
100
|
+
CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
|
101
|
+
method.setAccessible(true);
|
102
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
103
|
+
|
104
|
+
char delimiter = ',';
|
105
|
+
char quote = '"';
|
106
|
+
char escape = '"';
|
107
|
+
CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.ALL;
|
108
|
+
String newline = Newline.LF.getString();
|
109
|
+
String nullString = "";
|
110
|
+
|
111
|
+
@SuppressWarnings("unchecked")
|
112
|
+
ImmutableList<ImmutableMap<String, String>> testCases = ImmutableList.of(
|
113
|
+
ImmutableMap.of("expected", "\"true\"", "actual", "true"),
|
114
|
+
ImmutableMap.of("expected", "\"false\"", "actual", "false"),
|
115
|
+
ImmutableMap.of("expected", "\"0\"", "actual", "0"),
|
116
|
+
ImmutableMap.of("expected", "\"1\"", "actual", "1"),
|
117
|
+
ImmutableMap.of("expected", "\"1234\"", "actual", "1234"),
|
118
|
+
ImmutableMap.of("expected", "\"-1234\"", "actual", "-1234"),
|
119
|
+
ImmutableMap.of("expected", "\"+1234\"", "actual", "+1234"),
|
120
|
+
ImmutableMap.of("expected", "\"0x4d2\"", "actual", "0x4d2"),
|
121
|
+
ImmutableMap.of("expected", "\"123L\"", "actual", "123L"),
|
122
|
+
ImmutableMap.of("expected", "\"3.141592\"", "actual", "3.141592"),
|
123
|
+
ImmutableMap.of("expected", "\"1,000\"", "actual", "1,000"),
|
124
|
+
ImmutableMap.of("expected", "\"ABC\"", "actual", "ABC"),
|
125
|
+
ImmutableMap.of("expected", "\"ABC\"\"DEF\"", "actual", "ABC\"DEF"),
|
126
|
+
ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\nDEF"),
|
127
|
+
ImmutableMap.of("expected", "\"\"", "actual", ""),
|
128
|
+
ImmutableMap.of("expected", "\"NULL\"", "actual", "NULL"),
|
129
|
+
ImmutableMap.of("expected", "\"2015-01-01 12:01:01\"", "actual", "2015-01-01 12:01:01"),
|
130
|
+
ImmutableMap.of("expected", "\"20150101\"", "actual", "20150101"));
|
131
|
+
|
132
|
+
for (ImmutableMap testCase : testCases) {
|
133
|
+
String expected = (String) testCase.get("expected");
|
134
|
+
String actual = (String) testCase.get("actual");
|
135
|
+
assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
|
136
|
+
}
|
137
|
+
}
|
138
|
+
|
139
|
+
@Test
|
140
|
+
public void testQuotePolicyMinimal()
|
141
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
142
|
+
{
|
143
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
|
144
|
+
CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
|
145
|
+
method.setAccessible(true);
|
146
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
147
|
+
|
148
|
+
char delimiter = ',';
|
149
|
+
char quote = '"';
|
150
|
+
char escape = '"';
|
151
|
+
CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
|
152
|
+
String newline = Newline.LF.getString();
|
153
|
+
String nullString = "";
|
154
|
+
|
155
|
+
@SuppressWarnings("unchecked")
|
156
|
+
ImmutableList<ImmutableMap<String, String>> testCases = ImmutableList.of(
|
157
|
+
ImmutableMap.of("expected", "true", "actual", "true"),
|
158
|
+
ImmutableMap.of("expected", "false", "actual", "false"),
|
159
|
+
ImmutableMap.of("expected", "0", "actual", "0"),
|
160
|
+
ImmutableMap.of("expected", "1", "actual", "1"),
|
161
|
+
ImmutableMap.of("expected", "1234", "actual", "1234"),
|
162
|
+
ImmutableMap.of("expected", "-1234", "actual", "-1234"),
|
163
|
+
ImmutableMap.of("expected", "+1234", "actual", "+1234"),
|
164
|
+
ImmutableMap.of("expected", "0x4d2", "actual", "0x4d2"),
|
165
|
+
ImmutableMap.of("expected", "123L", "actual", "123L"),
|
166
|
+
ImmutableMap.of("expected", "3.141592", "actual", "3.141592"),
|
167
|
+
ImmutableMap.of("expected", "\"1,000\"", "actual", "1,000"),
|
168
|
+
ImmutableMap.of("expected", "ABC", "actual", "ABC"),
|
169
|
+
ImmutableMap.of("expected", "\"ABC\"\"DEF\"", "actual", "ABC\"DEF"),
|
170
|
+
ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\nDEF"),
|
171
|
+
ImmutableMap.of("expected", "\"\"", "actual", ""),
|
172
|
+
ImmutableMap.of("expected", "NULL", "actual", "NULL"),
|
173
|
+
ImmutableMap.of("expected", "2015-01-01 12:01:01", "actual", "2015-01-01 12:01:01"),
|
174
|
+
ImmutableMap.of("expected", "20150101", "actual", "20150101"));
|
175
|
+
|
176
|
+
for (ImmutableMap testCase : testCases) {
|
177
|
+
String expected = (String) testCase.get("expected");
|
178
|
+
String actual = (String) testCase.get("actual");
|
179
|
+
assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
@Test
|
184
|
+
public void testQuotePolicyNone()
|
185
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
186
|
+
{
|
187
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
|
188
|
+
CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
|
189
|
+
method.setAccessible(true);
|
190
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
191
|
+
|
192
|
+
char delimiter = ',';
|
193
|
+
char quote = '"';
|
194
|
+
char escape = '"';
|
195
|
+
CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.NONE;
|
196
|
+
String newline = Newline.LF.getString();
|
197
|
+
String nullString = "";
|
198
|
+
|
199
|
+
@SuppressWarnings("unchecked")
|
200
|
+
ImmutableList<ImmutableMap<String, String>> testCases = ImmutableList.of(
|
201
|
+
ImmutableMap.of("expected", "true", "actual", "true"),
|
202
|
+
ImmutableMap.of("expected", "false", "actual", "false"),
|
203
|
+
ImmutableMap.of("expected", "0", "actual", "0"),
|
204
|
+
ImmutableMap.of("expected", "1", "actual", "1"),
|
205
|
+
ImmutableMap.of("expected", "1234", "actual", "1234"),
|
206
|
+
ImmutableMap.of("expected", "-1234", "actual", "-1234"),
|
207
|
+
ImmutableMap.of("expected", "+1234", "actual", "+1234"),
|
208
|
+
ImmutableMap.of("expected", "0x4d2", "actual", "0x4d2"),
|
209
|
+
ImmutableMap.of("expected", "123L", "actual", "123L"),
|
210
|
+
ImmutableMap.of("expected", "3.141592", "actual", "3.141592"),
|
211
|
+
ImmutableMap.of("expected", "1\",000", "actual", "1,000"),
|
212
|
+
ImmutableMap.of("expected", "ABC", "actual", "ABC"),
|
213
|
+
ImmutableMap.of("expected", "ABC\"\"DEF", "actual", "ABC\"DEF"),
|
214
|
+
ImmutableMap.of("expected", "ABC\"\nDEF", "actual", "ABC\nDEF"),
|
215
|
+
ImmutableMap.of("expected", "", "actual", ""),
|
216
|
+
ImmutableMap.of("expected", "NULL", "actual", "NULL"),
|
217
|
+
ImmutableMap.of("expected", "2015-01-01 12:01:01", "actual", "2015-01-01 12:01:01"),
|
218
|
+
ImmutableMap.of("expected", "20150101", "actual", "20150101"));
|
219
|
+
|
220
|
+
for (ImmutableMap testCase : testCases) {
|
221
|
+
String expected = (String) testCase.get("expected");
|
222
|
+
String actual = (String) testCase.get("actual");
|
223
|
+
assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
|
224
|
+
}
|
225
|
+
}
|
226
|
+
|
227
|
+
@Test
|
228
|
+
public void testNewlineInField()
|
229
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
230
|
+
{
|
231
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
|
232
|
+
CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
|
233
|
+
method.setAccessible(true);
|
234
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
235
|
+
|
236
|
+
char delimiter = ',';
|
237
|
+
char quote = '"';
|
238
|
+
char escape = '"';
|
239
|
+
String newline;
|
240
|
+
CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
|
241
|
+
String nullString = "";
|
242
|
+
|
243
|
+
ImmutableList<ImmutableMap<String, String>> testCases;
|
244
|
+
|
245
|
+
newline = Newline.LF.getString();
|
246
|
+
testCases = ImmutableList.of(
|
247
|
+
ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\r\nDEF"),
|
248
|
+
ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\rDEF"),
|
249
|
+
ImmutableMap.of("expected", "\"ABC\nDEF\"", "actual", "ABC\nDEF"));
|
250
|
+
|
251
|
+
for (ImmutableMap testCase : testCases) {
|
252
|
+
String expected = (String) testCase.get("expected");
|
253
|
+
String actual = (String) testCase.get("actual");
|
254
|
+
assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
|
255
|
+
}
|
256
|
+
|
257
|
+
|
258
|
+
newline = Newline.CRLF.getString();
|
259
|
+
testCases = ImmutableList.of(
|
260
|
+
ImmutableMap.of("expected", "\"ABC\r\nDEF\"", "actual", "ABC\r\nDEF"),
|
261
|
+
ImmutableMap.of("expected", "\"ABC\r\nDEF\"", "actual", "ABC\rDEF"),
|
262
|
+
ImmutableMap.of("expected", "\"ABC\r\nDEF\"", "actual", "ABC\nDEF"));
|
263
|
+
|
264
|
+
for (ImmutableMap testCase : testCases) {
|
265
|
+
String expected = (String) testCase.get("expected");
|
266
|
+
String actual = (String) testCase.get("actual");
|
267
|
+
assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
|
268
|
+
}
|
269
|
+
|
270
|
+
|
271
|
+
newline = Newline.CR.getString();
|
272
|
+
testCases = ImmutableList.of(
|
273
|
+
ImmutableMap.of("expected", "\"ABC\rDEF\"", "actual", "ABC\r\nDEF"),
|
274
|
+
ImmutableMap.of("expected", "\"ABC\rDEF\"", "actual", "ABC\rDEF"),
|
275
|
+
ImmutableMap.of("expected", "\"ABC\rDEF\"", "actual", "ABC\nDEF"));
|
276
|
+
|
277
|
+
for (ImmutableMap testCase : testCases) {
|
278
|
+
String expected = (String) testCase.get("expected");
|
279
|
+
String actual = (String) testCase.get("actual");
|
280
|
+
assertEquals(expected, method.invoke(formatter, actual, delimiter, policy, quote, escape, newline, nullString));
|
281
|
+
}
|
282
|
+
}
|
283
|
+
|
284
|
+
@Test
|
285
|
+
public void testNullString()
|
286
|
+
throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
|
287
|
+
{
|
288
|
+
Method method = CsvFormatterPlugin.class.getDeclaredMethod("setEscapeAndQuoteValue", String.class, char.class,
|
289
|
+
CsvFormatterPlugin.QuotePolicy.class, char.class, char.class, String.class, String.class);
|
290
|
+
method.setAccessible(true);
|
291
|
+
CsvFormatterPlugin formatter = new CsvFormatterPlugin();
|
292
|
+
|
293
|
+
char delimiter = ',';
|
294
|
+
char quote = '"';
|
295
|
+
char escape = '"';
|
296
|
+
CsvFormatterPlugin.QuotePolicy policy = CsvFormatterPlugin.QuotePolicy.MINIMAL;
|
297
|
+
String newline = Newline.LF.getString();
|
298
|
+
|
299
|
+
assertEquals("\"\"", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, ""));
|
300
|
+
assertEquals("N/A", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, ""));
|
301
|
+
assertEquals("", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, ""));
|
302
|
+
assertEquals("N/A", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, ""));
|
303
|
+
|
304
|
+
assertEquals("", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, "N/A"));
|
305
|
+
assertEquals("\"N/A\"", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.MINIMAL, quote, escape, newline, "N/A"));
|
306
|
+
assertEquals("", method.invoke(formatter, "", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, "N/A"));
|
307
|
+
assertEquals("N/A", method.invoke(formatter, "N/A", delimiter, CsvFormatterPlugin.QuotePolicy.NONE, quote, escape, newline, "N/A"));
|
308
|
+
}
|
309
|
+
}
|
@@ -87,7 +87,6 @@ module Embulk::Guess
|
|
87
87
|
yes Yes YES
|
88
88
|
t T y Y
|
89
89
|
on On ON
|
90
|
-
1
|
91
90
|
].map {|k| [k, true] }]
|
92
91
|
|
93
92
|
# When matching to false string, then return 'true'
|
@@ -96,7 +95,6 @@ module Embulk::Guess
|
|
96
95
|
no No NO
|
97
96
|
f N n N
|
98
97
|
off Off OFF
|
99
|
-
0
|
100
98
|
].map {|k| [k, true] }]
|
101
99
|
|
102
100
|
TYPE_COALESCE = Hash[{
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05-
|
11
|
+
date: 2015-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -300,6 +300,7 @@ files:
|
|
300
300
|
- embulk-docs/src/release/release-0.6.6.rst
|
301
301
|
- embulk-docs/src/release/release-0.6.7.rst
|
302
302
|
- embulk-docs/src/release/release-0.6.8.rst
|
303
|
+
- embulk-docs/src/release/release-0.6.9.rst
|
303
304
|
- embulk-standards/build.gradle
|
304
305
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
305
306
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -313,6 +314,7 @@ files:
|
|
313
314
|
- embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java
|
314
315
|
- embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java
|
315
316
|
- embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension
|
317
|
+
- embulk-standards/src/test/java/org/embulk/standards/TestCsvFormatterPlugin.java
|
316
318
|
- embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java
|
317
319
|
- embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java
|
318
320
|
- embulk.gemspec
|
@@ -406,8 +408,8 @@ files:
|
|
406
408
|
- classpath/bval-jsr303-0.5.jar
|
407
409
|
- classpath/commons-beanutils-core-1.8.3.jar
|
408
410
|
- classpath/commons-lang3-3.1.jar
|
409
|
-
- classpath/embulk-core-0.6.
|
410
|
-
- classpath/embulk-standards-0.6.
|
411
|
+
- classpath/embulk-core-0.6.9.jar
|
412
|
+
- classpath/embulk-standards-0.6.9.jar
|
411
413
|
- classpath/guava-18.0.jar
|
412
414
|
- classpath/guice-4.0.jar
|
413
415
|
- classpath/guice-multibindings-4.0.jar
|