embulk 0.6.13 → 0.6.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +3 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDiff.java +3 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +3 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSource.java +2 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +8 -1
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +28 -6
- data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/ProcessTask.java +12 -77
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +35 -2
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +42 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfigException.java +22 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +40 -2
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +43 -4
- data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +2 -0
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +1 -22
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +10 -10
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +12 -12
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +49 -31
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +15 -19
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParserDeprecated.java +67 -0
- data/embulk-docs/src/built-in.rst +18 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.14.rst +47 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +26 -10
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +21 -6
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvFormatterPlugin.java +3 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +3 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +4 -6
- data/lib/embulk/guess/csv.rb +9 -5
- data/lib/embulk/plugin_registry.rb +15 -11
- data/lib/embulk/version.rb +1 -1
- metadata +7 -4
@@ -3,7 +3,9 @@ package org.embulk.spi.time;
|
|
3
3
|
import org.junit.Rule;
|
4
4
|
import org.junit.Before;
|
5
5
|
import org.junit.Test;
|
6
|
+
import com.google.common.base.Optional;
|
6
7
|
import static org.junit.Assert.assertEquals;
|
8
|
+
import org.embulk.config.Task;
|
7
9
|
import org.embulk.config.Config;
|
8
10
|
import org.embulk.config.ConfigSource;
|
9
11
|
import org.embulk.spi.Exec;
|
@@ -15,53 +17,47 @@ public class TestTimestampFormatterParser
|
|
15
17
|
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
16
18
|
|
17
19
|
private interface FormatterTestTask
|
18
|
-
extends TimestampFormatter.
|
19
|
-
{
|
20
|
-
@Config("time_format")
|
21
|
-
public TimestampFormat getTimeFormat();
|
22
|
-
}
|
20
|
+
extends Task, TimestampFormatter.Task
|
21
|
+
{ }
|
23
22
|
|
24
23
|
private interface ParserTestTask
|
25
|
-
extends TimestampParser.
|
26
|
-
{
|
27
|
-
@Config("time_format")
|
28
|
-
public TimestampFormat getTimeFormat();
|
29
|
-
}
|
24
|
+
extends Task, TimestampParser.Task
|
25
|
+
{ }
|
30
26
|
|
31
27
|
@Test
|
32
28
|
public void testSimpleFormat() throws Exception
|
33
29
|
{
|
34
30
|
ConfigSource config = Exec.newConfigSource()
|
35
|
-
.set("
|
31
|
+
.set("default_timestamp_format", "%Y-%m-%d %H:%M:%S.%9N %Z");
|
36
32
|
FormatterTestTask task = config.loadConfig(FormatterTestTask.class);
|
37
33
|
|
38
|
-
TimestampFormatter formatter = task.
|
39
|
-
assertEquals("2014-11-19 02:46:29.
|
34
|
+
TimestampFormatter formatter = new TimestampFormatter(task, Optional.<TimestampFormatter.TimestampColumnOption>absent());
|
35
|
+
assertEquals("2014-11-19 02:46:29.123456000 UTC", formatter.format(Timestamp.ofEpochSecond(1416365189, 123456*1000)));
|
40
36
|
}
|
41
37
|
|
42
38
|
@Test
|
43
39
|
public void testSimpleParse() throws Exception
|
44
40
|
{
|
45
41
|
ConfigSource config = Exec.newConfigSource()
|
46
|
-
.set("
|
42
|
+
.set("default_timestamp_format", "%Y-%m-%d %H:%M:%S %Z");
|
47
43
|
ParserTestTask task = config.loadConfig(ParserTestTask.class);
|
48
44
|
|
49
|
-
TimestampParser parser =
|
50
|
-
assertEquals(Timestamp.ofEpochSecond(1416365189,
|
45
|
+
TimestampParser parser = new TimestampParser(task);
|
46
|
+
assertEquals(Timestamp.ofEpochSecond(1416365189, 0), parser.parse("2014-11-19 02:46:29 UTC"));
|
51
47
|
}
|
52
48
|
|
53
49
|
@Test
|
54
50
|
public void testUnixtimeFormat() throws Exception
|
55
51
|
{
|
56
52
|
ConfigSource config = Exec.newConfigSource()
|
57
|
-
.set("
|
53
|
+
.set("default_timestamp_format", "%s");
|
58
54
|
|
59
55
|
FormatterTestTask ftask = config.loadConfig(FormatterTestTask.class);
|
60
|
-
TimestampFormatter formatter = ftask.
|
56
|
+
TimestampFormatter formatter = new TimestampFormatter(ftask, Optional.<TimestampFormatter.TimestampColumnOption>absent());
|
61
57
|
assertEquals("1416365189", formatter.format(Timestamp.ofEpochSecond(1416365189)));
|
62
58
|
|
63
59
|
ParserTestTask ptask = config.loadConfig(ParserTestTask.class);
|
64
|
-
TimestampParser parser =
|
60
|
+
TimestampParser parser = new TimestampParser(ptask);
|
65
61
|
assertEquals(Timestamp.ofEpochSecond(1416365189), parser.parse("1416365189"));
|
66
62
|
}
|
67
63
|
}
|
data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParserDeprecated.java
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
package org.embulk.spi.time;
|
2
|
+
|
3
|
+
import org.junit.Rule;
|
4
|
+
import org.junit.Before;
|
5
|
+
import org.junit.Test;
|
6
|
+
import static org.junit.Assert.assertEquals;
|
7
|
+
import org.embulk.config.Config;
|
8
|
+
import org.embulk.config.ConfigSource;
|
9
|
+
import org.embulk.spi.Exec;
|
10
|
+
import org.embulk.EmbulkTestRuntime;
|
11
|
+
|
12
|
+
public class TestTimestampFormatterParserDeprecated
|
13
|
+
{
|
14
|
+
@Rule
|
15
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
16
|
+
|
17
|
+
private interface FormatterTestTask
|
18
|
+
extends TimestampFormatter.FormatterTask
|
19
|
+
{
|
20
|
+
@Config("time_format")
|
21
|
+
public TimestampFormat getTimeFormat();
|
22
|
+
}
|
23
|
+
|
24
|
+
private interface ParserTestTask
|
25
|
+
extends TimestampParser.ParserTask
|
26
|
+
{
|
27
|
+
@Config("time_format")
|
28
|
+
public TimestampFormat getTimeFormat();
|
29
|
+
}
|
30
|
+
|
31
|
+
@Test
|
32
|
+
public void testSimpleFormat() throws Exception
|
33
|
+
{
|
34
|
+
ConfigSource config = Exec.newConfigSource()
|
35
|
+
.set("time_format", "%Y-%m-%d %H:%M:%S.%6N %Z");
|
36
|
+
FormatterTestTask task = config.loadConfig(FormatterTestTask.class);
|
37
|
+
|
38
|
+
TimestampFormatter formatter = task.getTimeFormat().newFormatter(task);
|
39
|
+
assertEquals("2014-11-19 02:46:29.123456 UTC", formatter.format(Timestamp.ofEpochSecond(1416365189, 123456*1000)));
|
40
|
+
}
|
41
|
+
|
42
|
+
@Test
|
43
|
+
public void testSimpleParse() throws Exception
|
44
|
+
{
|
45
|
+
ConfigSource config = Exec.newConfigSource()
|
46
|
+
.set("time_format", "%Y-%m-%d %H:%M:%S.%N %Z");
|
47
|
+
ParserTestTask task = config.loadConfig(ParserTestTask.class);
|
48
|
+
|
49
|
+
TimestampParser parser = task.getTimeFormat().newParser(task);
|
50
|
+
assertEquals(Timestamp.ofEpochSecond(1416365189, 123456*1000), parser.parse("2014-11-19 02:46:29.123456 UTC"));
|
51
|
+
}
|
52
|
+
|
53
|
+
@Test
|
54
|
+
public void testUnixtimeFormat() throws Exception
|
55
|
+
{
|
56
|
+
ConfigSource config = Exec.newConfigSource()
|
57
|
+
.set("time_format", "%s");
|
58
|
+
|
59
|
+
FormatterTestTask ftask = config.loadConfig(FormatterTestTask.class);
|
60
|
+
TimestampFormatter formatter = ftask.getTimeFormat().newFormatter(ftask);
|
61
|
+
assertEquals("1416365189", formatter.format(Timestamp.ofEpochSecond(1416365189)));
|
62
|
+
|
63
|
+
ParserTestTask ptask = config.loadConfig(ParserTestTask.class);
|
64
|
+
TimestampParser parser = ptask.getTimeFormat().newParser(ptask);
|
65
|
+
assertEquals(Timestamp.ofEpochSecond(1416365189), parser.parse("1416365189"));
|
66
|
+
}
|
67
|
+
}
|
@@ -309,6 +309,10 @@ Options
|
|
309
309
|
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
310
310
|
| charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
|
311
311
|
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
312
|
+
| default\_timezone | string | Time zone of timestamp columns. This can be overridden for each column using ``column_options`` | ``UTC`` by default |
|
313
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
314
|
+
| column\_options | hash | See below | optional |
|
315
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
312
316
|
|
313
317
|
The ``quote_policy`` option is used to determine field type to quote.
|
314
318
|
|
@@ -322,6 +326,16 @@ The ``quote_policy`` option is used to determine field type to quote.
|
|
322
326
|
| NONE | Never quote fields. When the delimiter occurs in field, escape with escape char |
|
323
327
|
+------------+--------------------------------------------------------------------------------------------------------+
|
324
328
|
|
329
|
+
The ``column_options`` option is a map whose keys are column names and whose values are configurations with the following parameters:
|
330
|
+
|
331
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+-----------------------------------------+
|
332
|
+
| name | type | description | required? |
|
333
|
+
+======================+=========+=======================================================================================================+=========================================+
|
334
|
+
| timezone | string | Time zone if type of this column is timestamp. If not set, ``default_timezone`` is used. | optional |
|
335
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+-----------------------------------------+
|
336
|
+
| format | string | Timestamp format if type of this column is timestamp. | ``%Y-%m-%d %H:%M:%S.%6N %z`` by default |
|
337
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+-----------------------------------------+
|
338
|
+
|
325
339
|
Example
|
326
340
|
~~~~~~~~~~~~~~~~~~
|
327
341
|
|
@@ -339,6 +353,10 @@ Example
|
|
339
353
|
quote: '"'
|
340
354
|
escape: '\\'
|
341
355
|
null_string: '\\N'
|
356
|
+
default_timezone: 'UTC'
|
357
|
+
column_options:
|
358
|
+
mycol1: {format: '%Y-%m-%d %H:%M:%S'}
|
359
|
+
mycol2: {format: '%Y-%m-%d %H:%M:%S', timezone: 'America/Los_Angeles'}
|
342
360
|
|
343
361
|
Gzip encoder plugin
|
344
362
|
------------------
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,47 @@
|
|
1
|
+
Release 0.6.14
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Built-in plugins
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* ``formatter-csv`` plugin supports ``default_timezone`` and ``column_options`` parameters so that we can set the timestamp format for each column individually.
|
8
|
+
* **IMPORTANT**: ``formatter-csv`` no longer uses the same timestamp format as the input timestamp columns. If you're using ``formatter: type: csv`` with ``parser: type: csv-``, you need to set the ``format`` option for each column to keep the current behavior. See the ``column_options`` option of the CSV formatter plugin described at :doc:`../built-in`.
|
9
|
+
* ``guess-csv`` plugin keeps using the ``delimiter`` option if it is already set, rather than overwriting it every time.
|
10
|
+
|
11
|
+
|
12
|
+
Plugin API
|
13
|
+
------------------
|
14
|
+
|
15
|
+
* Added ``config.DataSource#remove(String)`` method.
|
16
|
+
* Added ``spi.ColumnConfig.getOption()``.
|
17
|
+
|
18
|
+
* ``spi.type.TimestampType.getFormat()`` is deprecated.
|
19
|
+
* ``spi.ColumnConfig.getFormat()`` is deprecated.
|
20
|
+
* ``spi.ProcessTask`` does not serialize ``TimestampType.format`` any more.
|
21
|
+
|
22
|
+
* Added utility methods for ``spi.Schema`` and ``spi.SchemaConfig``.
|
23
|
+
|
24
|
+
* Added ``Schema.Builder`` class and ``Schema.builder()`` method.
|
25
|
+
* Added ``Schema#lookupColumn(String)``.
|
26
|
+
* Added utility methods to ``SchemaConfig``.
|
27
|
+
|
28
|
+
* Added ``spi.time.TimestampFormatter.Task`` and ``TimestampColumnOption``
|
29
|
+
|
30
|
+
* ``TimestampFormatter.FormatterTask`` is deprecated
|
31
|
+
|
32
|
+
* Added ``spi.time.TimestampParser.Task`` and ``TimestampColumnOption``
|
33
|
+
|
34
|
+
* ``TimestampParser.ParserTask`` is deprecated
|
35
|
+
|
36
|
+
* ``spi.time.TimestampFormat`` is deprecated
|
37
|
+
|
38
|
+
|
39
|
+
General Changes
|
40
|
+
------------------
|
41
|
+
|
42
|
+
* Fixed a problem where embulk logged the input plugin name and version twice.
|
43
|
+
|
44
|
+
|
45
|
+
Release Date
|
46
|
+
------------------
|
47
|
+
2015-06-29
|
@@ -8,6 +8,7 @@ import org.embulk.config.ConfigDefault;
|
|
8
8
|
import org.embulk.spi.type.TimestampType;
|
9
9
|
import org.embulk.spi.time.Timestamp;
|
10
10
|
import org.embulk.spi.time.TimestampFormatter;
|
11
|
+
import org.embulk.config.Task;
|
11
12
|
import org.embulk.config.TaskSource;
|
12
13
|
import org.embulk.config.ConfigSource;
|
13
14
|
import org.embulk.spi.Column;
|
@@ -47,7 +48,7 @@ public class CsvFormatterPlugin
|
|
47
48
|
}
|
48
49
|
|
49
50
|
public interface PluginTask
|
50
|
-
extends LineEncoder.EncoderTask, TimestampFormatter.
|
51
|
+
extends Task, LineEncoder.EncoderTask, TimestampFormatter.Task
|
51
52
|
{
|
52
53
|
@Config("header_line")
|
53
54
|
@ConfigDefault("true")
|
@@ -76,27 +77,43 @@ public class CsvFormatterPlugin
|
|
76
77
|
@Config("newline_in_field")
|
77
78
|
@ConfigDefault("\"LF\"")
|
78
79
|
public Newline getNewlineInField();
|
80
|
+
|
81
|
+
@Config("column_options")
|
82
|
+
@ConfigDefault("{}")
|
83
|
+
public Map<String, TimestampColumnOption> getColumnOptions();
|
79
84
|
}
|
80
85
|
|
86
|
+
public interface TimestampColumnOption
|
87
|
+
extends Task, TimestampFormatter.TimestampColumnOption
|
88
|
+
{ }
|
89
|
+
|
81
90
|
@Override
|
82
91
|
public void transaction(ConfigSource config, Schema schema,
|
83
92
|
FormatterPlugin.Control control)
|
84
93
|
{
|
85
94
|
PluginTask task = config.loadConfig(PluginTask.class);
|
95
|
+
|
96
|
+
// validate column_options
|
97
|
+
for (String columnName : task.getColumnOptions().keySet()) {
|
98
|
+
schema.lookupColumn(columnName); // throws SchemaConfigException
|
99
|
+
}
|
100
|
+
|
86
101
|
control.run(task.dump());
|
87
102
|
}
|
88
103
|
|
89
|
-
private
|
90
|
-
TimestampFormatter.
|
104
|
+
private TimestampFormatter[] newTimestampFormatters(
|
105
|
+
TimestampFormatter.Task formatterTask, Schema schema,
|
106
|
+
Map<String, TimestampColumnOption> columnOptions)
|
91
107
|
{
|
92
|
-
|
108
|
+
TimestampFormatter[] formatters = new TimestampFormatter[schema.getColumnCount()];
|
109
|
+
int i = 0;
|
93
110
|
for (Column column : schema.getColumns()) {
|
94
111
|
if (column.getType() instanceof TimestampType) {
|
95
|
-
|
96
|
-
|
112
|
+
Optional<TimestampColumnOption> option = Optional.fromNullable(columnOptions.get(column.getName()));
|
113
|
+
formatters[i] = new TimestampFormatter(formatterTask, option);
|
97
114
|
}
|
98
115
|
}
|
99
|
-
return
|
116
|
+
return formatters;
|
100
117
|
}
|
101
118
|
|
102
119
|
@Override
|
@@ -105,8 +122,7 @@ public class CsvFormatterPlugin
|
|
105
122
|
{
|
106
123
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
107
124
|
final LineEncoder encoder = new LineEncoder(output, task);
|
108
|
-
final
|
109
|
-
newTimestampFormatters(task, schema);
|
125
|
+
final TimestampFormatter[] timestampFormatters = newTimestampFormatters(task, schema, task.getColumnOptions());
|
110
126
|
final char delimiter = task.getDelimiterChar();
|
111
127
|
final QuotePolicy quotePolicy = task.getQuotePolicy();
|
112
128
|
final char quote = task.getQuoteChar() != '\0' ? task.getQuoteChar() : '"';
|
@@ -176,7 +192,7 @@ public class CsvFormatterPlugin
|
|
176
192
|
addDelimiter(column);
|
177
193
|
if (!pageReader.isNull(column)) {
|
178
194
|
Timestamp value = pageReader.getTimestamp(column);
|
179
|
-
addValue(timestampFormatters
|
195
|
+
addValue(timestampFormatters[column.getIndex()].format(value));
|
180
196
|
} else {
|
181
197
|
addNullString();
|
182
198
|
}
|
@@ -9,9 +9,11 @@ import org.embulk.config.ConfigSource;
|
|
9
9
|
import org.embulk.config.ConfigException;
|
10
10
|
import org.embulk.config.TaskSource;
|
11
11
|
import org.embulk.spi.type.TimestampType;
|
12
|
+
import org.embulk.spi.time.TimestampFormat;
|
12
13
|
import org.embulk.spi.time.TimestampParser;
|
13
14
|
import org.embulk.spi.time.TimestampParseException;
|
14
15
|
import org.embulk.spi.Column;
|
16
|
+
import org.embulk.spi.ColumnConfig;
|
15
17
|
import org.embulk.spi.Schema;
|
16
18
|
import org.embulk.spi.SchemaConfig;
|
17
19
|
import org.embulk.spi.ColumnVisitor;
|
@@ -35,7 +37,7 @@ public class CsvParserPlugin
|
|
35
37
|
"1");
|
36
38
|
|
37
39
|
public interface PluginTask
|
38
|
-
extends Task, LineDecoder.DecoderTask, TimestampParser.
|
40
|
+
extends Task, LineDecoder.DecoderTask, TimestampParser.Task
|
39
41
|
{
|
40
42
|
@Config("columns")
|
41
43
|
public SchemaConfig getSchemaConfig();
|
@@ -86,8 +88,19 @@ public class CsvParserPlugin
|
|
86
88
|
@Config("allow_extra_columns")
|
87
89
|
@ConfigDefault("false")
|
88
90
|
public boolean getAllowExtraColumns();
|
91
|
+
|
92
|
+
@Config("error")
|
93
|
+
@ConfigDefault("{\"type\": \"warning\"}")
|
94
|
+
public ConfigSource getErrorConfig();
|
95
|
+
|
96
|
+
public TaskSource getErorrTaskSource();
|
97
|
+
public void setErrorTaskSource(TaskSource errorTask);
|
89
98
|
}
|
90
99
|
|
100
|
+
public interface TimestampColumnOption
|
101
|
+
extends Task, TimestampParser.TimestampColumnOption
|
102
|
+
{ }
|
103
|
+
|
91
104
|
private final Logger log;
|
92
105
|
|
93
106
|
public CsvParserPlugin()
|
@@ -116,14 +129,16 @@ public class CsvParserPlugin
|
|
116
129
|
}
|
117
130
|
|
118
131
|
private TimestampParser[] newTimestampParsers(
|
119
|
-
TimestampParser.
|
132
|
+
TimestampParser.Task parserTask, SchemaConfig schema)
|
120
133
|
{
|
121
134
|
TimestampParser[] parsers = new TimestampParser[schema.getColumnCount()];
|
122
|
-
|
135
|
+
int i = 0;
|
136
|
+
for (ColumnConfig column : schema.getColumns()) {
|
123
137
|
if (column.getType() instanceof TimestampType) {
|
124
|
-
|
125
|
-
parsers[
|
138
|
+
TimestampColumnOption option = column.getOption().loadConfig(TimestampColumnOption.class);
|
139
|
+
parsers[i] = new TimestampParser(parserTask, option);
|
126
140
|
}
|
141
|
+
i++;
|
127
142
|
}
|
128
143
|
return parsers;
|
129
144
|
}
|
@@ -133,7 +148,7 @@ public class CsvParserPlugin
|
|
133
148
|
FileInput input, PageOutput output)
|
134
149
|
{
|
135
150
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
136
|
-
final TimestampParser[] timestampFormatters = newTimestampParsers(task,
|
151
|
+
final TimestampParser[] timestampFormatters = newTimestampParsers(task, task.getSchemaConfig());
|
137
152
|
LineDecoder lineDecoder = new LineDecoder(input, task);
|
138
153
|
final CsvTokenizer tokenizer = new CsvTokenizer(lineDecoder, task);
|
139
154
|
final String nullStringOrNull = task.getNullString().orNull();
|
@@ -2,6 +2,7 @@ package org.embulk.standards;
|
|
2
2
|
|
3
3
|
import com.google.common.collect.ImmutableList;
|
4
4
|
import com.google.common.collect.ImmutableMap;
|
5
|
+
import org.joda.time.DateTimeZone;
|
5
6
|
import org.junit.Rule;
|
6
7
|
import org.junit.Test;
|
7
8
|
import java.lang.reflect.InvocationTargetException;
|
@@ -32,6 +33,8 @@ public class TestCsvFormatterPlugin
|
|
32
33
|
assertEquals(CsvFormatterPlugin.QuotePolicy.MINIMAL, task.getQuotePolicy());
|
33
34
|
assertEquals(false, task.getEscapeChar().isPresent());
|
34
35
|
assertEquals("", task.getNullString());
|
36
|
+
assertEquals(DateTimeZone.UTC, task.getDefaultTimeZone());
|
37
|
+
assertEquals("%Y-%m-%d %H:%M:%S.%6N %z", task.getDefaultTimestampFormat());
|
35
38
|
assertEquals(Newline.LF, task.getNewlineInField());
|
36
39
|
}
|
37
40
|
|
@@ -6,6 +6,7 @@ import static org.junit.Assert.assertEquals;
|
|
6
6
|
import java.nio.charset.Charset;
|
7
7
|
import com.google.common.collect.ImmutableList;
|
8
8
|
import com.google.common.collect.ImmutableMap;
|
9
|
+
import org.joda.time.DateTimeZone;
|
9
10
|
import org.embulk.EmbulkTestRuntime;
|
10
11
|
import org.embulk.config.ConfigException;
|
11
12
|
import org.embulk.config.ConfigSource;
|
@@ -34,6 +35,8 @@ public class TestCsvParserPlugin
|
|
34
35
|
assertEquals(',', task.getDelimiterChar());
|
35
36
|
assertEquals('\"', task.getQuoteChar());
|
36
37
|
assertEquals(false, task.getAllowOptionalColumns());
|
38
|
+
assertEquals(DateTimeZone.UTC, task.getDefaultTimeZone());
|
39
|
+
assertEquals("%Y-%m-%d %H:%M:%S.%N %z", task.getDefaultTimestampFormat());
|
37
40
|
}
|
38
41
|
|
39
42
|
@Test(expected = ConfigException.class)
|
@@ -41,12 +41,10 @@ public class TestCsvTokenizer
|
|
41
41
|
config = Exec.newConfigSource()
|
42
42
|
.set("newline", "LF")
|
43
43
|
.set("columns", ImmutableList.of(
|
44
|
-
ImmutableMap
|
45
|
-
"name", "date_code",
|
46
|
-
|
47
|
-
|
48
|
-
"name", "foo",
|
49
|
-
"type", "string"))
|
44
|
+
ImmutableMap.<String,Object>of(
|
45
|
+
"name", "date_code", "type", "string", "option", ImmutableMap.of()),
|
46
|
+
ImmutableMap.<String,Object>of(
|
47
|
+
"name", "foo", "type", "string", "option", ImmutableMap.of()))
|
50
48
|
);
|
51
49
|
reloadPluginTask();
|
52
50
|
}
|
data/lib/embulk/guess/csv.rb
CHANGED
@@ -35,13 +35,17 @@ module Embulk
|
|
35
35
|
def guess_lines(config, sample_lines)
|
36
36
|
return {} unless config.fetch("parser", {}).fetch("type", "csv") == "csv"
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
38
|
+
parser_config = config["parser"] || {}
|
39
|
+
if parser_config["type"] == "csv" && parser_config["delimiter"]
|
40
|
+
delim = parser_config["delimiter"]
|
41
|
+
else
|
42
|
+
delim = guess_delimiter(sample_lines)
|
43
|
+
unless delim
|
44
|
+
# not CSV file
|
45
|
+
return {}
|
46
|
+
end
|
42
47
|
end
|
43
48
|
|
44
|
-
parser_config = config["parser"] || {}
|
45
49
|
parser_guessed = DataSource.new.merge(parser_config).merge({"type" => "csv", "delimiter" => delim})
|
46
50
|
|
47
51
|
unless parser_guessed.has_key?("quote")
|