embulk 0.6.13 → 0.6.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +3 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDiff.java +3 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +3 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSource.java +2 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +8 -1
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +28 -6
- data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/ProcessTask.java +12 -77
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +35 -2
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +42 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfigException.java +22 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +40 -2
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +43 -4
- data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +2 -0
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +1 -22
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +10 -10
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +12 -12
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +49 -31
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +15 -19
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParserDeprecated.java +67 -0
- data/embulk-docs/src/built-in.rst +18 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.14.rst +47 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +26 -10
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +21 -6
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvFormatterPlugin.java +3 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +3 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +4 -6
- data/lib/embulk/guess/csv.rb +9 -5
- data/lib/embulk/plugin_registry.rb +15 -11
- data/lib/embulk/version.rb +1 -1
- metadata +7 -4
@@ -3,7 +3,9 @@ package org.embulk.spi.time;
|
|
3
3
|
import org.junit.Rule;
|
4
4
|
import org.junit.Before;
|
5
5
|
import org.junit.Test;
|
6
|
+
import com.google.common.base.Optional;
|
6
7
|
import static org.junit.Assert.assertEquals;
|
8
|
+
import org.embulk.config.Task;
|
7
9
|
import org.embulk.config.Config;
|
8
10
|
import org.embulk.config.ConfigSource;
|
9
11
|
import org.embulk.spi.Exec;
|
@@ -15,53 +17,47 @@ public class TestTimestampFormatterParser
|
|
15
17
|
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
16
18
|
|
17
19
|
private interface FormatterTestTask
|
18
|
-
extends TimestampFormatter.
|
19
|
-
{
|
20
|
-
@Config("time_format")
|
21
|
-
public TimestampFormat getTimeFormat();
|
22
|
-
}
|
20
|
+
extends Task, TimestampFormatter.Task
|
21
|
+
{ }
|
23
22
|
|
24
23
|
private interface ParserTestTask
|
25
|
-
extends TimestampParser.
|
26
|
-
{
|
27
|
-
@Config("time_format")
|
28
|
-
public TimestampFormat getTimeFormat();
|
29
|
-
}
|
24
|
+
extends Task, TimestampParser.Task
|
25
|
+
{ }
|
30
26
|
|
31
27
|
@Test
|
32
28
|
public void testSimpleFormat() throws Exception
|
33
29
|
{
|
34
30
|
ConfigSource config = Exec.newConfigSource()
|
35
|
-
.set("
|
31
|
+
.set("default_timestamp_format", "%Y-%m-%d %H:%M:%S.%9N %Z");
|
36
32
|
FormatterTestTask task = config.loadConfig(FormatterTestTask.class);
|
37
33
|
|
38
|
-
TimestampFormatter formatter = task.
|
39
|
-
assertEquals("2014-11-19 02:46:29.
|
34
|
+
TimestampFormatter formatter = new TimestampFormatter(task, Optional.<TimestampFormatter.TimestampColumnOption>absent());
|
35
|
+
assertEquals("2014-11-19 02:46:29.123456000 UTC", formatter.format(Timestamp.ofEpochSecond(1416365189, 123456*1000)));
|
40
36
|
}
|
41
37
|
|
42
38
|
@Test
|
43
39
|
public void testSimpleParse() throws Exception
|
44
40
|
{
|
45
41
|
ConfigSource config = Exec.newConfigSource()
|
46
|
-
.set("
|
42
|
+
.set("default_timestamp_format", "%Y-%m-%d %H:%M:%S %Z");
|
47
43
|
ParserTestTask task = config.loadConfig(ParserTestTask.class);
|
48
44
|
|
49
|
-
TimestampParser parser =
|
50
|
-
assertEquals(Timestamp.ofEpochSecond(1416365189,
|
45
|
+
TimestampParser parser = new TimestampParser(task);
|
46
|
+
assertEquals(Timestamp.ofEpochSecond(1416365189, 0), parser.parse("2014-11-19 02:46:29 UTC"));
|
51
47
|
}
|
52
48
|
|
53
49
|
@Test
|
54
50
|
public void testUnixtimeFormat() throws Exception
|
55
51
|
{
|
56
52
|
ConfigSource config = Exec.newConfigSource()
|
57
|
-
.set("
|
53
|
+
.set("default_timestamp_format", "%s");
|
58
54
|
|
59
55
|
FormatterTestTask ftask = config.loadConfig(FormatterTestTask.class);
|
60
|
-
TimestampFormatter formatter = ftask.
|
56
|
+
TimestampFormatter formatter = new TimestampFormatter(ftask, Optional.<TimestampFormatter.TimestampColumnOption>absent());
|
61
57
|
assertEquals("1416365189", formatter.format(Timestamp.ofEpochSecond(1416365189)));
|
62
58
|
|
63
59
|
ParserTestTask ptask = config.loadConfig(ParserTestTask.class);
|
64
|
-
TimestampParser parser =
|
60
|
+
TimestampParser parser = new TimestampParser(ptask);
|
65
61
|
assertEquals(Timestamp.ofEpochSecond(1416365189), parser.parse("1416365189"));
|
66
62
|
}
|
67
63
|
}
|
data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParserDeprecated.java
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
package org.embulk.spi.time;
|
2
|
+
|
3
|
+
import org.junit.Rule;
|
4
|
+
import org.junit.Before;
|
5
|
+
import org.junit.Test;
|
6
|
+
import static org.junit.Assert.assertEquals;
|
7
|
+
import org.embulk.config.Config;
|
8
|
+
import org.embulk.config.ConfigSource;
|
9
|
+
import org.embulk.spi.Exec;
|
10
|
+
import org.embulk.EmbulkTestRuntime;
|
11
|
+
|
12
|
+
public class TestTimestampFormatterParserDeprecated
|
13
|
+
{
|
14
|
+
@Rule
|
15
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
16
|
+
|
17
|
+
private interface FormatterTestTask
|
18
|
+
extends TimestampFormatter.FormatterTask
|
19
|
+
{
|
20
|
+
@Config("time_format")
|
21
|
+
public TimestampFormat getTimeFormat();
|
22
|
+
}
|
23
|
+
|
24
|
+
private interface ParserTestTask
|
25
|
+
extends TimestampParser.ParserTask
|
26
|
+
{
|
27
|
+
@Config("time_format")
|
28
|
+
public TimestampFormat getTimeFormat();
|
29
|
+
}
|
30
|
+
|
31
|
+
@Test
|
32
|
+
public void testSimpleFormat() throws Exception
|
33
|
+
{
|
34
|
+
ConfigSource config = Exec.newConfigSource()
|
35
|
+
.set("time_format", "%Y-%m-%d %H:%M:%S.%6N %Z");
|
36
|
+
FormatterTestTask task = config.loadConfig(FormatterTestTask.class);
|
37
|
+
|
38
|
+
TimestampFormatter formatter = task.getTimeFormat().newFormatter(task);
|
39
|
+
assertEquals("2014-11-19 02:46:29.123456 UTC", formatter.format(Timestamp.ofEpochSecond(1416365189, 123456*1000)));
|
40
|
+
}
|
41
|
+
|
42
|
+
@Test
|
43
|
+
public void testSimpleParse() throws Exception
|
44
|
+
{
|
45
|
+
ConfigSource config = Exec.newConfigSource()
|
46
|
+
.set("time_format", "%Y-%m-%d %H:%M:%S.%N %Z");
|
47
|
+
ParserTestTask task = config.loadConfig(ParserTestTask.class);
|
48
|
+
|
49
|
+
TimestampParser parser = task.getTimeFormat().newParser(task);
|
50
|
+
assertEquals(Timestamp.ofEpochSecond(1416365189, 123456*1000), parser.parse("2014-11-19 02:46:29.123456 UTC"));
|
51
|
+
}
|
52
|
+
|
53
|
+
@Test
|
54
|
+
public void testUnixtimeFormat() throws Exception
|
55
|
+
{
|
56
|
+
ConfigSource config = Exec.newConfigSource()
|
57
|
+
.set("time_format", "%s");
|
58
|
+
|
59
|
+
FormatterTestTask ftask = config.loadConfig(FormatterTestTask.class);
|
60
|
+
TimestampFormatter formatter = ftask.getTimeFormat().newFormatter(ftask);
|
61
|
+
assertEquals("1416365189", formatter.format(Timestamp.ofEpochSecond(1416365189)));
|
62
|
+
|
63
|
+
ParserTestTask ptask = config.loadConfig(ParserTestTask.class);
|
64
|
+
TimestampParser parser = ptask.getTimeFormat().newParser(ptask);
|
65
|
+
assertEquals(Timestamp.ofEpochSecond(1416365189), parser.parse("1416365189"));
|
66
|
+
}
|
67
|
+
}
|
@@ -309,6 +309,10 @@ Options
|
|
309
309
|
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
310
310
|
| charset | enum | Character encoding (eg. ISO-8859-1, UTF-8) | ``UTF-8`` by default |
|
311
311
|
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
312
|
+
| default\_timezone | string | Time zone of timestamp columns. This can be overridden for each column using ``column_options`` | ``UTC`` by default |
|
313
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
314
|
+
| column\_options | hash | See below | optional |
|
315
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+------------------------+
|
312
316
|
|
313
317
|
The ``quote_policy`` option is used to determine field type to quote.
|
314
318
|
|
@@ -322,6 +326,16 @@ The ``quote_policy`` option is used to determine field type to quote.
|
|
322
326
|
| NONE | Never quote fields. When the delimiter occurs in field, escape with escape char |
|
323
327
|
+------------+--------------------------------------------------------------------------------------------------------+
|
324
328
|
|
329
|
+
The ``column_options`` option is a map whose keys are column names and whose values are configurations with the following parameters:
|
330
|
+
|
331
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+-----------------------------------------+
|
332
|
+
| name | type | description | required? |
|
333
|
+
+======================+=========+=======================================================================================================+=========================================+
|
334
|
+
| timezone | string | Time zone if type of this column is timestamp. If not set, ``default_timezone`` is used. | optional |
|
335
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+-----------------------------------------+
|
336
|
+
| format | string | Timestamp format if type of this column is timestamp. | ``%Y-%m-%d %H:%M:%S.%6N %z`` by default |
|
337
|
+
+----------------------+---------+-------------------------------------------------------------------------------------------------------+-----------------------------------------+
|
338
|
+
|
325
339
|
Example
|
326
340
|
~~~~~~~~~~~~~~~~~~
|
327
341
|
|
@@ -339,6 +353,10 @@ Example
|
|
339
353
|
quote: '"'
|
340
354
|
escape: '\\'
|
341
355
|
null_string: '\\N'
|
356
|
+
default_timezone: 'UTC'
|
357
|
+
column_options:
|
358
|
+
mycol1: {format: '%Y-%m-%d %H:%M:%S'}
|
359
|
+
mycol2: {format: '%Y-%m-%d %H:%M:%S', timezone: 'America/Los_Angeles'}
|
342
360
|
|
343
361
|
Gzip encoder plugin
|
344
362
|
------------------
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,47 @@
|
|
1
|
+
Release 0.6.14
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Built-in plugins
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* ``formatter-csv`` plugin supports ``default_timezone`` and ``column_options`` parameters so that we can set the timestamp format for each column individually.
|
8
|
+
* **IMPORTANT**: ``formatter-csv`` does not use the same timestamp format as the input timestamp columns. If you're using ``formatter: type: csv`` with ``parser: type: csv-``, you need to set the ``format`` option for each column to keep the current behavior. See the ``column_options`` option of the CSV formatter plugin described at :doc:`../built-in`.
|
9
|
+
* ``guess-csv`` plugin keeps using the ``delimiter`` option if it is already set rather than overwriting it every time.
|
10
|
+
|
11
|
+
|
12
|
+
Plugin API
|
13
|
+
------------------
|
14
|
+
|
15
|
+
* Added ``config.DataSource#remove(String)`` method.
|
16
|
+
* Added ``spi.ColumnConfig.getOption()``.
|
17
|
+
|
18
|
+
* ``spi.type.TimestampType.getFormat()`` is deprecated.
|
19
|
+
* ``spi.ColumnConfig.getFormat()`` is deprecated.
|
20
|
+
* ``spi.ProcessTask`` does not serialize ``TimestampType.format`` any more.
|
21
|
+
|
22
|
+
* Added utility methods for ``spi.Schema`` and ``spi.SchemaConfig``.
|
23
|
+
|
24
|
+
* Added ``Schema.Builder`` class and ``Schema.builder()`` method.
|
25
|
+
* Added ``Schema#lookupColumn(String)``.
|
26
|
+
* Added utility methods to ``SchemaConfig``.
|
27
|
+
|
28
|
+
* Added ``spi.time.TimestampFormatter.Task`` and TimestampColumnOption
|
29
|
+
|
30
|
+
* ``TimestampFormatter.FormatterTask`` is deprecated
|
31
|
+
|
32
|
+
* Added ``spi.time.TimestampParser.Task`` and TimestampColumnOption
|
33
|
+
|
34
|
+
* ``TimestampParser.ParserTask`` is deprecated
|
35
|
+
|
36
|
+
* ``spi.time.TimestampFormat`` is deprecated
|
37
|
+
|
38
|
+
|
39
|
+
General Changes
|
40
|
+
------------------
|
41
|
+
|
42
|
+
* Fixed a problem where embulk logged the input plugin name and version twice.
|
43
|
+
|
44
|
+
|
45
|
+
Release Date
|
46
|
+
------------------
|
47
|
+
2015-06-29
|
@@ -8,6 +8,7 @@ import org.embulk.config.ConfigDefault;
|
|
8
8
|
import org.embulk.spi.type.TimestampType;
|
9
9
|
import org.embulk.spi.time.Timestamp;
|
10
10
|
import org.embulk.spi.time.TimestampFormatter;
|
11
|
+
import org.embulk.config.Task;
|
11
12
|
import org.embulk.config.TaskSource;
|
12
13
|
import org.embulk.config.ConfigSource;
|
13
14
|
import org.embulk.spi.Column;
|
@@ -47,7 +48,7 @@ public class CsvFormatterPlugin
|
|
47
48
|
}
|
48
49
|
|
49
50
|
public interface PluginTask
|
50
|
-
extends LineEncoder.EncoderTask, TimestampFormatter.
|
51
|
+
extends Task, LineEncoder.EncoderTask, TimestampFormatter.Task
|
51
52
|
{
|
52
53
|
@Config("header_line")
|
53
54
|
@ConfigDefault("true")
|
@@ -76,27 +77,43 @@ public class CsvFormatterPlugin
|
|
76
77
|
@Config("newline_in_field")
|
77
78
|
@ConfigDefault("\"LF\"")
|
78
79
|
public Newline getNewlineInField();
|
80
|
+
|
81
|
+
@Config("column_options")
|
82
|
+
@ConfigDefault("{}")
|
83
|
+
public Map<String, TimestampColumnOption> getColumnOptions();
|
79
84
|
}
|
80
85
|
|
86
|
+
public interface TimestampColumnOption
|
87
|
+
extends Task, TimestampFormatter.TimestampColumnOption
|
88
|
+
{ }
|
89
|
+
|
81
90
|
@Override
|
82
91
|
public void transaction(ConfigSource config, Schema schema,
|
83
92
|
FormatterPlugin.Control control)
|
84
93
|
{
|
85
94
|
PluginTask task = config.loadConfig(PluginTask.class);
|
95
|
+
|
96
|
+
// validate column_options
|
97
|
+
for (String columnName : task.getColumnOptions().keySet()) {
|
98
|
+
schema.lookupColumn(columnName); // throws SchemaConfigException
|
99
|
+
}
|
100
|
+
|
86
101
|
control.run(task.dump());
|
87
102
|
}
|
88
103
|
|
89
|
-
private
|
90
|
-
TimestampFormatter.
|
104
|
+
private TimestampFormatter[] newTimestampFormatters(
|
105
|
+
TimestampFormatter.Task formatterTask, Schema schema,
|
106
|
+
Map<String, TimestampColumnOption> columnOptions)
|
91
107
|
{
|
92
|
-
|
108
|
+
TimestampFormatter[] formatters = new TimestampFormatter[schema.getColumnCount()];
|
109
|
+
int i = 0;
|
93
110
|
for (Column column : schema.getColumns()) {
|
94
111
|
if (column.getType() instanceof TimestampType) {
|
95
|
-
|
96
|
-
|
112
|
+
Optional<TimestampColumnOption> option = Optional.fromNullable(columnOptions.get(column.getName()));
|
113
|
+
formatters[i] = new TimestampFormatter(formatterTask, option);
|
97
114
|
}
|
98
115
|
}
|
99
|
-
return
|
116
|
+
return formatters;
|
100
117
|
}
|
101
118
|
|
102
119
|
@Override
|
@@ -105,8 +122,7 @@ public class CsvFormatterPlugin
|
|
105
122
|
{
|
106
123
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
107
124
|
final LineEncoder encoder = new LineEncoder(output, task);
|
108
|
-
final
|
109
|
-
newTimestampFormatters(task, schema);
|
125
|
+
final TimestampFormatter[] timestampFormatters = newTimestampFormatters(task, schema, task.getColumnOptions());
|
110
126
|
final char delimiter = task.getDelimiterChar();
|
111
127
|
final QuotePolicy quotePolicy = task.getQuotePolicy();
|
112
128
|
final char quote = task.getQuoteChar() != '\0' ? task.getQuoteChar() : '"';
|
@@ -176,7 +192,7 @@ public class CsvFormatterPlugin
|
|
176
192
|
addDelimiter(column);
|
177
193
|
if (!pageReader.isNull(column)) {
|
178
194
|
Timestamp value = pageReader.getTimestamp(column);
|
179
|
-
addValue(timestampFormatters
|
195
|
+
addValue(timestampFormatters[column.getIndex()].format(value));
|
180
196
|
} else {
|
181
197
|
addNullString();
|
182
198
|
}
|
@@ -9,9 +9,11 @@ import org.embulk.config.ConfigSource;
|
|
9
9
|
import org.embulk.config.ConfigException;
|
10
10
|
import org.embulk.config.TaskSource;
|
11
11
|
import org.embulk.spi.type.TimestampType;
|
12
|
+
import org.embulk.spi.time.TimestampFormat;
|
12
13
|
import org.embulk.spi.time.TimestampParser;
|
13
14
|
import org.embulk.spi.time.TimestampParseException;
|
14
15
|
import org.embulk.spi.Column;
|
16
|
+
import org.embulk.spi.ColumnConfig;
|
15
17
|
import org.embulk.spi.Schema;
|
16
18
|
import org.embulk.spi.SchemaConfig;
|
17
19
|
import org.embulk.spi.ColumnVisitor;
|
@@ -35,7 +37,7 @@ public class CsvParserPlugin
|
|
35
37
|
"1");
|
36
38
|
|
37
39
|
public interface PluginTask
|
38
|
-
extends Task, LineDecoder.DecoderTask, TimestampParser.
|
40
|
+
extends Task, LineDecoder.DecoderTask, TimestampParser.Task
|
39
41
|
{
|
40
42
|
@Config("columns")
|
41
43
|
public SchemaConfig getSchemaConfig();
|
@@ -86,8 +88,19 @@ public class CsvParserPlugin
|
|
86
88
|
@Config("allow_extra_columns")
|
87
89
|
@ConfigDefault("false")
|
88
90
|
public boolean getAllowExtraColumns();
|
91
|
+
|
92
|
+
@Config("error")
|
93
|
+
@ConfigDefault("{\"type\": \"warning\"}")
|
94
|
+
public ConfigSource getErrorConfig();
|
95
|
+
|
96
|
+
public TaskSource getErorrTaskSource();
|
97
|
+
public void setErrorTaskSource(TaskSource errorTask);
|
89
98
|
}
|
90
99
|
|
100
|
+
public interface TimestampColumnOption
|
101
|
+
extends Task, TimestampParser.TimestampColumnOption
|
102
|
+
{ }
|
103
|
+
|
91
104
|
private final Logger log;
|
92
105
|
|
93
106
|
public CsvParserPlugin()
|
@@ -116,14 +129,16 @@ public class CsvParserPlugin
|
|
116
129
|
}
|
117
130
|
|
118
131
|
private TimestampParser[] newTimestampParsers(
|
119
|
-
TimestampParser.
|
132
|
+
TimestampParser.Task parserTask, SchemaConfig schema)
|
120
133
|
{
|
121
134
|
TimestampParser[] parsers = new TimestampParser[schema.getColumnCount()];
|
122
|
-
|
135
|
+
int i = 0;
|
136
|
+
for (ColumnConfig column : schema.getColumns()) {
|
123
137
|
if (column.getType() instanceof TimestampType) {
|
124
|
-
|
125
|
-
parsers[
|
138
|
+
TimestampColumnOption option = column.getOption().loadConfig(TimestampColumnOption.class);
|
139
|
+
parsers[i] = new TimestampParser(parserTask, option);
|
126
140
|
}
|
141
|
+
i++;
|
127
142
|
}
|
128
143
|
return parsers;
|
129
144
|
}
|
@@ -133,7 +148,7 @@ public class CsvParserPlugin
|
|
133
148
|
FileInput input, PageOutput output)
|
134
149
|
{
|
135
150
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
136
|
-
final TimestampParser[] timestampFormatters = newTimestampParsers(task,
|
151
|
+
final TimestampParser[] timestampFormatters = newTimestampParsers(task, task.getSchemaConfig());
|
137
152
|
LineDecoder lineDecoder = new LineDecoder(input, task);
|
138
153
|
final CsvTokenizer tokenizer = new CsvTokenizer(lineDecoder, task);
|
139
154
|
final String nullStringOrNull = task.getNullString().orNull();
|
@@ -2,6 +2,7 @@ package org.embulk.standards;
|
|
2
2
|
|
3
3
|
import com.google.common.collect.ImmutableList;
|
4
4
|
import com.google.common.collect.ImmutableMap;
|
5
|
+
import org.joda.time.DateTimeZone;
|
5
6
|
import org.junit.Rule;
|
6
7
|
import org.junit.Test;
|
7
8
|
import java.lang.reflect.InvocationTargetException;
|
@@ -32,6 +33,8 @@ public class TestCsvFormatterPlugin
|
|
32
33
|
assertEquals(CsvFormatterPlugin.QuotePolicy.MINIMAL, task.getQuotePolicy());
|
33
34
|
assertEquals(false, task.getEscapeChar().isPresent());
|
34
35
|
assertEquals("", task.getNullString());
|
36
|
+
assertEquals(DateTimeZone.UTC, task.getDefaultTimeZone());
|
37
|
+
assertEquals("%Y-%m-%d %H:%M:%S.%6N %z", task.getDefaultTimestampFormat());
|
35
38
|
assertEquals(Newline.LF, task.getNewlineInField());
|
36
39
|
}
|
37
40
|
|
@@ -6,6 +6,7 @@ import static org.junit.Assert.assertEquals;
|
|
6
6
|
import java.nio.charset.Charset;
|
7
7
|
import com.google.common.collect.ImmutableList;
|
8
8
|
import com.google.common.collect.ImmutableMap;
|
9
|
+
import org.joda.time.DateTimeZone;
|
9
10
|
import org.embulk.EmbulkTestRuntime;
|
10
11
|
import org.embulk.config.ConfigException;
|
11
12
|
import org.embulk.config.ConfigSource;
|
@@ -34,6 +35,8 @@ public class TestCsvParserPlugin
|
|
34
35
|
assertEquals(',', task.getDelimiterChar());
|
35
36
|
assertEquals('\"', task.getQuoteChar());
|
36
37
|
assertEquals(false, task.getAllowOptionalColumns());
|
38
|
+
assertEquals(DateTimeZone.UTC, task.getDefaultTimeZone());
|
39
|
+
assertEquals("%Y-%m-%d %H:%M:%S.%N %z", task.getDefaultTimestampFormat());
|
37
40
|
}
|
38
41
|
|
39
42
|
@Test(expected = ConfigException.class)
|
@@ -41,12 +41,10 @@ public class TestCsvTokenizer
|
|
41
41
|
config = Exec.newConfigSource()
|
42
42
|
.set("newline", "LF")
|
43
43
|
.set("columns", ImmutableList.of(
|
44
|
-
ImmutableMap
|
45
|
-
"name", "date_code",
|
46
|
-
|
47
|
-
|
48
|
-
"name", "foo",
|
49
|
-
"type", "string"))
|
44
|
+
ImmutableMap.<String,Object>of(
|
45
|
+
"name", "date_code", "type", "string", "option", ImmutableMap.of()),
|
46
|
+
ImmutableMap.<String,Object>of(
|
47
|
+
"name", "foo", "type", "string", "option", ImmutableMap.of()))
|
50
48
|
);
|
51
49
|
reloadPluginTask();
|
52
50
|
}
|
data/lib/embulk/guess/csv.rb
CHANGED
@@ -35,13 +35,17 @@ module Embulk
|
|
35
35
|
def guess_lines(config, sample_lines)
|
36
36
|
return {} unless config.fetch("parser", {}).fetch("type", "csv") == "csv"
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
38
|
+
parser_config = config["parser"] || {}
|
39
|
+
if parser_config["type"] == "csv" && parser_config["delimiter"]
|
40
|
+
delim = parser_config["delimiter"]
|
41
|
+
else
|
42
|
+
delim = guess_delimiter(sample_lines)
|
43
|
+
unless delim
|
44
|
+
# not CSV file
|
45
|
+
return {}
|
46
|
+
end
|
42
47
|
end
|
43
48
|
|
44
|
-
parser_config = config["parser"] || {}
|
45
49
|
parser_guessed = DataSource.new.merge(parser_config).merge({"type" => "csv", "delimiter" => delim})
|
46
50
|
|
47
51
|
unless parser_guessed.has_key?("quote")
|