embulk 0.6.13 → 0.6.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +3 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDiff.java +3 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +3 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSource.java +2 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +8 -1
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +28 -6
- data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/ProcessTask.java +12 -77
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +35 -2
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +42 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfigException.java +22 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +40 -2
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +43 -4
- data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +2 -0
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +1 -22
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +10 -10
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +12 -12
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +49 -31
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +15 -19
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParserDeprecated.java +67 -0
- data/embulk-docs/src/built-in.rst +18 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.14.rst +47 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +26 -10
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +21 -6
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvFormatterPlugin.java +3 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +3 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +4 -6
- data/lib/embulk/guess/csv.rb +9 -5
- data/lib/embulk/plugin_registry.rb +15 -11
- data/lib/embulk/version.rb +1 -1
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: abf8ae432c5b7f3817ac7acd72aee485a9b4f616
|
4
|
+
data.tar.gz: 8454af492f563890e26f11cc9f6fbf7fe8a7b66a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 49bb503770d751d426dcb2f4331faabff654bc906cd91610980bd435fde69c78e85043aea0a2f34f9906a293d3987d725493def95a769132993849cfcc14fafb
|
7
|
+
data.tar.gz: 50b97e0ffc2d76581eabcb174cbebf638cc1c8eae34fbaf139e711a3856a4eb6f4b197637213d13cbc17a9d36f9256f77f65682413e0727cae39c2c0c392882b
|
data/README.md
CHANGED
@@ -206,5 +206,5 @@ git tag v$VERSION
|
|
206
206
|
|
207
207
|
See also:
|
208
208
|
* [Bintray](https://bintray.com)
|
209
|
-
* [How to acquire bintray API Keys](https://bintray.com/docs/usermanual/interacting/
|
209
|
+
* [How to acquire bintray API Keys](https://bintray.com/docs/usermanual/interacting/interacting_editingyouruserprofile.html#anchorAPIKEY)
|
210
210
|
|
data/build.gradle
CHANGED
@@ -115,7 +115,7 @@ public class DataSourceImpl
|
|
115
115
|
public DataSourceImpl set(String attrName, Object v)
|
116
116
|
{
|
117
117
|
if (v == null) {
|
118
|
-
|
118
|
+
remove(attrName);
|
119
119
|
} else {
|
120
120
|
data.put(attrName, model.writeObjectAsJsonNode(v));
|
121
121
|
}
|
@@ -138,6 +138,13 @@ public class DataSourceImpl
|
|
138
138
|
return this;
|
139
139
|
}
|
140
140
|
|
141
|
+
@Override
|
142
|
+
public DataSourceImpl remove(String attrName)
|
143
|
+
{
|
144
|
+
data.remove(attrName);
|
145
|
+
return this;
|
146
|
+
}
|
147
|
+
|
141
148
|
@Override
|
142
149
|
public DataSourceImpl deepCopy()
|
143
150
|
{
|
@@ -3,24 +3,37 @@ package org.embulk.spi;
|
|
3
3
|
import java.util.Objects;
|
4
4
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
5
5
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
6
|
+
import org.embulk.config.ConfigSource;
|
6
7
|
import org.embulk.spi.type.Type;
|
7
8
|
import org.embulk.spi.type.TimestampType;
|
9
|
+
import org.embulk.spi.Exec;
|
8
10
|
|
9
11
|
public class ColumnConfig
|
10
12
|
{
|
11
13
|
private final String name;
|
12
14
|
private final Type type;
|
13
|
-
private final
|
15
|
+
private final ConfigSource option;
|
16
|
+
|
17
|
+
@Deprecated
|
18
|
+
public ColumnConfig(String name, Type type, String format)
|
19
|
+
{
|
20
|
+
this.name = name;
|
21
|
+
this.type = type;
|
22
|
+
this.option = Exec.newConfigSource(); // only for backward compatibility
|
23
|
+
if (format != null) {
|
24
|
+
option.set("format", format);
|
25
|
+
}
|
26
|
+
}
|
14
27
|
|
15
28
|
@JsonCreator
|
16
29
|
public ColumnConfig(
|
17
30
|
@JsonProperty("name") String name,
|
18
31
|
@JsonProperty("type") Type type,
|
19
|
-
@JsonProperty("
|
32
|
+
@JsonProperty("option") ConfigSource option)
|
20
33
|
{
|
21
34
|
this.name = name;
|
22
35
|
this.type = type;
|
23
|
-
this.
|
36
|
+
this.option = option;
|
24
37
|
}
|
25
38
|
|
26
39
|
@JsonProperty("name")
|
@@ -35,15 +48,23 @@ public class ColumnConfig
|
|
35
48
|
return type;
|
36
49
|
}
|
37
50
|
|
38
|
-
@JsonProperty("
|
51
|
+
@JsonProperty("option")
|
52
|
+
public ConfigSource getOption()
|
53
|
+
{
|
54
|
+
return option;
|
55
|
+
}
|
56
|
+
|
57
|
+
@Deprecated
|
39
58
|
public String getFormat()
|
40
59
|
{
|
41
|
-
return format;
|
60
|
+
return option.get(String.class, "format", null);
|
42
61
|
}
|
43
62
|
|
44
63
|
public Column toColumn(int index)
|
45
64
|
{
|
65
|
+
String format = option.get(String.class, "format", null);
|
46
66
|
if (type instanceof TimestampType && format != null) {
|
67
|
+
// this behavior is only for backward compatibility. TimestampType#getFormat is @Deprecated
|
47
68
|
return new Column(index, name, ((TimestampType) type).withFormat(format));
|
48
69
|
} else {
|
49
70
|
return new Column(index, name, type);
|
@@ -61,7 +82,8 @@ public class ColumnConfig
|
|
61
82
|
}
|
62
83
|
ColumnConfig other = (ColumnConfig) obj;
|
63
84
|
return Objects.equals(this.name, other.name) &&
|
64
|
-
Objects.equals(type, other.type)
|
85
|
+
Objects.equals(type, other.type) &&
|
86
|
+
Objects.equals(option, other.option);
|
65
87
|
}
|
66
88
|
|
67
89
|
@Override
|
@@ -18,7 +18,7 @@ abstract class PageFormat
|
|
18
18
|
|
19
19
|
static int nullBitSetSize(Schema schema)
|
20
20
|
{
|
21
|
-
return (schema.
|
21
|
+
return (schema.getColumnCount() + 7) / 8;
|
22
22
|
}
|
23
23
|
|
24
24
|
static int recordHeaderSize(Schema schema)
|
@@ -33,11 +33,11 @@ abstract class PageFormat
|
|
33
33
|
|
34
34
|
static int[] columnOffsets(Schema schema)
|
35
35
|
{
|
36
|
-
int[] offsets = new int[schema.
|
36
|
+
int[] offsets = new int[schema.getColumnCount()];
|
37
37
|
|
38
38
|
if (!schema.isEmpty()) {
|
39
39
|
offsets[0] = recordHeaderSize(schema);
|
40
|
-
for (int i=0; i < schema.
|
40
|
+
for (int i=0; i < schema.getColumnCount()-1; i++) {
|
41
41
|
offsets[i+1] = offsets[i] + schema.getColumnType(i).getFixedStorageSize();
|
42
42
|
}
|
43
43
|
}
|
@@ -25,16 +25,17 @@ public class ProcessTask
|
|
25
25
|
private final Schema executorSchema;
|
26
26
|
private TaskSource executorTaskSource;
|
27
27
|
|
28
|
+
@JsonCreator
|
28
29
|
public ProcessTask(
|
29
|
-
PluginType inputPluginType,
|
30
|
-
PluginType outputPluginType,
|
31
|
-
List<PluginType> filterPluginTypes,
|
32
|
-
TaskSource inputTaskSource,
|
33
|
-
TaskSource outputTaskSource,
|
34
|
-
List<TaskSource> filterTaskSources,
|
35
|
-
List<Schema> schemas,
|
36
|
-
Schema executorSchema,
|
37
|
-
TaskSource executorTaskSource)
|
30
|
+
@JsonProperty("inputType") PluginType inputPluginType,
|
31
|
+
@JsonProperty("outputType") PluginType outputPluginType,
|
32
|
+
@JsonProperty("filterTypes") List<PluginType> filterPluginTypes,
|
33
|
+
@JsonProperty("inputTask") TaskSource inputTaskSource,
|
34
|
+
@JsonProperty("outputTask") TaskSource outputTaskSource,
|
35
|
+
@JsonProperty("filterTasks") List<TaskSource> filterTaskSources,
|
36
|
+
@JsonProperty("schemas") List<Schema> schemas,
|
37
|
+
@JsonProperty("executorSchema") Schema executorSchema,
|
38
|
+
@JsonProperty("executorTask") TaskSource executorTaskSource)
|
38
39
|
{
|
39
40
|
this.inputPluginType = inputPluginType;
|
40
41
|
this.outputPluginType = outputPluginType;
|
@@ -47,36 +48,6 @@ public class ProcessTask
|
|
47
48
|
this.executorTaskSource = executorTaskSource;
|
48
49
|
}
|
49
50
|
|
50
|
-
// TODO Because TimestampType doesn't store timestamp_format, serializing and deserializing
|
51
|
-
// Schema loses timestamp_format information. Here uses SchemaConfig instead to preseve it.
|
52
|
-
|
53
|
-
@JsonCreator
|
54
|
-
ProcessTask(
|
55
|
-
@JsonProperty("inputType") PluginType inputPluginType,
|
56
|
-
@JsonProperty("outputType") PluginType outputPluginType,
|
57
|
-
@JsonProperty("filterTypes") List<PluginType> filterPluginTypes,
|
58
|
-
@JsonProperty("inputTask") TaskSource inputTaskSource,
|
59
|
-
@JsonProperty("outputTask") TaskSource outputTaskSource,
|
60
|
-
@JsonProperty("filterTasks") List<TaskSource> filterTaskSources,
|
61
|
-
@JsonProperty("schemas") List<SchemaConfig> schemas,
|
62
|
-
@JsonProperty("executorSchema") SchemaConfig executorSchema,
|
63
|
-
@JsonProperty("executorTask") TaskSource executorTaskSource)
|
64
|
-
{
|
65
|
-
this(inputPluginType, outputPluginType, filterPluginTypes,
|
66
|
-
inputTaskSource, outputTaskSource, filterTaskSources,
|
67
|
-
ImmutableList.copyOf(Lists.transform(schemas,
|
68
|
-
new Function<SchemaConfig, Schema>()
|
69
|
-
{
|
70
|
-
public Schema apply(SchemaConfig s)
|
71
|
-
{
|
72
|
-
return s.toSchema();
|
73
|
-
}
|
74
|
-
}
|
75
|
-
)),
|
76
|
-
executorSchema.toSchema(),
|
77
|
-
executorTaskSource);
|
78
|
-
}
|
79
|
-
|
80
51
|
@JsonProperty("inputType")
|
81
52
|
public PluginType getInputPluginType()
|
82
53
|
{
|
@@ -113,54 +84,18 @@ public class ProcessTask
|
|
113
84
|
return filterTaskSources;
|
114
85
|
}
|
115
86
|
|
116
|
-
@
|
87
|
+
@JsonProperty("schemas")
|
117
88
|
public List<Schema> getFilterSchemas()
|
118
89
|
{
|
119
90
|
return schemas;
|
120
91
|
}
|
121
92
|
|
122
|
-
@JsonProperty("
|
123
|
-
public List<SchemaConfig> getFilterSchemaConfigs()
|
124
|
-
{
|
125
|
-
return Lists.transform(schemas,
|
126
|
-
new Function<Schema, SchemaConfig>()
|
127
|
-
{
|
128
|
-
public SchemaConfig apply(Schema schema)
|
129
|
-
{
|
130
|
-
return schemaToSchemaConfig(schema);
|
131
|
-
}
|
132
|
-
});
|
133
|
-
}
|
134
|
-
|
135
|
-
@JsonIgnore
|
93
|
+
@JsonProperty("executorSchema")
|
136
94
|
public Schema getExecutorSchema()
|
137
95
|
{
|
138
96
|
return executorSchema;
|
139
97
|
}
|
140
98
|
|
141
|
-
@JsonProperty("executorSchema")
|
142
|
-
SchemaConfig getExecutorSchemaConfig()
|
143
|
-
{
|
144
|
-
return schemaToSchemaConfig(executorSchema);
|
145
|
-
}
|
146
|
-
|
147
|
-
private static SchemaConfig schemaToSchemaConfig(Schema s)
|
148
|
-
{
|
149
|
-
return new SchemaConfig(Lists.transform(s.getColumns(),
|
150
|
-
new Function<Column, ColumnConfig>()
|
151
|
-
{
|
152
|
-
public ColumnConfig apply(Column c)
|
153
|
-
{
|
154
|
-
if (c.getType() instanceof TimestampType) {
|
155
|
-
return new ColumnConfig(c.getName(), c.getType(), ((TimestampType) c.getType()).getFormat());
|
156
|
-
} else {
|
157
|
-
return new ColumnConfig(c.getName(), c.getType(), null);
|
158
|
-
}
|
159
|
-
}
|
160
|
-
}
|
161
|
-
));
|
162
|
-
}
|
163
|
-
|
164
99
|
@JsonIgnore
|
165
100
|
public Schema getInputSchema()
|
166
101
|
{
|
@@ -2,12 +2,35 @@ package org.embulk.spi;
|
|
2
2
|
|
3
3
|
import java.util.List;
|
4
4
|
import java.util.Objects;
|
5
|
+
import com.google.common.collect.ImmutableList;
|
5
6
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
6
7
|
import com.fasterxml.jackson.annotation.JsonValue;
|
7
8
|
import org.embulk.spi.type.Type;
|
8
9
|
|
9
10
|
public class Schema
|
10
11
|
{
|
12
|
+
public static class Builder
|
13
|
+
{
|
14
|
+
private final ImmutableList.Builder<Column> columns = ImmutableList.builder();
|
15
|
+
private int index = 0; // next version of Guava will have ImmutableList.Builder.size()
|
16
|
+
|
17
|
+
public synchronized Builder add(String name, Type type)
|
18
|
+
{
|
19
|
+
columns.add(new Column(index++, name, type));
|
20
|
+
return this;
|
21
|
+
}
|
22
|
+
|
23
|
+
public Schema build()
|
24
|
+
{
|
25
|
+
return new Schema(columns.build());
|
26
|
+
}
|
27
|
+
}
|
28
|
+
|
29
|
+
public static Builder builder()
|
30
|
+
{
|
31
|
+
return new Builder();
|
32
|
+
}
|
33
|
+
|
11
34
|
private final List<Column> columns;
|
12
35
|
|
13
36
|
@JsonCreator
|
@@ -22,6 +45,11 @@ public class Schema
|
|
22
45
|
return columns;
|
23
46
|
}
|
24
47
|
|
48
|
+
public int size()
|
49
|
+
{
|
50
|
+
return columns.size();
|
51
|
+
}
|
52
|
+
|
25
53
|
public int getColumnCount()
|
26
54
|
{
|
27
55
|
return columns.size();
|
@@ -54,9 +82,14 @@ public class Schema
|
|
54
82
|
return columns.isEmpty();
|
55
83
|
}
|
56
84
|
|
57
|
-
public
|
85
|
+
public Column lookupColumn(String name)
|
58
86
|
{
|
59
|
-
|
87
|
+
for (Column c : columns) {
|
88
|
+
if (c.getName().equals(name)) {
|
89
|
+
return c;
|
90
|
+
}
|
91
|
+
}
|
92
|
+
throw new SchemaConfigException(String.format("Column '%s' is not found", name));
|
60
93
|
}
|
61
94
|
|
62
95
|
public int getFixedStorageSize()
|
@@ -2,9 +2,11 @@ package org.embulk.spi;
|
|
2
2
|
|
3
3
|
import java.util.List;
|
4
4
|
import java.util.Objects;
|
5
|
+
import com.google.common.base.Optional;
|
5
6
|
import com.google.common.collect.ImmutableList;
|
6
7
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
7
8
|
import com.fasterxml.jackson.annotation.JsonValue;
|
9
|
+
import org.embulk.spi.type.Type;
|
8
10
|
|
9
11
|
public class SchemaConfig
|
10
12
|
{
|
@@ -22,6 +24,46 @@ public class SchemaConfig
|
|
22
24
|
return columns;
|
23
25
|
}
|
24
26
|
|
27
|
+
public int size()
|
28
|
+
{
|
29
|
+
return columns.size();
|
30
|
+
}
|
31
|
+
|
32
|
+
public int getColumnCount()
|
33
|
+
{
|
34
|
+
return columns.size();
|
35
|
+
}
|
36
|
+
|
37
|
+
public ColumnConfig getColumn(int index)
|
38
|
+
{
|
39
|
+
return columns.get(index);
|
40
|
+
}
|
41
|
+
|
42
|
+
public String getColumnName(int index)
|
43
|
+
{
|
44
|
+
return getColumn(index).getName();
|
45
|
+
}
|
46
|
+
|
47
|
+
public Type getColumnType(int index)
|
48
|
+
{
|
49
|
+
return getColumn(index).getType();
|
50
|
+
}
|
51
|
+
|
52
|
+
public boolean isEmpty()
|
53
|
+
{
|
54
|
+
return columns.isEmpty();
|
55
|
+
}
|
56
|
+
|
57
|
+
public ColumnConfig lookupColumn(String name)
|
58
|
+
{
|
59
|
+
for (ColumnConfig c : columns) {
|
60
|
+
if (c.getName().equals(name)) {
|
61
|
+
return c;
|
62
|
+
}
|
63
|
+
}
|
64
|
+
throw new SchemaConfigException(String.format("Column '%s' is not found", name));
|
65
|
+
}
|
66
|
+
|
25
67
|
public Schema toSchema()
|
26
68
|
{
|
27
69
|
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|