embulk-parser-jsonl 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +5 -0
- data/build.gradle +5 -5
- data/embulk-parser-jsonl.gemspec +1 -1
- data/example/example.yml +23 -0
- data/example/sample.json +2 -0
- data/src/main/java/org/embulk/parser/jsonl/JsonRecordValidateException.java +14 -0
- data/src/main/java/org/embulk/parser/jsonl/JsonlColumnOption.java +16 -0
- data/src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java +44 -127
- data/src/main/java/org/embulk/parser/jsonl/getter/ColumnGetterFactory.java +24 -0
- data/src/main/java/org/embulk/parser/jsonl/getter/CommonColumnGetter.java +131 -0
- data/src/main/java/org/embulk/parser/jsonl/getter/StringColumnGetter.java +68 -0
- data/src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java +49 -0
- data/src/test/resources/org/embulk/parser/jsonl/use_column_options.yml +9 -0
- metadata +11 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7f4944036ca4cacbe9f2b8a0943503f77c92aca1
|
4
|
+
data.tar.gz: a11616b53812123915ccdd8e88938471f81b2aea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf717db902192c17124a393b063972067e2e9d46b66dfc7247cf427f03bcec551d10c799c2ec6923ca486ba44ec0017d72598280106d356eb3f9464fdbf94d0f
|
7
|
+
data.tar.gz: be23fbef9cba986f15fecebd91ba15059f1fea6fe13aaebd62373488aa092e6f42b7fb13ab471bf125c38f8ab587a7a0bfe62dc6b465dece554a80e6e0792e25
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
## 0.1.2 - 2016-03-27
|
2
|
+
|
3
|
+
[new feature] Support column_options option [#4](https://github.com/shun0102/embulk-parser-jsonl/pull/4)
|
4
|
+
[maintenance] Upgrade Embulk v0.8.8 [#6](https://github.com/shun0102/embulk-parser-jsonl/pull/6)
|
5
|
+
|
1
6
|
## 0.1.1 - 2016-03-17
|
2
7
|
|
3
8
|
[fix bug] Avoid org.embulk.spi.json.JsonParseException: Unable to parse empty string [#5](https://github.com/shun0102/embulk-parser-jsonl/pull/5)
|
data/build.gradle
CHANGED
@@ -13,19 +13,19 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.1.1"
|
16
|
+
version = "0.1.2"
|
17
17
|
|
18
18
|
compileJava.options.encoding = 'UTF-8' // source encoding
|
19
19
|
sourceCompatibility = 1.7
|
20
20
|
targetCompatibility = 1.7
|
21
21
|
|
22
22
|
dependencies {
|
23
|
-
compile "org.embulk:embulk-core:0.8.
|
24
|
-
provided "org.embulk:embulk-core:0.8.
|
23
|
+
compile "org.embulk:embulk-core:0.8.8"
|
24
|
+
provided "org.embulk:embulk-core:0.8.8"
|
25
25
|
|
26
26
|
testCompile "junit:junit:4.+"
|
27
|
-
testCompile "org.embulk:embulk-core:0.8.
|
28
|
-
testCompile "org.embulk:embulk-standards:0.8.
|
27
|
+
testCompile "org.embulk:embulk-core:0.8.8:tests"
|
28
|
+
testCompile "org.embulk:embulk-standards:0.8.8"
|
29
29
|
}
|
30
30
|
|
31
31
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
data/embulk-parser-jsonl.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-parser-jsonl"
|
4
|
-
spec.version = "0.1.1"
|
4
|
+
spec.version = "0.1.2"
|
5
5
|
spec.authors = ["Shunsuke Mikami"]
|
6
6
|
spec.summary = "Jsonl parser plugin for Embulk"
|
7
7
|
spec.description = "Parses Jsonl files read by other file input plugins."
|
data/example/example.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: "sample"
|
4
|
+
parser:
|
5
|
+
type: jsonl
|
6
|
+
columns:
|
7
|
+
- {name: "foo", type: "string"}
|
8
|
+
- {name: "bool", type: "boolean"}
|
9
|
+
- {name: "bool_str", type: "string"}
|
10
|
+
- {name: "int", type: "long"}
|
11
|
+
- {name: "int_str", type: "string"}
|
12
|
+
- {name: "time", type: "timestamp", format: '%Y-%m-%d %H:%M:%S'}
|
13
|
+
- {name: "double", type: "double"}
|
14
|
+
- {name: "double_str", type: "string"}
|
15
|
+
- {name: "array", type: "json"}
|
16
|
+
column_options:
|
17
|
+
bool_str: {type: "boolean"}
|
18
|
+
int_str: {type: "long"}
|
19
|
+
double_str: {type: "double"}
|
20
|
+
|
21
|
+
|
22
|
+
out:
|
23
|
+
type: stdout
|
data/example/sample.json
ADDED
@@ -0,0 +1,2 @@
|
|
1
|
+
{"foo": "bar", "bool": true, "bool_str": "true", "int": 1, "int_str": "42", "time": "2016-3-2 00:39:18", "double": 1.2, "double_str": "2.4", "array": [1, 2, 3]}
|
2
|
+
{"foo": null, "bool": false, "bool_str": "false", "int": 1, "int_str": "42", "time": "2016-3-2 00:39:18", "double": 1.2, "double_str": "2.4", "array": [{"inner": "hoge"}, {"inner": 1.5}]}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
package org.embulk.parser.jsonl;
|
2
|
+
|
3
|
+
import org.embulk.spi.DataException;
|
4
|
+
|
5
|
+
public class JsonRecordValidateException
|
6
|
+
extends DataException {
|
7
|
+
JsonRecordValidateException(String message) {
|
8
|
+
super(message);
|
9
|
+
}
|
10
|
+
|
11
|
+
public JsonRecordValidateException(Throwable cause) {
|
12
|
+
super(cause);
|
13
|
+
}
|
14
|
+
}
|
@@ -0,0 +1,16 @@
|
|
1
|
+
package org.embulk.parser.jsonl;
|
2
|
+
|
3
|
+
import org.embulk.config.Config;
|
4
|
+
import org.embulk.config.ConfigDefault;
|
5
|
+
import org.embulk.config.Task;
|
6
|
+
import org.embulk.spi.type.Type;
|
7
|
+
|
8
|
+
import com.google.common.base.Optional;
|
9
|
+
|
10
|
+
public interface JsonlColumnOption
|
11
|
+
extends Task
|
12
|
+
{
|
13
|
+
@Config("type")
|
14
|
+
@ConfigDefault("null")
|
15
|
+
Optional<Type> getType();
|
16
|
+
}
|
@@ -1,6 +1,8 @@
|
|
1
1
|
package org.embulk.parser.jsonl;
|
2
2
|
|
3
3
|
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.base.Supplier;
|
5
|
+
import com.google.common.collect.ImmutableList;
|
4
6
|
import com.google.common.collect.ImmutableMap;
|
5
7
|
import org.embulk.config.Config;
|
6
8
|
import org.embulk.config.ConfigDefault;
|
@@ -8,8 +10,10 @@ import org.embulk.config.ConfigException;
|
|
8
10
|
import org.embulk.config.ConfigSource;
|
9
11
|
import org.embulk.config.Task;
|
10
12
|
import org.embulk.config.TaskSource;
|
13
|
+
import org.embulk.parser.jsonl.getter.CommonColumnGetter;
|
14
|
+
import org.embulk.parser.jsonl.getter.ColumnGetterFactory;
|
11
15
|
import org.embulk.spi.Column;
|
12
|
-
import org.embulk.spi.ColumnVisitor;
|
16
|
+
import org.embulk.spi.ColumnConfig;
|
13
17
|
import org.embulk.spi.DataException;
|
14
18
|
import org.embulk.spi.Exec;
|
15
19
|
import org.embulk.spi.FileInput;
|
@@ -23,10 +27,6 @@ import org.embulk.spi.json.JsonParser;
|
|
23
27
|
import org.embulk.spi.time.TimestampParser;
|
24
28
|
import org.embulk.spi.util.LineDecoder;
|
25
29
|
import org.embulk.spi.util.Timestamps;
|
26
|
-
import org.msgpack.core.MessageTypeException;
|
27
|
-
import org.msgpack.value.BooleanValue;
|
28
|
-
import org.msgpack.value.FloatValue;
|
29
|
-
import org.msgpack.value.IntegerValue;
|
30
30
|
import org.msgpack.value.Value;
|
31
31
|
import org.slf4j.Logger;
|
32
32
|
|
@@ -52,6 +52,10 @@ public class JsonlParserPlugin
|
|
52
52
|
@Config("stop_on_invalid_record")
|
53
53
|
@ConfigDefault("false")
|
54
54
|
boolean getStopOnInvalidRecord();
|
55
|
+
|
56
|
+
@Config("column_options")
|
57
|
+
@ConfigDefault("{}")
|
58
|
+
Map<String, JsonlColumnOption> getColumnOptions();
|
55
59
|
}
|
56
60
|
|
57
61
|
private final Logger log;
|
@@ -69,7 +73,14 @@ public class JsonlParserPlugin
|
|
69
73
|
public void transaction(ConfigSource configSource, Control control)
|
70
74
|
{
|
71
75
|
PluginTask task = configSource.loadConfig(PluginTask.class);
|
72
|
-
|
76
|
+
SchemaConfig schemaConfig = getSchemaConfig(task);
|
77
|
+
ImmutableList.Builder<Column> columns = ImmutableList.builder();
|
78
|
+
for (int i = 0; i < schemaConfig.getColumnCount(); i++) {
|
79
|
+
ColumnConfig columnConfig = schemaConfig.getColumn(i);
|
80
|
+
JsonlColumnOption columnOption = columnOptionOf(task.getColumnOptions(), columnConfig.getName());
|
81
|
+
columns.add(new Column(i, columnConfig.getName(), columnOption.getType().or(columnConfig.getType())));
|
82
|
+
}
|
83
|
+
control.run(task.dump(), new Schema(columns.build()));
|
73
84
|
}
|
74
85
|
|
75
86
|
// this method is to keep the backward compatibility of 'schema' option.
|
@@ -97,12 +108,21 @@ public class JsonlParserPlugin
|
|
97
108
|
|
98
109
|
setColumnNameValues(schema);
|
99
110
|
|
100
|
-
final
|
111
|
+
final SchemaConfig schemaConfig = getSchemaConfig(task);
|
112
|
+
final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, schemaConfig);
|
101
113
|
final LineDecoder decoder = newLineDecoder(input, task);
|
102
114
|
final JsonParser jsonParser = newJsonParser();
|
103
115
|
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
104
116
|
|
105
117
|
try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
118
|
+
ColumnGetterFactory factory = new ColumnGetterFactory(pageBuilder, timestampParsers);
|
119
|
+
ImmutableMap.Builder<String, CommonColumnGetter> columnGettersBuilder = ImmutableMap.builder();
|
120
|
+
for (ColumnConfig columnConfig : schemaConfig.getColumns()) {
|
121
|
+
CommonColumnGetter columnGetter = factory.newColumnGetter(columnConfig);
|
122
|
+
columnGettersBuilder.put(columnConfig.getName(), columnGetter);
|
123
|
+
}
|
124
|
+
ImmutableMap<String, CommonColumnGetter> columnGetters = columnGettersBuilder.build();
|
125
|
+
|
106
126
|
while (decoder.nextFile()) { // TODO this implementation should be improved with new JsonParser API on Embulk v0.8.3
|
107
127
|
lineNumber = 0;
|
108
128
|
|
@@ -117,115 +137,12 @@ public class JsonlParserPlugin
|
|
117
137
|
}
|
118
138
|
|
119
139
|
final Map<Value, Value> record = value.asMapValue().map();
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
if (isNil(v)) {
|
127
|
-
pageBuilder.setNull(column);
|
128
|
-
}
|
129
|
-
else {
|
130
|
-
try {
|
131
|
-
pageBuilder.setBoolean(column, ((BooleanValue) v).getBoolean());
|
132
|
-
}
|
133
|
-
catch (MessageTypeException e) {
|
134
|
-
throw new JsonRecordValidateException(e);
|
135
|
-
}
|
136
|
-
}
|
137
|
-
}
|
138
|
-
|
139
|
-
@Override
|
140
|
-
public void longColumn(Column column)
|
141
|
-
{
|
142
|
-
Value v = record.get(getColumnNameValue(column));
|
143
|
-
if (isNil(v)) {
|
144
|
-
pageBuilder.setNull(column);
|
145
|
-
}
|
146
|
-
else {
|
147
|
-
try {
|
148
|
-
pageBuilder.setLong(column, ((IntegerValue) v).asLong());
|
149
|
-
}
|
150
|
-
catch (MessageTypeException e) {
|
151
|
-
throw new JsonRecordValidateException(e);
|
152
|
-
}
|
153
|
-
}
|
154
|
-
}
|
155
|
-
|
156
|
-
@Override
|
157
|
-
public void doubleColumn(Column column)
|
158
|
-
{
|
159
|
-
Value v = record.get(getColumnNameValue(column));
|
160
|
-
if (isNil(v)) {
|
161
|
-
pageBuilder.setNull(column);
|
162
|
-
}
|
163
|
-
else {
|
164
|
-
try {
|
165
|
-
pageBuilder.setDouble(column, ((FloatValue) v).toDouble());
|
166
|
-
}
|
167
|
-
catch (MessageTypeException e) {
|
168
|
-
throw new JsonRecordValidateException(e);
|
169
|
-
}
|
170
|
-
}
|
171
|
-
}
|
172
|
-
|
173
|
-
@Override
|
174
|
-
public void stringColumn(Column column)
|
175
|
-
{
|
176
|
-
Value v = record.get(getColumnNameValue(column));
|
177
|
-
if (isNil(v)) {
|
178
|
-
pageBuilder.setNull(column);
|
179
|
-
}
|
180
|
-
else {
|
181
|
-
try {
|
182
|
-
pageBuilder.setString(column, v.toString());
|
183
|
-
}
|
184
|
-
catch (MessageTypeException e) {
|
185
|
-
throw new JsonRecordValidateException(e);
|
186
|
-
}
|
187
|
-
}
|
188
|
-
}
|
189
|
-
|
190
|
-
@Override
|
191
|
-
public void timestampColumn(Column column)
|
192
|
-
{
|
193
|
-
Value v = record.get(getColumnNameValue(column));
|
194
|
-
if (isNil(v)) {
|
195
|
-
pageBuilder.setNull(column);
|
196
|
-
}
|
197
|
-
else {
|
198
|
-
try {
|
199
|
-
pageBuilder.setTimestamp(column, timestampParsers[column.getIndex()].parse(v.toString()));
|
200
|
-
}
|
201
|
-
catch (MessageTypeException e) {
|
202
|
-
throw new JsonRecordValidateException(e);
|
203
|
-
}
|
204
|
-
}
|
205
|
-
}
|
206
|
-
|
207
|
-
@Override
|
208
|
-
public void jsonColumn(Column column)
|
209
|
-
{
|
210
|
-
Value v = record.get(getColumnNameValue(column));
|
211
|
-
if (isNil(v)) {
|
212
|
-
pageBuilder.setNull(column);
|
213
|
-
}
|
214
|
-
else {
|
215
|
-
try {
|
216
|
-
pageBuilder.setJson(column, v);
|
217
|
-
}
|
218
|
-
catch (MessageTypeException e) {
|
219
|
-
throw new JsonRecordValidateException(e);
|
220
|
-
}
|
221
|
-
}
|
222
|
-
}
|
223
|
-
|
224
|
-
private boolean isNil(Value v)
|
225
|
-
{
|
226
|
-
return v == null || v.isNilValue();
|
227
|
-
}
|
228
|
-
});
|
140
|
+
for (Column column : schema.getColumns()) {
|
141
|
+
Value v = record.get(getColumnNameValue(column));
|
142
|
+
CommonColumnGetter columnGetter = columnGetters.get(column.getName());
|
143
|
+
columnGetter.setValue(v);
|
144
|
+
column.visit(columnGetter);
|
145
|
+
}
|
229
146
|
|
230
147
|
pageBuilder.addRecord();
|
231
148
|
}
|
@@ -267,17 +184,17 @@ public class JsonlParserPlugin
|
|
267
184
|
return new JsonParser();
|
268
185
|
}
|
269
186
|
|
270
|
-
static class JsonRecordValidateException
|
271
|
-
extends DataException
|
187
|
+
private static JsonlColumnOption columnOptionOf(Map<String, JsonlColumnOption> columnOptions, String columnName)
|
272
188
|
{
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
189
|
+
return Optional.fromNullable(columnOptions.get(columnName)).or(
|
190
|
+
// default column option
|
191
|
+
new Supplier<JsonlColumnOption>()
|
192
|
+
{
|
193
|
+
public JsonlColumnOption get()
|
194
|
+
{
|
195
|
+
return Exec.newConfigSource().loadConfig(JsonlColumnOption.class);
|
196
|
+
}
|
197
|
+
});
|
282
198
|
}
|
199
|
+
|
283
200
|
}
|
@@ -0,0 +1,24 @@
|
|
1
|
+
package org.embulk.parser.jsonl.getter;
|
2
|
+
|
3
|
+
import org.embulk.spi.ColumnConfig;
|
4
|
+
import org.embulk.spi.PageBuilder;
|
5
|
+
import org.embulk.spi.time.TimestampParser;
|
6
|
+
|
7
|
+
public class ColumnGetterFactory {
|
8
|
+
private PageBuilder pageBuilder;
|
9
|
+
private TimestampParser[] timestampParsers;
|
10
|
+
|
11
|
+
public ColumnGetterFactory(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
|
12
|
+
this.pageBuilder = pageBuilder;
|
13
|
+
this.timestampParsers = timestampParsers;
|
14
|
+
}
|
15
|
+
|
16
|
+
public CommonColumnGetter newColumnGetter(ColumnConfig columnConfig) {
|
17
|
+
switch (columnConfig.getType().getName()) {
|
18
|
+
case "string":
|
19
|
+
return new StringColumnGetter(pageBuilder, timestampParsers);
|
20
|
+
default:
|
21
|
+
return new CommonColumnGetter(pageBuilder, timestampParsers);
|
22
|
+
}
|
23
|
+
}
|
24
|
+
}
|
@@ -0,0 +1,131 @@
|
|
1
|
+
package org.embulk.parser.jsonl.getter;
|
2
|
+
|
3
|
+
import org.embulk.parser.jsonl.JsonRecordValidateException;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.ColumnVisitor;
|
6
|
+
import org.embulk.spi.PageBuilder;
|
7
|
+
import org.embulk.spi.time.TimestampParser;
|
8
|
+
import org.msgpack.core.MessageTypeException;
|
9
|
+
import org.msgpack.value.BooleanValue;
|
10
|
+
import org.msgpack.value.FloatValue;
|
11
|
+
import org.msgpack.value.IntegerValue;
|
12
|
+
import org.msgpack.value.Value;
|
13
|
+
|
14
|
+
public class CommonColumnGetter implements ColumnVisitor {
|
15
|
+
protected final PageBuilder pageBuilder;
|
16
|
+
protected final TimestampParser[] timestampParsers;
|
17
|
+
|
18
|
+
protected Value value;
|
19
|
+
|
20
|
+
public CommonColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers)
|
21
|
+
{
|
22
|
+
this.pageBuilder = pageBuilder;
|
23
|
+
this.timestampParsers = timestampParsers;
|
24
|
+
}
|
25
|
+
|
26
|
+
public void setValue(Value value)
|
27
|
+
{
|
28
|
+
this.value = value;
|
29
|
+
}
|
30
|
+
|
31
|
+
@Override
|
32
|
+
public void booleanColumn(Column column)
|
33
|
+
{
|
34
|
+
if (isNil(value)) {
|
35
|
+
pageBuilder.setNull(column);
|
36
|
+
}
|
37
|
+
else {
|
38
|
+
try {
|
39
|
+
pageBuilder.setBoolean(column, ((BooleanValue) value).getBoolean());
|
40
|
+
}
|
41
|
+
catch (MessageTypeException e) {
|
42
|
+
throw new JsonRecordValidateException(e);
|
43
|
+
}
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
@Override
|
48
|
+
public void longColumn(Column column)
|
49
|
+
{
|
50
|
+
if (isNil(value)) {
|
51
|
+
pageBuilder.setNull(column);
|
52
|
+
}
|
53
|
+
else {
|
54
|
+
try {
|
55
|
+
pageBuilder.setLong(column, ((IntegerValue) value).asLong());
|
56
|
+
}
|
57
|
+
catch (MessageTypeException e) {
|
58
|
+
throw new JsonRecordValidateException(e);
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
@Override
|
64
|
+
public void doubleColumn(Column column)
|
65
|
+
{
|
66
|
+
if (isNil(value)) {
|
67
|
+
pageBuilder.setNull(column);
|
68
|
+
}
|
69
|
+
else {
|
70
|
+
try {
|
71
|
+
pageBuilder.setDouble(column, ((FloatValue) value).toDouble());
|
72
|
+
}
|
73
|
+
catch (MessageTypeException e) {
|
74
|
+
throw new JsonRecordValidateException(e);
|
75
|
+
}
|
76
|
+
}
|
77
|
+
}
|
78
|
+
|
79
|
+
@Override
|
80
|
+
public void stringColumn(Column column)
|
81
|
+
{
|
82
|
+
if (isNil(value)) {
|
83
|
+
pageBuilder.setNull(column);
|
84
|
+
}
|
85
|
+
else {
|
86
|
+
try {
|
87
|
+
pageBuilder.setString(column, value.toString());
|
88
|
+
}
|
89
|
+
catch (MessageTypeException e) {
|
90
|
+
throw new JsonRecordValidateException(e);
|
91
|
+
}
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
95
|
+
@Override
|
96
|
+
public void timestampColumn(Column column)
|
97
|
+
{
|
98
|
+
if (isNil(value)) {
|
99
|
+
pageBuilder.setNull(column);
|
100
|
+
}
|
101
|
+
else {
|
102
|
+
try {
|
103
|
+
pageBuilder.setTimestamp(column, timestampParsers[column.getIndex()].parse(value.toString()));
|
104
|
+
}
|
105
|
+
catch (MessageTypeException e) {
|
106
|
+
throw new JsonRecordValidateException(e);
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
@Override
|
112
|
+
public void jsonColumn(Column column)
|
113
|
+
{
|
114
|
+
if (isNil(value)) {
|
115
|
+
pageBuilder.setNull(column);
|
116
|
+
}
|
117
|
+
else {
|
118
|
+
try {
|
119
|
+
pageBuilder.setJson(column, value);
|
120
|
+
}
|
121
|
+
catch (MessageTypeException e) {
|
122
|
+
throw new JsonRecordValidateException(e);
|
123
|
+
}
|
124
|
+
}
|
125
|
+
}
|
126
|
+
|
127
|
+
protected boolean isNil(Value v)
|
128
|
+
{
|
129
|
+
return v == null || v.isNilValue();
|
130
|
+
}
|
131
|
+
}
|
@@ -0,0 +1,68 @@
|
|
1
|
+
package org.embulk.parser.jsonl.getter;
|
2
|
+
|
3
|
+
import org.embulk.parser.jsonl.JsonRecordValidateException;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.PageBuilder;
|
6
|
+
import org.embulk.spi.time.TimestampParser;
|
7
|
+
import org.msgpack.core.MessageTypeException;
|
8
|
+
|
9
|
+
public class StringColumnGetter extends CommonColumnGetter {
|
10
|
+
|
11
|
+
public StringColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers)
|
12
|
+
{
|
13
|
+
super(pageBuilder, timestampParsers);
|
14
|
+
}
|
15
|
+
|
16
|
+
private String getValueAsString()
|
17
|
+
{
|
18
|
+
return value.toString();
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public void booleanColumn(Column column)
|
23
|
+
{
|
24
|
+
if (isNil(value)) {
|
25
|
+
pageBuilder.setNull(column);
|
26
|
+
}
|
27
|
+
else {
|
28
|
+
try {
|
29
|
+
pageBuilder.setBoolean(column, Boolean.valueOf(getValueAsString()));
|
30
|
+
}
|
31
|
+
catch (MessageTypeException e) {
|
32
|
+
throw new JsonRecordValidateException(e);
|
33
|
+
}
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public void longColumn(Column column)
|
39
|
+
{
|
40
|
+
if (isNil(value)) {
|
41
|
+
pageBuilder.setNull(column);
|
42
|
+
}
|
43
|
+
else {
|
44
|
+
try {
|
45
|
+
pageBuilder.setLong(column, Long.valueOf(getValueAsString()));
|
46
|
+
}
|
47
|
+
catch (MessageTypeException | NumberFormatException e) {
|
48
|
+
throw new JsonRecordValidateException(e);
|
49
|
+
}
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
53
|
+
@Override
|
54
|
+
public void doubleColumn(Column column)
|
55
|
+
{
|
56
|
+
if (isNil(value)) {
|
57
|
+
pageBuilder.setNull(column);
|
58
|
+
}
|
59
|
+
else {
|
60
|
+
try {
|
61
|
+
pageBuilder.setDouble(column, Double.valueOf(getValueAsString()));
|
62
|
+
}
|
63
|
+
catch (MessageTypeException | NumberFormatException e) {
|
64
|
+
throw new JsonRecordValidateException(e);
|
65
|
+
}
|
66
|
+
}
|
67
|
+
}
|
68
|
+
}
|
@@ -3,10 +3,12 @@ package org.embulk.parser.jsonl;
|
|
3
3
|
import com.google.common.collect.ImmutableList;
|
4
4
|
import com.google.common.collect.Lists;
|
5
5
|
import org.embulk.EmbulkTestRuntime;
|
6
|
+
import org.embulk.config.ConfigLoader;
|
6
7
|
import org.embulk.config.ConfigSource;
|
7
8
|
import org.embulk.config.TaskSource;
|
8
9
|
import org.embulk.spi.ColumnConfig;
|
9
10
|
import org.embulk.spi.DataException;
|
11
|
+
import org.embulk.spi.Exec;
|
10
12
|
import org.embulk.spi.FileInput;
|
11
13
|
import org.embulk.spi.ParserPlugin;
|
12
14
|
import org.embulk.spi.Schema;
|
@@ -21,6 +23,7 @@ import org.junit.Rule;
|
|
21
23
|
import org.junit.Test;
|
22
24
|
|
23
25
|
import java.io.ByteArrayInputStream;
|
26
|
+
import java.io.File;
|
24
27
|
import java.io.IOException;
|
25
28
|
import java.io.InputStream;
|
26
29
|
import java.util.List;
|
@@ -176,11 +179,57 @@ public class TestJsonlParserPlugin
|
|
176
179
|
}
|
177
180
|
}
|
178
181
|
|
182
|
+
@Test
|
183
|
+
public void useColumnOptions()
|
184
|
+
throws Exception
|
185
|
+
{
|
186
|
+
|
187
|
+
SchemaConfig schema = schema(
|
188
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE));
|
189
|
+
File yamlFile = getResourceFile("use_column_options.yml");
|
190
|
+
ConfigSource config = getConfigFromYamlFile(yamlFile);
|
191
|
+
|
192
|
+
transaction(config, fileInput(
|
193
|
+
"{\"_c0\":\"true\",\"_c1\":\"10\",\"_c2\":\"0.1\"}",
|
194
|
+
"{\"_c0\":\"false\",\"_c1\":\"-10\",\"_c2\":\"1.0\"}"
|
195
|
+
));
|
196
|
+
|
197
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
198
|
+
assertEquals(2, records.size());
|
199
|
+
|
200
|
+
Object[] record;
|
201
|
+
{
|
202
|
+
record = records.get(0);
|
203
|
+
assertEquals(true, record[0]);
|
204
|
+
assertEquals(10L, record[1]);
|
205
|
+
assertEquals(0.1, (Double) record[2], 0.0001);
|
206
|
+
}
|
207
|
+
{
|
208
|
+
record = records.get(1);
|
209
|
+
assertEquals(false, record[0]);
|
210
|
+
assertEquals(-10L, record[1]);
|
211
|
+
assertEquals(1.0, (Double) record[2], 0.0001);
|
212
|
+
}
|
213
|
+
}
|
214
|
+
|
179
215
|
private ConfigSource config()
|
180
216
|
{
|
181
217
|
return runtime.getExec().newConfigSource();
|
182
218
|
}
|
183
219
|
|
220
|
+
private File getResourceFile(String resourceName)
|
221
|
+
throws IOException
|
222
|
+
{
|
223
|
+
return new File(this.getClass().getResource(resourceName).getFile());
|
224
|
+
}
|
225
|
+
|
226
|
+
private ConfigSource getConfigFromYamlFile(File yamlFile)
|
227
|
+
throws IOException
|
228
|
+
{
|
229
|
+
ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
|
230
|
+
return loader.fromYamlFile(yamlFile);
|
231
|
+
}
|
232
|
+
|
184
233
|
private void transaction(ConfigSource config, final FileInput input)
|
185
234
|
{
|
186
235
|
plugin.transaction(config, new ParserPlugin.Control()
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-jsonl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shunsuke Mikami
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -53,6 +53,8 @@ files:
|
|
53
53
|
- Rakefile
|
54
54
|
- build.gradle
|
55
55
|
- embulk-parser-jsonl.gemspec
|
56
|
+
- example/example.yml
|
57
|
+
- example/sample.json
|
56
58
|
- gradle/wrapper/gradle-wrapper.jar
|
57
59
|
- gradle/wrapper/gradle-wrapper.properties
|
58
60
|
- gradlew
|
@@ -60,9 +62,15 @@ files:
|
|
60
62
|
- lib/embulk/guess/jsonl.rb
|
61
63
|
- lib/embulk/parser/jsonl.rb
|
62
64
|
- settings.gradle
|
65
|
+
- src/main/java/org/embulk/parser/jsonl/JsonRecordValidateException.java
|
66
|
+
- src/main/java/org/embulk/parser/jsonl/JsonlColumnOption.java
|
63
67
|
- src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java
|
68
|
+
- src/main/java/org/embulk/parser/jsonl/getter/ColumnGetterFactory.java
|
69
|
+
- src/main/java/org/embulk/parser/jsonl/getter/CommonColumnGetter.java
|
70
|
+
- src/main/java/org/embulk/parser/jsonl/getter/StringColumnGetter.java
|
64
71
|
- src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java
|
65
|
-
- classpath/embulk-parser-jsonl-0.1.1.jar
|
72
|
+
- src/test/resources/org/embulk/parser/jsonl/use_column_options.yml
|
73
|
+
- classpath/embulk-parser-jsonl-0.1.2.jar
|
66
74
|
homepage: https://github.com/shun0102/embulk-parser-jsonl
|
67
75
|
licenses:
|
68
76
|
- MIT
|