embulk-parser-jsonl 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +5 -0
- data/build.gradle +5 -5
- data/embulk-parser-jsonl.gemspec +1 -1
- data/example/example.yml +23 -0
- data/example/sample.json +2 -0
- data/src/main/java/org/embulk/parser/jsonl/JsonRecordValidateException.java +14 -0
- data/src/main/java/org/embulk/parser/jsonl/JsonlColumnOption.java +16 -0
- data/src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java +44 -127
- data/src/main/java/org/embulk/parser/jsonl/getter/ColumnGetterFactory.java +24 -0
- data/src/main/java/org/embulk/parser/jsonl/getter/CommonColumnGetter.java +131 -0
- data/src/main/java/org/embulk/parser/jsonl/getter/StringColumnGetter.java +68 -0
- data/src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java +49 -0
- data/src/test/resources/org/embulk/parser/jsonl/use_column_options.yml +9 -0
- metadata +11 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7f4944036ca4cacbe9f2b8a0943503f77c92aca1
|
4
|
+
data.tar.gz: a11616b53812123915ccdd8e88938471f81b2aea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf717db902192c17124a393b063972067e2e9d46b66dfc7247cf427f03bcec551d10c799c2ec6923ca486ba44ec0017d72598280106d356eb3f9464fdbf94d0f
|
7
|
+
data.tar.gz: be23fbef9cba986f15fecebd91ba15059f1fea6fe13aaebd62373488aa092e6f42b7fb13ab471bf125c38f8ab587a7a0bfe62dc6b465dece554a80e6e0792e25
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
## 0.1.2 - 2016-03-27
|
2
|
+
|
3
|
+
[new feature] Support column_options option [#4](https://github.com/shun0102/embulk-parser-jsonl/pull/4)
|
4
|
+
[maintenance] Upgrade Embulk v0.8.8 [#6](https://github.com/shun0102/embulk-parser-jsonl/pull/6)
|
5
|
+
|
1
6
|
## 0.1.1 - 2016-03-17
|
2
7
|
|
3
8
|
[fix bug] Avoid org.embulk.spi.json.JsonParseException: Unable to parse empty string [#5](https://github.com/shun0102/embulk-parser-jsonl/pull/5)
|
data/build.gradle
CHANGED
@@ -13,19 +13,19 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.1.1"
|
16
|
+
version = "0.1.2"
|
17
17
|
|
18
18
|
compileJava.options.encoding = 'UTF-8' // source encoding
|
19
19
|
sourceCompatibility = 1.7
|
20
20
|
targetCompatibility = 1.7
|
21
21
|
|
22
22
|
dependencies {
|
23
|
-
compile "org.embulk:embulk-core:0.8.
|
24
|
-
provided "org.embulk:embulk-core:0.8.
|
23
|
+
compile "org.embulk:embulk-core:0.8.8"
|
24
|
+
provided "org.embulk:embulk-core:0.8.8"
|
25
25
|
|
26
26
|
testCompile "junit:junit:4.+"
|
27
|
-
testCompile "org.embulk:embulk-core:0.8.
|
28
|
-
testCompile "org.embulk:embulk-standards:0.8.
|
27
|
+
testCompile "org.embulk:embulk-core:0.8.8:tests"
|
28
|
+
testCompile "org.embulk:embulk-standards:0.8.8"
|
29
29
|
}
|
30
30
|
|
31
31
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
data/embulk-parser-jsonl.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-parser-jsonl"
|
4
|
-
spec.version = "0.1.1"
|
4
|
+
spec.version = "0.1.2"
|
5
5
|
spec.authors = ["Shunsuke Mikami"]
|
6
6
|
spec.summary = "Jsonl parser plugin for Embulk"
|
7
7
|
spec.description = "Parses Jsonl files read by other file input plugins."
|
data/example/example.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: "sample"
|
4
|
+
parser:
|
5
|
+
type: jsonl
|
6
|
+
columns:
|
7
|
+
- {name: "foo", type: "string"}
|
8
|
+
- {name: "bool", type: "boolean"}
|
9
|
+
- {name: "bool_str", type: "string"}
|
10
|
+
- {name: "int", type: "long"}
|
11
|
+
- {name: "int_str", type: "string"}
|
12
|
+
- {name: "time", type: "timestamp", format: '%Y-%m-%d %H:%M:%S'}
|
13
|
+
- {name: "double", type: "double"}
|
14
|
+
- {name: "double_str", type: "string"}
|
15
|
+
- {name: "array", type: "json"}
|
16
|
+
column_options:
|
17
|
+
bool_str: {type: "boolean"}
|
18
|
+
int_str: {type: "long"}
|
19
|
+
double_str: {type: "double"}
|
20
|
+
|
21
|
+
|
22
|
+
out:
|
23
|
+
type: stdout
|
data/example/sample.json
ADDED
@@ -0,0 +1,2 @@
|
|
1
|
+
{"foo": "bar", "bool": true, "bool_str": "true", "int": 1, "int_str": "42", "time": "2016-3-2 00:39:18", "double": 1.2, "double_str": "2.4", "array": [1, 2, 3]}
|
2
|
+
{"foo": null, "bool": false, "bool_str": "false", "int": 1, "int_str": "42", "time": "2016-3-2 00:39:18", "double": 1.2, "double_str": "2.4", "array": [{"inner": "hoge"}, {"inner": 1.5}]}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
package org.embulk.parser.jsonl;
|
2
|
+
|
3
|
+
import org.embulk.spi.DataException;
|
4
|
+
|
5
|
+
public class JsonRecordValidateException
|
6
|
+
extends DataException {
|
7
|
+
JsonRecordValidateException(String message) {
|
8
|
+
super(message);
|
9
|
+
}
|
10
|
+
|
11
|
+
public JsonRecordValidateException(Throwable cause) {
|
12
|
+
super(cause);
|
13
|
+
}
|
14
|
+
}
|
@@ -0,0 +1,16 @@
|
|
1
|
+
package org.embulk.parser.jsonl;
|
2
|
+
|
3
|
+
import org.embulk.config.Config;
|
4
|
+
import org.embulk.config.ConfigDefault;
|
5
|
+
import org.embulk.config.Task;
|
6
|
+
import org.embulk.spi.type.Type;
|
7
|
+
|
8
|
+
import com.google.common.base.Optional;
|
9
|
+
|
10
|
+
public interface JsonlColumnOption
|
11
|
+
extends Task
|
12
|
+
{
|
13
|
+
@Config("type")
|
14
|
+
@ConfigDefault("null")
|
15
|
+
Optional<Type> getType();
|
16
|
+
}
|
@@ -1,6 +1,8 @@
|
|
1
1
|
package org.embulk.parser.jsonl;
|
2
2
|
|
3
3
|
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.base.Supplier;
|
5
|
+
import com.google.common.collect.ImmutableList;
|
4
6
|
import com.google.common.collect.ImmutableMap;
|
5
7
|
import org.embulk.config.Config;
|
6
8
|
import org.embulk.config.ConfigDefault;
|
@@ -8,8 +10,10 @@ import org.embulk.config.ConfigException;
|
|
8
10
|
import org.embulk.config.ConfigSource;
|
9
11
|
import org.embulk.config.Task;
|
10
12
|
import org.embulk.config.TaskSource;
|
13
|
+
import org.embulk.parser.jsonl.getter.CommonColumnGetter;
|
14
|
+
import org.embulk.parser.jsonl.getter.ColumnGetterFactory;
|
11
15
|
import org.embulk.spi.Column;
|
12
|
-
import org.embulk.spi.ColumnVisitor;
|
16
|
+
import org.embulk.spi.ColumnConfig;
|
13
17
|
import org.embulk.spi.DataException;
|
14
18
|
import org.embulk.spi.Exec;
|
15
19
|
import org.embulk.spi.FileInput;
|
@@ -23,10 +27,6 @@ import org.embulk.spi.json.JsonParser;
|
|
23
27
|
import org.embulk.spi.time.TimestampParser;
|
24
28
|
import org.embulk.spi.util.LineDecoder;
|
25
29
|
import org.embulk.spi.util.Timestamps;
|
26
|
-
import org.msgpack.core.MessageTypeException;
|
27
|
-
import org.msgpack.value.BooleanValue;
|
28
|
-
import org.msgpack.value.FloatValue;
|
29
|
-
import org.msgpack.value.IntegerValue;
|
30
30
|
import org.msgpack.value.Value;
|
31
31
|
import org.slf4j.Logger;
|
32
32
|
|
@@ -52,6 +52,10 @@ public class JsonlParserPlugin
|
|
52
52
|
@Config("stop_on_invalid_record")
|
53
53
|
@ConfigDefault("false")
|
54
54
|
boolean getStopOnInvalidRecord();
|
55
|
+
|
56
|
+
@Config("column_options")
|
57
|
+
@ConfigDefault("{}")
|
58
|
+
Map<String, JsonlColumnOption> getColumnOptions();
|
55
59
|
}
|
56
60
|
|
57
61
|
private final Logger log;
|
@@ -69,7 +73,14 @@ public class JsonlParserPlugin
|
|
69
73
|
public void transaction(ConfigSource configSource, Control control)
|
70
74
|
{
|
71
75
|
PluginTask task = configSource.loadConfig(PluginTask.class);
|
72
|
-
|
76
|
+
SchemaConfig schemaConfig = getSchemaConfig(task);
|
77
|
+
ImmutableList.Builder<Column> columns = ImmutableList.builder();
|
78
|
+
for (int i = 0; i < schemaConfig.getColumnCount(); i++) {
|
79
|
+
ColumnConfig columnConfig = schemaConfig.getColumn(i);
|
80
|
+
JsonlColumnOption columnOption = columnOptionOf(task.getColumnOptions(), columnConfig.getName());
|
81
|
+
columns.add(new Column(i, columnConfig.getName(), columnOption.getType().or(columnConfig.getType())));
|
82
|
+
}
|
83
|
+
control.run(task.dump(), new Schema(columns.build()));
|
73
84
|
}
|
74
85
|
|
75
86
|
// this method is to keep the backward compatibility of 'schema' option.
|
@@ -97,12 +108,21 @@ public class JsonlParserPlugin
|
|
97
108
|
|
98
109
|
setColumnNameValues(schema);
|
99
110
|
|
100
|
-
final
|
111
|
+
final SchemaConfig schemaConfig = getSchemaConfig(task);
|
112
|
+
final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, schemaConfig);
|
101
113
|
final LineDecoder decoder = newLineDecoder(input, task);
|
102
114
|
final JsonParser jsonParser = newJsonParser();
|
103
115
|
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
104
116
|
|
105
117
|
try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
118
|
+
ColumnGetterFactory factory = new ColumnGetterFactory(pageBuilder, timestampParsers);
|
119
|
+
ImmutableMap.Builder<String, CommonColumnGetter> columnGettersBuilder = ImmutableMap.builder();
|
120
|
+
for (ColumnConfig columnConfig : schemaConfig.getColumns()) {
|
121
|
+
CommonColumnGetter columnGetter = factory.newColumnGetter(columnConfig);
|
122
|
+
columnGettersBuilder.put(columnConfig.getName(), columnGetter);
|
123
|
+
}
|
124
|
+
ImmutableMap<String, CommonColumnGetter> columnGetters = columnGettersBuilder.build();
|
125
|
+
|
106
126
|
while (decoder.nextFile()) { // TODO this implementation should be improved with new JsonParser API on Embulk v0.8.3
|
107
127
|
lineNumber = 0;
|
108
128
|
|
@@ -117,115 +137,12 @@ public class JsonlParserPlugin
|
|
117
137
|
}
|
118
138
|
|
119
139
|
final Map<Value, Value> record = value.asMapValue().map();
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
if (isNil(v)) {
|
127
|
-
pageBuilder.setNull(column);
|
128
|
-
}
|
129
|
-
else {
|
130
|
-
try {
|
131
|
-
pageBuilder.setBoolean(column, ((BooleanValue) v).getBoolean());
|
132
|
-
}
|
133
|
-
catch (MessageTypeException e) {
|
134
|
-
throw new JsonRecordValidateException(e);
|
135
|
-
}
|
136
|
-
}
|
137
|
-
}
|
138
|
-
|
139
|
-
@Override
|
140
|
-
public void longColumn(Column column)
|
141
|
-
{
|
142
|
-
Value v = record.get(getColumnNameValue(column));
|
143
|
-
if (isNil(v)) {
|
144
|
-
pageBuilder.setNull(column);
|
145
|
-
}
|
146
|
-
else {
|
147
|
-
try {
|
148
|
-
pageBuilder.setLong(column, ((IntegerValue) v).asLong());
|
149
|
-
}
|
150
|
-
catch (MessageTypeException e) {
|
151
|
-
throw new JsonRecordValidateException(e);
|
152
|
-
}
|
153
|
-
}
|
154
|
-
}
|
155
|
-
|
156
|
-
@Override
|
157
|
-
public void doubleColumn(Column column)
|
158
|
-
{
|
159
|
-
Value v = record.get(getColumnNameValue(column));
|
160
|
-
if (isNil(v)) {
|
161
|
-
pageBuilder.setNull(column);
|
162
|
-
}
|
163
|
-
else {
|
164
|
-
try {
|
165
|
-
pageBuilder.setDouble(column, ((FloatValue) v).toDouble());
|
166
|
-
}
|
167
|
-
catch (MessageTypeException e) {
|
168
|
-
throw new JsonRecordValidateException(e);
|
169
|
-
}
|
170
|
-
}
|
171
|
-
}
|
172
|
-
|
173
|
-
@Override
|
174
|
-
public void stringColumn(Column column)
|
175
|
-
{
|
176
|
-
Value v = record.get(getColumnNameValue(column));
|
177
|
-
if (isNil(v)) {
|
178
|
-
pageBuilder.setNull(column);
|
179
|
-
}
|
180
|
-
else {
|
181
|
-
try {
|
182
|
-
pageBuilder.setString(column, v.toString());
|
183
|
-
}
|
184
|
-
catch (MessageTypeException e) {
|
185
|
-
throw new JsonRecordValidateException(e);
|
186
|
-
}
|
187
|
-
}
|
188
|
-
}
|
189
|
-
|
190
|
-
@Override
|
191
|
-
public void timestampColumn(Column column)
|
192
|
-
{
|
193
|
-
Value v = record.get(getColumnNameValue(column));
|
194
|
-
if (isNil(v)) {
|
195
|
-
pageBuilder.setNull(column);
|
196
|
-
}
|
197
|
-
else {
|
198
|
-
try {
|
199
|
-
pageBuilder.setTimestamp(column, timestampParsers[column.getIndex()].parse(v.toString()));
|
200
|
-
}
|
201
|
-
catch (MessageTypeException e) {
|
202
|
-
throw new JsonRecordValidateException(e);
|
203
|
-
}
|
204
|
-
}
|
205
|
-
}
|
206
|
-
|
207
|
-
@Override
|
208
|
-
public void jsonColumn(Column column)
|
209
|
-
{
|
210
|
-
Value v = record.get(getColumnNameValue(column));
|
211
|
-
if (isNil(v)) {
|
212
|
-
pageBuilder.setNull(column);
|
213
|
-
}
|
214
|
-
else {
|
215
|
-
try {
|
216
|
-
pageBuilder.setJson(column, v);
|
217
|
-
}
|
218
|
-
catch (MessageTypeException e) {
|
219
|
-
throw new JsonRecordValidateException(e);
|
220
|
-
}
|
221
|
-
}
|
222
|
-
}
|
223
|
-
|
224
|
-
private boolean isNil(Value v)
|
225
|
-
{
|
226
|
-
return v == null || v.isNilValue();
|
227
|
-
}
|
228
|
-
});
|
140
|
+
for (Column column : schema.getColumns()) {
|
141
|
+
Value v = record.get(getColumnNameValue(column));
|
142
|
+
CommonColumnGetter columnGetter = columnGetters.get(column.getName());
|
143
|
+
columnGetter.setValue(v);
|
144
|
+
column.visit(columnGetter);
|
145
|
+
}
|
229
146
|
|
230
147
|
pageBuilder.addRecord();
|
231
148
|
}
|
@@ -267,17 +184,17 @@ public class JsonlParserPlugin
|
|
267
184
|
return new JsonParser();
|
268
185
|
}
|
269
186
|
|
270
|
-
static class JsonRecordValidateException
|
271
|
-
extends DataException
|
187
|
+
private static JsonlColumnOption columnOptionOf(Map<String, JsonlColumnOption> columnOptions, String columnName)
|
272
188
|
{
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
189
|
+
return Optional.fromNullable(columnOptions.get(columnName)).or(
|
190
|
+
// default column option
|
191
|
+
new Supplier<JsonlColumnOption>()
|
192
|
+
{
|
193
|
+
public JsonlColumnOption get()
|
194
|
+
{
|
195
|
+
return Exec.newConfigSource().loadConfig(JsonlColumnOption.class);
|
196
|
+
}
|
197
|
+
});
|
282
198
|
}
|
199
|
+
|
283
200
|
}
|
@@ -0,0 +1,24 @@
|
|
1
|
+
package org.embulk.parser.jsonl.getter;
|
2
|
+
|
3
|
+
import org.embulk.spi.ColumnConfig;
|
4
|
+
import org.embulk.spi.PageBuilder;
|
5
|
+
import org.embulk.spi.time.TimestampParser;
|
6
|
+
|
7
|
+
public class ColumnGetterFactory {
|
8
|
+
private PageBuilder pageBuilder;
|
9
|
+
private TimestampParser[] timestampParsers;
|
10
|
+
|
11
|
+
public ColumnGetterFactory(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
|
12
|
+
this.pageBuilder = pageBuilder;
|
13
|
+
this.timestampParsers = timestampParsers;
|
14
|
+
}
|
15
|
+
|
16
|
+
public CommonColumnGetter newColumnGetter(ColumnConfig columnConfig) {
|
17
|
+
switch (columnConfig.getType().getName()) {
|
18
|
+
case "string":
|
19
|
+
return new StringColumnGetter(pageBuilder, timestampParsers);
|
20
|
+
default:
|
21
|
+
return new CommonColumnGetter(pageBuilder, timestampParsers);
|
22
|
+
}
|
23
|
+
}
|
24
|
+
}
|
@@ -0,0 +1,131 @@
|
|
1
|
+
package org.embulk.parser.jsonl.getter;
|
2
|
+
|
3
|
+
import org.embulk.parser.jsonl.JsonRecordValidateException;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.ColumnVisitor;
|
6
|
+
import org.embulk.spi.PageBuilder;
|
7
|
+
import org.embulk.spi.time.TimestampParser;
|
8
|
+
import org.msgpack.core.MessageTypeException;
|
9
|
+
import org.msgpack.value.BooleanValue;
|
10
|
+
import org.msgpack.value.FloatValue;
|
11
|
+
import org.msgpack.value.IntegerValue;
|
12
|
+
import org.msgpack.value.Value;
|
13
|
+
|
14
|
+
public class CommonColumnGetter implements ColumnVisitor {
|
15
|
+
protected final PageBuilder pageBuilder;
|
16
|
+
protected final TimestampParser[] timestampParsers;
|
17
|
+
|
18
|
+
protected Value value;
|
19
|
+
|
20
|
+
public CommonColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers)
|
21
|
+
{
|
22
|
+
this.pageBuilder = pageBuilder;
|
23
|
+
this.timestampParsers = timestampParsers;
|
24
|
+
}
|
25
|
+
|
26
|
+
public void setValue(Value value)
|
27
|
+
{
|
28
|
+
this.value = value;
|
29
|
+
}
|
30
|
+
|
31
|
+
@Override
|
32
|
+
public void booleanColumn(Column column)
|
33
|
+
{
|
34
|
+
if (isNil(value)) {
|
35
|
+
pageBuilder.setNull(column);
|
36
|
+
}
|
37
|
+
else {
|
38
|
+
try {
|
39
|
+
pageBuilder.setBoolean(column, ((BooleanValue) value).getBoolean());
|
40
|
+
}
|
41
|
+
catch (MessageTypeException e) {
|
42
|
+
throw new JsonRecordValidateException(e);
|
43
|
+
}
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
@Override
|
48
|
+
public void longColumn(Column column)
|
49
|
+
{
|
50
|
+
if (isNil(value)) {
|
51
|
+
pageBuilder.setNull(column);
|
52
|
+
}
|
53
|
+
else {
|
54
|
+
try {
|
55
|
+
pageBuilder.setLong(column, ((IntegerValue) value).asLong());
|
56
|
+
}
|
57
|
+
catch (MessageTypeException e) {
|
58
|
+
throw new JsonRecordValidateException(e);
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
@Override
|
64
|
+
public void doubleColumn(Column column)
|
65
|
+
{
|
66
|
+
if (isNil(value)) {
|
67
|
+
pageBuilder.setNull(column);
|
68
|
+
}
|
69
|
+
else {
|
70
|
+
try {
|
71
|
+
pageBuilder.setDouble(column, ((FloatValue) value).toDouble());
|
72
|
+
}
|
73
|
+
catch (MessageTypeException e) {
|
74
|
+
throw new JsonRecordValidateException(e);
|
75
|
+
}
|
76
|
+
}
|
77
|
+
}
|
78
|
+
|
79
|
+
@Override
|
80
|
+
public void stringColumn(Column column)
|
81
|
+
{
|
82
|
+
if (isNil(value)) {
|
83
|
+
pageBuilder.setNull(column);
|
84
|
+
}
|
85
|
+
else {
|
86
|
+
try {
|
87
|
+
pageBuilder.setString(column, value.toString());
|
88
|
+
}
|
89
|
+
catch (MessageTypeException e) {
|
90
|
+
throw new JsonRecordValidateException(e);
|
91
|
+
}
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
95
|
+
@Override
|
96
|
+
public void timestampColumn(Column column)
|
97
|
+
{
|
98
|
+
if (isNil(value)) {
|
99
|
+
pageBuilder.setNull(column);
|
100
|
+
}
|
101
|
+
else {
|
102
|
+
try {
|
103
|
+
pageBuilder.setTimestamp(column, timestampParsers[column.getIndex()].parse(value.toString()));
|
104
|
+
}
|
105
|
+
catch (MessageTypeException e) {
|
106
|
+
throw new JsonRecordValidateException(e);
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
@Override
|
112
|
+
public void jsonColumn(Column column)
|
113
|
+
{
|
114
|
+
if (isNil(value)) {
|
115
|
+
pageBuilder.setNull(column);
|
116
|
+
}
|
117
|
+
else {
|
118
|
+
try {
|
119
|
+
pageBuilder.setJson(column, value);
|
120
|
+
}
|
121
|
+
catch (MessageTypeException e) {
|
122
|
+
throw new JsonRecordValidateException(e);
|
123
|
+
}
|
124
|
+
}
|
125
|
+
}
|
126
|
+
|
127
|
+
protected boolean isNil(Value v)
|
128
|
+
{
|
129
|
+
return v == null || v.isNilValue();
|
130
|
+
}
|
131
|
+
}
|
@@ -0,0 +1,68 @@
|
|
1
|
+
package org.embulk.parser.jsonl.getter;
|
2
|
+
|
3
|
+
import org.embulk.parser.jsonl.JsonRecordValidateException;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.PageBuilder;
|
6
|
+
import org.embulk.spi.time.TimestampParser;
|
7
|
+
import org.msgpack.core.MessageTypeException;
|
8
|
+
|
9
|
+
public class StringColumnGetter extends CommonColumnGetter {
|
10
|
+
|
11
|
+
public StringColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers)
|
12
|
+
{
|
13
|
+
super(pageBuilder, timestampParsers);
|
14
|
+
}
|
15
|
+
|
16
|
+
private String getValueAsString()
|
17
|
+
{
|
18
|
+
return value.toString();
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public void booleanColumn(Column column)
|
23
|
+
{
|
24
|
+
if (isNil(value)) {
|
25
|
+
pageBuilder.setNull(column);
|
26
|
+
}
|
27
|
+
else {
|
28
|
+
try {
|
29
|
+
pageBuilder.setBoolean(column, Boolean.valueOf(getValueAsString()));
|
30
|
+
}
|
31
|
+
catch (MessageTypeException e) {
|
32
|
+
throw new JsonRecordValidateException(e);
|
33
|
+
}
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public void longColumn(Column column)
|
39
|
+
{
|
40
|
+
if (isNil(value)) {
|
41
|
+
pageBuilder.setNull(column);
|
42
|
+
}
|
43
|
+
else {
|
44
|
+
try {
|
45
|
+
pageBuilder.setLong(column, Long.valueOf(getValueAsString()));
|
46
|
+
}
|
47
|
+
catch (MessageTypeException | NumberFormatException e) {
|
48
|
+
throw new JsonRecordValidateException(e);
|
49
|
+
}
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
53
|
+
@Override
|
54
|
+
public void doubleColumn(Column column)
|
55
|
+
{
|
56
|
+
if (isNil(value)) {
|
57
|
+
pageBuilder.setNull(column);
|
58
|
+
}
|
59
|
+
else {
|
60
|
+
try {
|
61
|
+
pageBuilder.setDouble(column, Double.valueOf(getValueAsString()));
|
62
|
+
}
|
63
|
+
catch (MessageTypeException | NumberFormatException e) {
|
64
|
+
throw new JsonRecordValidateException(e);
|
65
|
+
}
|
66
|
+
}
|
67
|
+
}
|
68
|
+
}
|
@@ -3,10 +3,12 @@ package org.embulk.parser.jsonl;
|
|
3
3
|
import com.google.common.collect.ImmutableList;
|
4
4
|
import com.google.common.collect.Lists;
|
5
5
|
import org.embulk.EmbulkTestRuntime;
|
6
|
+
import org.embulk.config.ConfigLoader;
|
6
7
|
import org.embulk.config.ConfigSource;
|
7
8
|
import org.embulk.config.TaskSource;
|
8
9
|
import org.embulk.spi.ColumnConfig;
|
9
10
|
import org.embulk.spi.DataException;
|
11
|
+
import org.embulk.spi.Exec;
|
10
12
|
import org.embulk.spi.FileInput;
|
11
13
|
import org.embulk.spi.ParserPlugin;
|
12
14
|
import org.embulk.spi.Schema;
|
@@ -21,6 +23,7 @@ import org.junit.Rule;
|
|
21
23
|
import org.junit.Test;
|
22
24
|
|
23
25
|
import java.io.ByteArrayInputStream;
|
26
|
+
import java.io.File;
|
24
27
|
import java.io.IOException;
|
25
28
|
import java.io.InputStream;
|
26
29
|
import java.util.List;
|
@@ -176,11 +179,57 @@ public class TestJsonlParserPlugin
|
|
176
179
|
}
|
177
180
|
}
|
178
181
|
|
182
|
+
@Test
|
183
|
+
public void useColumnOptions()
|
184
|
+
throws Exception
|
185
|
+
{
|
186
|
+
|
187
|
+
SchemaConfig schema = schema(
|
188
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE));
|
189
|
+
File yamlFile = getResourceFile("use_column_options.yml");
|
190
|
+
ConfigSource config = getConfigFromYamlFile(yamlFile);
|
191
|
+
|
192
|
+
transaction(config, fileInput(
|
193
|
+
"{\"_c0\":\"true\",\"_c1\":\"10\",\"_c2\":\"0.1\"}",
|
194
|
+
"{\"_c0\":\"false\",\"_c1\":\"-10\",\"_c2\":\"1.0\"}"
|
195
|
+
));
|
196
|
+
|
197
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
198
|
+
assertEquals(2, records.size());
|
199
|
+
|
200
|
+
Object[] record;
|
201
|
+
{
|
202
|
+
record = records.get(0);
|
203
|
+
assertEquals(true, record[0]);
|
204
|
+
assertEquals(10L, record[1]);
|
205
|
+
assertEquals(0.1, (Double) record[2], 0.0001);
|
206
|
+
}
|
207
|
+
{
|
208
|
+
record = records.get(1);
|
209
|
+
assertEquals(false, record[0]);
|
210
|
+
assertEquals(-10L, record[1]);
|
211
|
+
assertEquals(1.0, (Double) record[2], 0.0001);
|
212
|
+
}
|
213
|
+
}
|
214
|
+
|
179
215
|
private ConfigSource config()
|
180
216
|
{
|
181
217
|
return runtime.getExec().newConfigSource();
|
182
218
|
}
|
183
219
|
|
220
|
+
private File getResourceFile(String resourceName)
|
221
|
+
throws IOException
|
222
|
+
{
|
223
|
+
return new File(this.getClass().getResource(resourceName).getFile());
|
224
|
+
}
|
225
|
+
|
226
|
+
private ConfigSource getConfigFromYamlFile(File yamlFile)
|
227
|
+
throws IOException
|
228
|
+
{
|
229
|
+
ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
|
230
|
+
return loader.fromYamlFile(yamlFile);
|
231
|
+
}
|
232
|
+
|
184
233
|
private void transaction(ConfigSource config, final FileInput input)
|
185
234
|
{
|
186
235
|
plugin.transaction(config, new ParserPlugin.Control()
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-jsonl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shunsuke Mikami
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -53,6 +53,8 @@ files:
|
|
53
53
|
- Rakefile
|
54
54
|
- build.gradle
|
55
55
|
- embulk-parser-jsonl.gemspec
|
56
|
+
- example/example.yml
|
57
|
+
- example/sample.json
|
56
58
|
- gradle/wrapper/gradle-wrapper.jar
|
57
59
|
- gradle/wrapper/gradle-wrapper.properties
|
58
60
|
- gradlew
|
@@ -60,9 +62,15 @@ files:
|
|
60
62
|
- lib/embulk/guess/jsonl.rb
|
61
63
|
- lib/embulk/parser/jsonl.rb
|
62
64
|
- settings.gradle
|
65
|
+
- src/main/java/org/embulk/parser/jsonl/JsonRecordValidateException.java
|
66
|
+
- src/main/java/org/embulk/parser/jsonl/JsonlColumnOption.java
|
63
67
|
- src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java
|
68
|
+
- src/main/java/org/embulk/parser/jsonl/getter/ColumnGetterFactory.java
|
69
|
+
- src/main/java/org/embulk/parser/jsonl/getter/CommonColumnGetter.java
|
70
|
+
- src/main/java/org/embulk/parser/jsonl/getter/StringColumnGetter.java
|
64
71
|
- src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java
|
65
|
-
-
|
72
|
+
- src/test/resources/org/embulk/parser/jsonl/use_column_options.yml
|
73
|
+
- classpath/embulk-parser-jsonl-0.1.2.jar
|
66
74
|
homepage: https://github.com/shun0102/embulk-parser-jsonl
|
67
75
|
licenses:
|
68
76
|
- MIT
|