embulk-parser-jsonline 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/CHANGELOG.md +20 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +21 -0
- data/README.md +42 -0
- data/Rakefile +3 -0
- data/bench/gen_dummy.rb +5 -0
- data/bench/typecast.yml +17 -0
- data/bench/without_typecast.yml +17 -0
- data/build.gradle +79 -0
- data/classpath/embulk-parser-jsonline-0.2.2.jar +0 -0
- data/embulk-parser-jsonl.gemspec +19 -0
- data/example/compat.yml +21 -0
- data/example/example.yml +18 -0
- data/example/example_without_typecast.yml +18 -0
- data/example/sample.json +2 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/guess/jsonl.rb +32 -0
- data/lib/embulk/parser/jsonl.rb +3 -0
- data/settings.gradle +1 -0
- data/src/main/java/org/embulk/parser/jsonl/ColumnCaster.java +97 -0
- data/src/main/java/org/embulk/parser/jsonl/ColumnVisitorImpl.java +164 -0
- data/src/main/java/org/embulk/parser/jsonl/JsonRecordValidateException.java +22 -0
- data/src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java +225 -0
- data/src/main/java/org/embulk/parser/jsonl/cast/BooleanCast.java +39 -0
- data/src/main/java/org/embulk/parser/jsonl/cast/DoubleCast.java +41 -0
- data/src/main/java/org/embulk/parser/jsonl/cast/JsonCast.java +40 -0
- data/src/main/java/org/embulk/parser/jsonl/cast/LongCast.java +47 -0
- data/src/main/java/org/embulk/parser/jsonl/cast/StringCast.java +82 -0
- data/src/test/java/org/embulk/parser/jsonl/TestColumnCaster.java +256 -0
- data/src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java +278 -0
- data/src/test/java/org/embulk/parser/jsonl/cast/TestBooleanCast.java +56 -0
- data/src/test/java/org/embulk/parser/jsonl/cast/TestDoubleCast.java +50 -0
- data/src/test/java/org/embulk/parser/jsonl/cast/TestJsonCast.java +80 -0
- data/src/test/java/org/embulk/parser/jsonl/cast/TestLongCast.java +42 -0
- data/src/test/java/org/embulk/parser/jsonl/cast/TestStringCast.java +103 -0
- data/src/test/resources/org/embulk/parser/jsonl/use_column_options.yml +9 -0
- metadata +112 -0
@@ -0,0 +1,97 @@
|
|
1
|
+
package org.embulk.parser.jsonl;
|
2
|
+
|
3
|
+
import org.embulk.parser.jsonl.cast.BooleanCast;
|
4
|
+
import org.embulk.parser.jsonl.cast.DoubleCast;
|
5
|
+
import org.embulk.parser.jsonl.cast.JsonCast;
|
6
|
+
import org.embulk.parser.jsonl.cast.LongCast;
|
7
|
+
import org.embulk.parser.jsonl.cast.StringCast;
|
8
|
+
import org.embulk.spi.DataException;
|
9
|
+
import org.embulk.spi.time.Timestamp;
|
10
|
+
import org.embulk.spi.time.TimestampParser;
|
11
|
+
import org.msgpack.value.Value;
|
12
|
+
|
13
|
+
class ColumnCaster
|
14
|
+
{
|
15
|
+
ColumnCaster() {}
|
16
|
+
|
17
|
+
public static boolean asBoolean(Value value) throws DataException
|
18
|
+
{
|
19
|
+
if (value.isBooleanValue()) {
|
20
|
+
return value.asBooleanValue().getBoolean();
|
21
|
+
}
|
22
|
+
else if (value.isIntegerValue()) {
|
23
|
+
return LongCast.asBoolean(value.asIntegerValue().asLong());
|
24
|
+
}
|
25
|
+
else if (value.isFloatValue()) {
|
26
|
+
return DoubleCast.asBoolean(value.asFloatValue().toDouble());
|
27
|
+
}
|
28
|
+
else if (value.isStringValue()) {
|
29
|
+
return StringCast.asBoolean(value.asStringValue().asString());
|
30
|
+
}
|
31
|
+
else {
|
32
|
+
return JsonCast.asBoolean(value);
|
33
|
+
}
|
34
|
+
}
|
35
|
+
|
36
|
+
public static long asLong(Value value) throws DataException
|
37
|
+
{
|
38
|
+
if (value.isBooleanValue()) {
|
39
|
+
return BooleanCast.asLong(value.asBooleanValue().getBoolean());
|
40
|
+
}
|
41
|
+
else if (value.isIntegerValue()) {
|
42
|
+
return value.asIntegerValue().asLong();
|
43
|
+
}
|
44
|
+
else if (value.isFloatValue()) {
|
45
|
+
return DoubleCast.asLong(value.asFloatValue().toDouble());
|
46
|
+
}
|
47
|
+
else if (value.isStringValue()) {
|
48
|
+
return StringCast.asLong(value.asStringValue().asString());
|
49
|
+
}
|
50
|
+
else {
|
51
|
+
return JsonCast.asLong(value);
|
52
|
+
}
|
53
|
+
}
|
54
|
+
|
55
|
+
public static double asDouble(Value value) throws DataException
|
56
|
+
{
|
57
|
+
if (value.isBooleanValue()) {
|
58
|
+
return BooleanCast.asDouble(value.asBooleanValue().getBoolean());
|
59
|
+
}
|
60
|
+
else if (value.isIntegerValue()) {
|
61
|
+
return LongCast.asDouble(value.asIntegerValue().asLong());
|
62
|
+
}
|
63
|
+
else if (value.isFloatValue()) {
|
64
|
+
return value.asFloatValue().toDouble();
|
65
|
+
}
|
66
|
+
else if (value.isStringValue()) {
|
67
|
+
return StringCast.asDouble(value.asStringValue().asString());
|
68
|
+
}
|
69
|
+
else {
|
70
|
+
return JsonCast.asDouble(value);
|
71
|
+
}
|
72
|
+
}
|
73
|
+
|
74
|
+
public static String asString(Value value) throws DataException
|
75
|
+
{
|
76
|
+
return value.toString();
|
77
|
+
}
|
78
|
+
|
79
|
+
public static Timestamp asTimestamp(Value value, TimestampParser parser) throws DataException
|
80
|
+
{
|
81
|
+
if (value.isBooleanValue()) {
|
82
|
+
return BooleanCast.asTimestamp(value.asBooleanValue().getBoolean());
|
83
|
+
}
|
84
|
+
else if (value.isIntegerValue()) {
|
85
|
+
return LongCast.asTimestamp(value.asIntegerValue().asLong());
|
86
|
+
}
|
87
|
+
else if (value.isFloatValue()) {
|
88
|
+
return DoubleCast.asTimestamp(value.asFloatValue().toDouble());
|
89
|
+
}
|
90
|
+
else if (value.isStringValue()) {
|
91
|
+
return StringCast.asTimestamp(value.asStringValue().asString(), parser);
|
92
|
+
}
|
93
|
+
else {
|
94
|
+
return JsonCast.asTimestamp(value);
|
95
|
+
}
|
96
|
+
}
|
97
|
+
}
|
@@ -0,0 +1,164 @@
|
|
1
|
+
package org.embulk.parser.jsonl;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import org.embulk.parser.jsonl.JsonlParserPlugin.PluginTask;
|
5
|
+
import org.embulk.parser.jsonl.JsonlParserPlugin.TypecastColumnOption;
|
6
|
+
|
7
|
+
import org.embulk.spi.Column;
|
8
|
+
import org.embulk.spi.ColumnConfig;
|
9
|
+
import org.embulk.spi.ColumnVisitor;
|
10
|
+
import org.embulk.spi.PageBuilder;
|
11
|
+
import org.embulk.spi.Schema;
|
12
|
+
import org.embulk.spi.SchemaConfig;
|
13
|
+
import org.embulk.spi.time.Timestamp;
|
14
|
+
import org.embulk.spi.time.TimestampParser;
|
15
|
+
import org.msgpack.core.MessageTypeException;
|
16
|
+
import org.msgpack.value.Value;
|
17
|
+
|
18
|
+
public class ColumnVisitorImpl implements ColumnVisitor {
|
19
|
+
protected final PluginTask task;
|
20
|
+
protected final Schema schema;
|
21
|
+
protected final PageBuilder pageBuilder;
|
22
|
+
protected final TimestampParser[] timestampParsers;
|
23
|
+
protected final Boolean autoTypecasts[];
|
24
|
+
|
25
|
+
protected Value value;
|
26
|
+
|
27
|
+
public ColumnVisitorImpl(PluginTask task, Schema schema, PageBuilder pageBuilder, TimestampParser[] timestampParsers)
|
28
|
+
{
|
29
|
+
this.task = task;
|
30
|
+
this.schema = schema;
|
31
|
+
this.pageBuilder = pageBuilder;
|
32
|
+
this.timestampParsers = timestampParsers;
|
33
|
+
this.autoTypecasts = new Boolean[schema.size()];
|
34
|
+
buildAutoTypecasts();
|
35
|
+
}
|
36
|
+
|
37
|
+
private void buildAutoTypecasts()
|
38
|
+
{
|
39
|
+
for (Column column : schema.getColumns()) {
|
40
|
+
this.autoTypecasts[column.getIndex()] = task.getDefaultTypecast();
|
41
|
+
}
|
42
|
+
|
43
|
+
Optional<SchemaConfig> schemaConfig = task.getSchemaConfig();
|
44
|
+
if (schemaConfig.isPresent()) {
|
45
|
+
for (ColumnConfig columnConfig : schemaConfig.get().getColumns()) {
|
46
|
+
TypecastColumnOption columnOption = columnConfig.getOption().loadConfig(TypecastColumnOption.class);
|
47
|
+
Boolean autoTypecast = columnOption.getTypecast().or(task.getDefaultTypecast());
|
48
|
+
Column column = schema.lookupColumn(columnConfig.getName());
|
49
|
+
this.autoTypecasts[column.getIndex()] = autoTypecast;
|
50
|
+
}
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
public void setValue(Value value)
|
55
|
+
{
|
56
|
+
this.value = value;
|
57
|
+
}
|
58
|
+
|
59
|
+
@Override
|
60
|
+
public void booleanColumn(Column column)
|
61
|
+
{
|
62
|
+
if (isNil(value)) {
|
63
|
+
pageBuilder.setNull(column);
|
64
|
+
}
|
65
|
+
else {
|
66
|
+
try {
|
67
|
+
boolean booleanValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asBoolean(value) : value.asBooleanValue().getBoolean();
|
68
|
+
pageBuilder.setBoolean(column, booleanValue);
|
69
|
+
}
|
70
|
+
catch (MessageTypeException e) {
|
71
|
+
throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Boolean", value), e);
|
72
|
+
}
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
@Override
|
77
|
+
public void longColumn(Column column)
|
78
|
+
{
|
79
|
+
if (isNil(value)) {
|
80
|
+
pageBuilder.setNull(column);
|
81
|
+
}
|
82
|
+
else {
|
83
|
+
try {
|
84
|
+
long longValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asLong(value) : value.asIntegerValue().toLong();
|
85
|
+
pageBuilder.setLong(column, longValue);
|
86
|
+
}
|
87
|
+
catch (MessageTypeException e) {
|
88
|
+
throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Long", value), e);
|
89
|
+
}
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
@Override
|
94
|
+
public void doubleColumn(Column column)
|
95
|
+
{
|
96
|
+
if (isNil(value)) {
|
97
|
+
pageBuilder.setNull(column);
|
98
|
+
}
|
99
|
+
else {
|
100
|
+
try {
|
101
|
+
double doubleValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asDouble(value) : value.asFloatValue().toDouble();
|
102
|
+
pageBuilder.setDouble(column, doubleValue);
|
103
|
+
}
|
104
|
+
catch (MessageTypeException e) {
|
105
|
+
throw new JsonRecordValidateException(String.format("failed get \"%s\" as Double", value), e);
|
106
|
+
}
|
107
|
+
}
|
108
|
+
}
|
109
|
+
|
110
|
+
@Override
|
111
|
+
public void stringColumn(Column column)
|
112
|
+
{
|
113
|
+
if (isNil(value)) {
|
114
|
+
pageBuilder.setNull(column);
|
115
|
+
}
|
116
|
+
else {
|
117
|
+
try {
|
118
|
+
String string = autoTypecasts[column.getIndex()] ? ColumnCaster.asString(value) : value.asStringValue().toString();
|
119
|
+
pageBuilder.setString(column, string);
|
120
|
+
}
|
121
|
+
catch (MessageTypeException e) {
|
122
|
+
throw new JsonRecordValidateException(String.format("failed to get \"%s\" as String", value), e);
|
123
|
+
}
|
124
|
+
}
|
125
|
+
}
|
126
|
+
|
127
|
+
@Override
|
128
|
+
public void timestampColumn(Column column)
|
129
|
+
{
|
130
|
+
if (isNil(value)) {
|
131
|
+
pageBuilder.setNull(column);
|
132
|
+
}
|
133
|
+
else {
|
134
|
+
try {
|
135
|
+
Timestamp timestamp = ColumnCaster.asTimestamp(value, timestampParsers[column.getIndex()]);
|
136
|
+
pageBuilder.setTimestamp(column, timestamp);
|
137
|
+
}
|
138
|
+
catch (MessageTypeException e) {
|
139
|
+
throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Timestamp", value), e);
|
140
|
+
}
|
141
|
+
}
|
142
|
+
}
|
143
|
+
|
144
|
+
@Override
|
145
|
+
public void jsonColumn(Column column)
|
146
|
+
{
|
147
|
+
if (isNil(value)) {
|
148
|
+
pageBuilder.setNull(column);
|
149
|
+
}
|
150
|
+
else {
|
151
|
+
try {
|
152
|
+
pageBuilder.setJson(column, value);
|
153
|
+
}
|
154
|
+
catch (MessageTypeException e) {
|
155
|
+
throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Json", value), e);
|
156
|
+
}
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
protected boolean isNil(Value v)
|
161
|
+
{
|
162
|
+
return v == null || v.isNilValue();
|
163
|
+
}
|
164
|
+
}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
package org.embulk.parser.jsonl;
|
2
|
+
|
3
|
+
import org.embulk.spi.DataException;
|
4
|
+
|
5
|
+
public class JsonRecordValidateException
|
6
|
+
extends DataException
|
7
|
+
{
|
8
|
+
public JsonRecordValidateException(String message)
|
9
|
+
{
|
10
|
+
super(message);
|
11
|
+
}
|
12
|
+
|
13
|
+
public JsonRecordValidateException(String message, Throwable cause)
|
14
|
+
{
|
15
|
+
super(message, cause);
|
16
|
+
}
|
17
|
+
|
18
|
+
public JsonRecordValidateException(Throwable cause)
|
19
|
+
{
|
20
|
+
super(cause);
|
21
|
+
}
|
22
|
+
}
|
@@ -0,0 +1,225 @@
|
|
1
|
+
package org.embulk.parser.jsonl;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.base.Supplier;
|
5
|
+
import com.google.common.collect.ImmutableList;
|
6
|
+
import com.google.common.collect.ImmutableMap;
|
7
|
+
import org.embulk.config.Config;
|
8
|
+
import org.embulk.config.ConfigDefault;
|
9
|
+
import org.embulk.config.ConfigException;
|
10
|
+
import org.embulk.config.ConfigSource;
|
11
|
+
import org.embulk.config.Task;
|
12
|
+
import org.embulk.config.TaskSource;
|
13
|
+
import org.embulk.spi.Column;
|
14
|
+
import org.embulk.spi.ColumnConfig;
|
15
|
+
import org.embulk.spi.DataException;
|
16
|
+
import org.embulk.spi.Exec;
|
17
|
+
import org.embulk.spi.FileInput;
|
18
|
+
import org.embulk.spi.PageBuilder;
|
19
|
+
import org.embulk.spi.PageOutput;
|
20
|
+
import org.embulk.spi.ParserPlugin;
|
21
|
+
import org.embulk.spi.Schema;
|
22
|
+
import org.embulk.spi.SchemaConfig;
|
23
|
+
import org.embulk.spi.json.JsonParseException;
|
24
|
+
import org.embulk.spi.json.JsonParser;
|
25
|
+
import org.embulk.spi.time.TimestampParser;
|
26
|
+
import org.embulk.spi.type.Type;
|
27
|
+
import org.embulk.spi.util.LineDecoder;
|
28
|
+
import org.embulk.spi.util.Timestamps;
|
29
|
+
import org.msgpack.value.Value;
|
30
|
+
import org.slf4j.Logger;
|
31
|
+
|
32
|
+
import java.util.Map;
|
33
|
+
|
34
|
+
import static org.msgpack.value.ValueFactory.newString;
|
35
|
+
|
36
|
+
public class JsonlParserPlugin
|
37
|
+
implements ParserPlugin
|
38
|
+
{
|
39
|
+
@Deprecated
|
40
|
+
public interface JsonlColumnOption
|
41
|
+
extends Task
|
42
|
+
{
|
43
|
+
@Config("type")
|
44
|
+
@ConfigDefault("null")
|
45
|
+
Optional<Type> getType();
|
46
|
+
}
|
47
|
+
|
48
|
+
public interface TypecastColumnOption
|
49
|
+
extends Task
|
50
|
+
{
|
51
|
+
@Config("typecast")
|
52
|
+
@ConfigDefault("null")
|
53
|
+
public Optional<Boolean> getTypecast();
|
54
|
+
}
|
55
|
+
|
56
|
+
public interface PluginTask
|
57
|
+
extends Task, LineDecoder.DecoderTask, TimestampParser.Task
|
58
|
+
{
|
59
|
+
@Config("columns")
|
60
|
+
@ConfigDefault("null")
|
61
|
+
Optional<SchemaConfig> getSchemaConfig();
|
62
|
+
|
63
|
+
@Config("schema")
|
64
|
+
@ConfigDefault("null")
|
65
|
+
@Deprecated
|
66
|
+
Optional<SchemaConfig> getOldSchemaConfig();
|
67
|
+
|
68
|
+
@Config("stop_on_invalid_record")
|
69
|
+
@ConfigDefault("false")
|
70
|
+
boolean getStopOnInvalidRecord();
|
71
|
+
|
72
|
+
@Config("default_typecast")
|
73
|
+
@ConfigDefault("true")
|
74
|
+
Boolean getDefaultTypecast();
|
75
|
+
|
76
|
+
@Config("column_options")
|
77
|
+
@ConfigDefault("{}")
|
78
|
+
@Deprecated
|
79
|
+
Map<String, JsonlColumnOption> getColumnOptions();
|
80
|
+
}
|
81
|
+
|
82
|
+
private final Logger log;
|
83
|
+
|
84
|
+
private String line = null;
|
85
|
+
private long lineNumber = 0;
|
86
|
+
private Map<String, Value> columnNameValues;
|
87
|
+
|
88
|
+
public JsonlParserPlugin()
|
89
|
+
{
|
90
|
+
this.log = Exec.getLogger(JsonlParserPlugin.class);
|
91
|
+
}
|
92
|
+
|
93
|
+
@Override
|
94
|
+
public void transaction(ConfigSource configSource, Control control)
|
95
|
+
{
|
96
|
+
PluginTask task = configSource.loadConfig(PluginTask.class);
|
97
|
+
|
98
|
+
if (! task.getColumnOptions().isEmpty()) {
|
99
|
+
log.warn("embulk-parser-jsonl: \"column_options\" option is deprecated, specify type directly to \"columns\" option with typecast: true (default: true).");
|
100
|
+
}
|
101
|
+
|
102
|
+
SchemaConfig schemaConfig = getSchemaConfig(task);
|
103
|
+
ImmutableList.Builder<Column> columns = ImmutableList.builder();
|
104
|
+
for (int i = 0; i < schemaConfig.getColumnCount(); i++) {
|
105
|
+
ColumnConfig columnConfig = schemaConfig.getColumn(i);
|
106
|
+
Type type = getType(task, columnConfig);
|
107
|
+
columns.add(new Column(i, columnConfig.getName(), type));
|
108
|
+
}
|
109
|
+
control.run(task.dump(), new Schema(columns.build()));
|
110
|
+
}
|
111
|
+
|
112
|
+
private static Type getType(PluginTask task, ColumnConfig columnConfig)
|
113
|
+
{
|
114
|
+
JsonlColumnOption columnOption = columnOptionOf(task.getColumnOptions(), columnConfig.getName());
|
115
|
+
return columnOption.getType().or(columnConfig.getType());
|
116
|
+
}
|
117
|
+
|
118
|
+
// this method is to keep the backward compatibility of 'schema' option.
|
119
|
+
private SchemaConfig getSchemaConfig(PluginTask task)
|
120
|
+
{
|
121
|
+
if (task.getOldSchemaConfig().isPresent()) {
|
122
|
+
log.warn("Please use 'columns' option instead of 'schema' because the 'schema' option is deprecated. The next version will stop 'schema' option support.");
|
123
|
+
}
|
124
|
+
|
125
|
+
if (task.getSchemaConfig().isPresent()) {
|
126
|
+
return task.getSchemaConfig().get();
|
127
|
+
}
|
128
|
+
else if (task.getOldSchemaConfig().isPresent()) {
|
129
|
+
return task.getOldSchemaConfig().get();
|
130
|
+
}
|
131
|
+
else {
|
132
|
+
throw new ConfigException("Attribute 'columns' is required but not set");
|
133
|
+
}
|
134
|
+
}
|
135
|
+
|
136
|
+
@Override
|
137
|
+
public void run(TaskSource taskSource, Schema schema, FileInput input, PageOutput output)
|
138
|
+
{
|
139
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
140
|
+
|
141
|
+
setColumnNameValues(schema);
|
142
|
+
|
143
|
+
final SchemaConfig schemaConfig = getSchemaConfig(task);
|
144
|
+
final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, schemaConfig);
|
145
|
+
final LineDecoder decoder = newLineDecoder(input, task);
|
146
|
+
final JsonParser jsonParser = newJsonParser();
|
147
|
+
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
148
|
+
|
149
|
+
try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
150
|
+
ColumnVisitorImpl visitor = new ColumnVisitorImpl(task, schema, pageBuilder, timestampParsers);
|
151
|
+
|
152
|
+
while (decoder.nextFile()) { // TODO this implementation should be improved with new JsonParser API on Embulk v0.8.3
|
153
|
+
lineNumber = 0;
|
154
|
+
|
155
|
+
while ((line = decoder.poll()) != null) {
|
156
|
+
lineNumber++;
|
157
|
+
|
158
|
+
try {
|
159
|
+
Value value = jsonParser.parse(line);
|
160
|
+
|
161
|
+
if (!value.isMapValue()) {
|
162
|
+
throw new JsonRecordValidateException("Json string is not representing map value.");
|
163
|
+
}
|
164
|
+
|
165
|
+
final Map<Value, Value> record = value.asMapValue().map();
|
166
|
+
for (Column column : schema.getColumns()) {
|
167
|
+
Value v = record.get(getColumnNameValue(column));
|
168
|
+
visitor.setValue(v);
|
169
|
+
column.visit(visitor);
|
170
|
+
}
|
171
|
+
|
172
|
+
pageBuilder.addRecord();
|
173
|
+
}
|
174
|
+
catch (JsonRecordValidateException | JsonParseException e) {
|
175
|
+
if (stopOnInvalidRecord) {
|
176
|
+
throw new DataException(String.format("Invalid record at line %d: %s", lineNumber, line), e);
|
177
|
+
}
|
178
|
+
log.warn(String.format("Skipped line %d (%s): %s", lineNumber, e.getMessage(), line));
|
179
|
+
}
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
pageBuilder.finish();
|
184
|
+
}
|
185
|
+
}
|
186
|
+
|
187
|
+
private void setColumnNameValues(Schema schema)
|
188
|
+
{
|
189
|
+
ImmutableMap.Builder<String, Value> builder = ImmutableMap.builder();
|
190
|
+
for (Column column : schema.getColumns()) {
|
191
|
+
String name = column.getName();
|
192
|
+
builder.put(name, newString(name));
|
193
|
+
}
|
194
|
+
columnNameValues = builder.build();
|
195
|
+
}
|
196
|
+
|
197
|
+
private Value getColumnNameValue(Column column)
|
198
|
+
{
|
199
|
+
return columnNameValues.get(column.getName());
|
200
|
+
}
|
201
|
+
|
202
|
+
public LineDecoder newLineDecoder(FileInput input, PluginTask task)
|
203
|
+
{
|
204
|
+
return new LineDecoder(input, task);
|
205
|
+
}
|
206
|
+
|
207
|
+
public JsonParser newJsonParser()
|
208
|
+
{
|
209
|
+
return new JsonParser();
|
210
|
+
}
|
211
|
+
|
212
|
+
private static JsonlColumnOption columnOptionOf(Map<String, JsonlColumnOption> columnOptions, String columnName)
|
213
|
+
{
|
214
|
+
return Optional.fromNullable(columnOptions.get(columnName)).or(
|
215
|
+
// default column option
|
216
|
+
new Supplier<JsonlColumnOption>()
|
217
|
+
{
|
218
|
+
public JsonlColumnOption get()
|
219
|
+
{
|
220
|
+
return Exec.newConfigSource().loadConfig(JsonlColumnOption.class);
|
221
|
+
}
|
222
|
+
});
|
223
|
+
}
|
224
|
+
|
225
|
+
}
|
@@ -0,0 +1,39 @@
|
|
1
|
+
package org.embulk.parser.jsonl.cast;
|
2
|
+
|
3
|
+
import org.embulk.spi.DataException;
|
4
|
+
import org.embulk.spi.time.Timestamp;
|
5
|
+
|
6
|
+
public class BooleanCast
|
7
|
+
{
|
8
|
+
private BooleanCast() {}
|
9
|
+
|
10
|
+
private static String buildErrorMessage(String as, boolean value)
|
11
|
+
{
|
12
|
+
return String.format("cannot cast boolean to %s: \"%s\"", as, value);
|
13
|
+
}
|
14
|
+
|
15
|
+
public static boolean asBoolean(boolean value) throws DataException
|
16
|
+
{
|
17
|
+
return value;
|
18
|
+
}
|
19
|
+
|
20
|
+
public static long asLong(boolean value) throws DataException
|
21
|
+
{
|
22
|
+
return value ? 1 : 0;
|
23
|
+
}
|
24
|
+
|
25
|
+
public static double asDouble(boolean value) throws DataException
|
26
|
+
{
|
27
|
+
throw new DataException(buildErrorMessage("double", value));
|
28
|
+
}
|
29
|
+
|
30
|
+
public static String asString(boolean value) throws DataException
|
31
|
+
{
|
32
|
+
return value ? "true" : "false";
|
33
|
+
}
|
34
|
+
|
35
|
+
public static Timestamp asTimestamp(boolean value) throws DataException
|
36
|
+
{
|
37
|
+
throw new DataException(buildErrorMessage("timestamp", value));
|
38
|
+
}
|
39
|
+
}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
package org.embulk.parser.jsonl.cast;
|
2
|
+
|
3
|
+
import org.embulk.spi.DataException;
|
4
|
+
import org.embulk.spi.time.Timestamp;
|
5
|
+
|
6
|
+
public class DoubleCast
|
7
|
+
{
|
8
|
+
private DoubleCast() {}
|
9
|
+
|
10
|
+
private static String buildErrorMessage(String as, double value)
|
11
|
+
{
|
12
|
+
return String.format("cannot cast double to %s: \"%s\"", as, value);
|
13
|
+
}
|
14
|
+
|
15
|
+
public static boolean asBoolean(double value) throws DataException
|
16
|
+
{
|
17
|
+
throw new DataException(buildErrorMessage("boolean", value));
|
18
|
+
}
|
19
|
+
|
20
|
+
public static long asLong(double value) throws DataException
|
21
|
+
{
|
22
|
+
return (long) value;
|
23
|
+
}
|
24
|
+
|
25
|
+
public static double asDouble(double value) throws DataException
|
26
|
+
{
|
27
|
+
return value;
|
28
|
+
}
|
29
|
+
|
30
|
+
public static String asString(double value) throws DataException
|
31
|
+
{
|
32
|
+
return String.valueOf(value);
|
33
|
+
}
|
34
|
+
|
35
|
+
public static Timestamp asTimestamp(double value) throws DataException
|
36
|
+
{
|
37
|
+
long epochSecond = (long) value;
|
38
|
+
long nanoAdjustMent = (long) ((value - epochSecond) * 1000000000);
|
39
|
+
return Timestamp.ofEpochSecond(epochSecond, nanoAdjustMent);
|
40
|
+
}
|
41
|
+
}
|
@@ -0,0 +1,40 @@
|
|
1
|
+
package org.embulk.parser.jsonl.cast;
|
2
|
+
|
3
|
+
import org.embulk.spi.DataException;
|
4
|
+
import org.embulk.spi.time.Timestamp;
|
5
|
+
import org.msgpack.value.Value;
|
6
|
+
|
7
|
+
public class JsonCast
|
8
|
+
{
|
9
|
+
private JsonCast() {}
|
10
|
+
|
11
|
+
private static String buildErrorMessage(String as, Value value)
|
12
|
+
{
|
13
|
+
return String.format("cannot cast Json to %s: \"%s\"", as, value);
|
14
|
+
}
|
15
|
+
|
16
|
+
public static boolean asBoolean(Value value) throws DataException
|
17
|
+
{
|
18
|
+
throw new DataException(buildErrorMessage("boolean", value));
|
19
|
+
}
|
20
|
+
|
21
|
+
public static long asLong(Value value) throws DataException
|
22
|
+
{
|
23
|
+
throw new DataException(buildErrorMessage("long", value));
|
24
|
+
}
|
25
|
+
|
26
|
+
public static double asDouble(Value value) throws DataException
|
27
|
+
{
|
28
|
+
throw new DataException(buildErrorMessage("double", value));
|
29
|
+
}
|
30
|
+
|
31
|
+
public static String asString(Value value) throws DataException
|
32
|
+
{
|
33
|
+
return value.toString();
|
34
|
+
}
|
35
|
+
|
36
|
+
public static Timestamp asTimestamp(Value value) throws DataException
|
37
|
+
{
|
38
|
+
throw new DataException(buildErrorMessage("timestamp", value));
|
39
|
+
}
|
40
|
+
}
|
@@ -0,0 +1,47 @@
|
|
1
|
+
package org.embulk.parser.jsonl.cast;
|
2
|
+
|
3
|
+
import org.embulk.spi.DataException;
|
4
|
+
import org.embulk.spi.time.Timestamp;
|
5
|
+
|
6
|
+
public class LongCast
|
7
|
+
{
|
8
|
+
private LongCast() {}
|
9
|
+
|
10
|
+
private static String buildErrorMessage(String as, long value)
|
11
|
+
{
|
12
|
+
return String.format("cannot cast long to %s: \"%s\"", as, value);
|
13
|
+
}
|
14
|
+
|
15
|
+
public static boolean asBoolean(long value) throws DataException
|
16
|
+
{
|
17
|
+
if (value == 1) {
|
18
|
+
return true;
|
19
|
+
}
|
20
|
+
else if (value == 0) {
|
21
|
+
return false;
|
22
|
+
}
|
23
|
+
else {
|
24
|
+
throw new DataException(buildErrorMessage("boolean", value));
|
25
|
+
}
|
26
|
+
}
|
27
|
+
|
28
|
+
public static long asLong(long value) throws DataException
|
29
|
+
{
|
30
|
+
return value;
|
31
|
+
}
|
32
|
+
|
33
|
+
public static double asDouble(long value) throws DataException
|
34
|
+
{
|
35
|
+
return (double) value;
|
36
|
+
}
|
37
|
+
|
38
|
+
public static String asString(long value) throws DataException
|
39
|
+
{
|
40
|
+
return String.valueOf(value);
|
41
|
+
}
|
42
|
+
|
43
|
+
public static Timestamp asTimestamp(long value) throws DataException
|
44
|
+
{
|
45
|
+
return Timestamp.ofEpochSecond(value);
|
46
|
+
}
|
47
|
+
}
|