embulk-parser-jsonline 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/CHANGELOG.md +20 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +21 -0
- data/README.md +42 -0
- data/Rakefile +3 -0
- data/bench/gen_dummy.rb +5 -0
- data/bench/typecast.yml +17 -0
- data/bench/without_typecast.yml +17 -0
- data/build.gradle +79 -0
- data/classpath/embulk-parser-jsonline-0.2.2.jar +0 -0
- data/embulk-parser-jsonl.gemspec +19 -0
- data/example/compat.yml +21 -0
- data/example/example.yml +18 -0
- data/example/example_without_typecast.yml +18 -0
- data/example/sample.json +2 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/guess/jsonl.rb +32 -0
- data/lib/embulk/parser/jsonl.rb +3 -0
- data/settings.gradle +1 -0
- data/src/main/java/org/embulk/parser/jsonl/ColumnCaster.java +97 -0
- data/src/main/java/org/embulk/parser/jsonl/ColumnVisitorImpl.java +164 -0
- data/src/main/java/org/embulk/parser/jsonl/JsonRecordValidateException.java +22 -0
- data/src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java +225 -0
- data/src/main/java/org/embulk/parser/jsonl/cast/BooleanCast.java +39 -0
- data/src/main/java/org/embulk/parser/jsonl/cast/DoubleCast.java +41 -0
- data/src/main/java/org/embulk/parser/jsonl/cast/JsonCast.java +40 -0
- data/src/main/java/org/embulk/parser/jsonl/cast/LongCast.java +47 -0
- data/src/main/java/org/embulk/parser/jsonl/cast/StringCast.java +82 -0
- data/src/test/java/org/embulk/parser/jsonl/TestColumnCaster.java +256 -0
- data/src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java +278 -0
- data/src/test/java/org/embulk/parser/jsonl/cast/TestBooleanCast.java +56 -0
- data/src/test/java/org/embulk/parser/jsonl/cast/TestDoubleCast.java +50 -0
- data/src/test/java/org/embulk/parser/jsonl/cast/TestJsonCast.java +80 -0
- data/src/test/java/org/embulk/parser/jsonl/cast/TestLongCast.java +42 -0
- data/src/test/java/org/embulk/parser/jsonl/cast/TestStringCast.java +103 -0
- data/src/test/resources/org/embulk/parser/jsonl/use_column_options.yml +9 -0
- metadata +112 -0
@@ -0,0 +1,82 @@
|
|
1
|
+
package org.embulk.parser.jsonl.cast;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableSet;
|
4
|
+
import org.embulk.spi.DataException;
|
5
|
+
import org.embulk.spi.time.Timestamp;
|
6
|
+
import org.embulk.spi.time.TimestampParseException;
|
7
|
+
import org.embulk.spi.time.TimestampParser;
|
8
|
+
|
9
|
+
public class StringCast
|
10
|
+
{
|
11
|
+
// copy from csv plugin
|
12
|
+
public static final ImmutableSet<String> TRUE_STRINGS =
|
13
|
+
ImmutableSet.of(
|
14
|
+
"true", "True", "TRUE",
|
15
|
+
"yes", "Yes", "YES",
|
16
|
+
"t", "T", "y", "Y",
|
17
|
+
"on", "On", "ON",
|
18
|
+
"1");
|
19
|
+
|
20
|
+
public static final ImmutableSet<String> FALSE_STRINGS =
|
21
|
+
ImmutableSet.of(
|
22
|
+
"false", "False", "FALSE",
|
23
|
+
"no", "No", "NO",
|
24
|
+
"f", "F", "n", "N",
|
25
|
+
"off", "Off", "OFF",
|
26
|
+
"0");
|
27
|
+
|
28
|
+
private StringCast() {}
|
29
|
+
|
30
|
+
private static String buildErrorMessage(String as, String value)
|
31
|
+
{
|
32
|
+
return String.format("cannot cast String to %s: \"%s\"", as, value);
|
33
|
+
}
|
34
|
+
|
35
|
+
public static boolean asBoolean(String value) throws DataException
|
36
|
+
{
|
37
|
+
if (TRUE_STRINGS.contains(value)) {
|
38
|
+
return true;
|
39
|
+
}
|
40
|
+
else if (FALSE_STRINGS.contains(value)) {
|
41
|
+
return false;
|
42
|
+
}
|
43
|
+
else {
|
44
|
+
throw new DataException(buildErrorMessage("boolean", value));
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
public static long asLong(String value) throws DataException
|
49
|
+
{
|
50
|
+
try {
|
51
|
+
return Long.parseLong(value);
|
52
|
+
}
|
53
|
+
catch (NumberFormatException ex) {
|
54
|
+
throw new DataException(buildErrorMessage("long", value), ex);
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
public static double asDouble(String value) throws DataException
|
59
|
+
{
|
60
|
+
try {
|
61
|
+
return Double.parseDouble(value);
|
62
|
+
}
|
63
|
+
catch (NumberFormatException ex) {
|
64
|
+
throw new DataException(buildErrorMessage("double", value), ex);
|
65
|
+
}
|
66
|
+
}
|
67
|
+
|
68
|
+
public static String asString(String value) throws DataException
|
69
|
+
{
|
70
|
+
return value;
|
71
|
+
}
|
72
|
+
|
73
|
+
public static Timestamp asTimestamp(String value, TimestampParser parser) throws DataException
|
74
|
+
{
|
75
|
+
try {
|
76
|
+
return parser.parse(value);
|
77
|
+
}
|
78
|
+
catch (TimestampParseException ex) {
|
79
|
+
throw new DataException(buildErrorMessage("timestamp", value), ex);
|
80
|
+
}
|
81
|
+
}
|
82
|
+
}
|
@@ -0,0 +1,256 @@
|
|
1
|
+
package org.embulk.parser.jsonl;
|
2
|
+
|
3
|
+
import org.embulk.EmbulkTestRuntime;
|
4
|
+
import org.embulk.spi.DataException;
|
5
|
+
import org.embulk.spi.time.Timestamp;
|
6
|
+
import org.embulk.spi.time.TimestampParser;
|
7
|
+
import org.joda.time.DateTimeZone;
|
8
|
+
import org.jruby.embed.ScriptingContainer;
|
9
|
+
import org.junit.Before;
|
10
|
+
import org.junit.Rule;
|
11
|
+
import org.junit.Test;
|
12
|
+
import org.msgpack.value.MapValue;
|
13
|
+
import org.msgpack.value.Value;
|
14
|
+
import org.msgpack.value.ValueFactory;
|
15
|
+
|
16
|
+
import static org.junit.Assert.assertEquals;
|
17
|
+
import static org.junit.Assert.assertTrue;
|
18
|
+
import static org.junit.Assert.fail;
|
19
|
+
|
20
|
+
public class TestColumnCaster
|
21
|
+
{
|
22
|
+
@Rule
|
23
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
24
|
+
public MapValue mapValue;
|
25
|
+
public DataException thrown;
|
26
|
+
public ScriptingContainer jruby;
|
27
|
+
public TimestampParser parser;
|
28
|
+
|
29
|
+
@Before
|
30
|
+
public void createResource()
|
31
|
+
{
|
32
|
+
jruby = new ScriptingContainer();
|
33
|
+
thrown = new DataException("any");
|
34
|
+
Value[] kvs = new Value[2];
|
35
|
+
kvs[0] = ValueFactory.newString("k");
|
36
|
+
kvs[1] = ValueFactory.newString("v");
|
37
|
+
mapValue = ValueFactory.newMap(kvs);
|
38
|
+
parser = new TimestampParser(jruby, "%Y-%m-%d %H:%M:%S.%N", DateTimeZone.UTC);
|
39
|
+
}
|
40
|
+
|
41
|
+
@Test
|
42
|
+
public void asBooleanFromBoolean()
|
43
|
+
{
|
44
|
+
assertEquals(true, ColumnCaster.asBoolean(ValueFactory.newBoolean(true)));
|
45
|
+
}
|
46
|
+
|
47
|
+
@Test
|
48
|
+
public void asBooleanFromInteger()
|
49
|
+
{
|
50
|
+
assertEquals(true, ColumnCaster.asBoolean(ValueFactory.newInteger(1)));
|
51
|
+
try {
|
52
|
+
ColumnCaster.asBoolean(ValueFactory.newInteger(2));
|
53
|
+
fail();
|
54
|
+
}
|
55
|
+
catch (Throwable t) {
|
56
|
+
assertTrue(t instanceof DataException);
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
@Test
|
61
|
+
public void asBooleanFromFloat()
|
62
|
+
{
|
63
|
+
try {
|
64
|
+
ColumnCaster.asBoolean(ValueFactory.newFloat(1.1));
|
65
|
+
fail();
|
66
|
+
}
|
67
|
+
catch (Throwable t) {
|
68
|
+
assertTrue(t instanceof DataException);
|
69
|
+
}
|
70
|
+
}
|
71
|
+
|
72
|
+
@Test
|
73
|
+
public void asBooleanFromString()
|
74
|
+
{
|
75
|
+
assertEquals(true, ColumnCaster.asBoolean(ValueFactory.newString("true")));
|
76
|
+
try {
|
77
|
+
ColumnCaster.asBoolean(ValueFactory.newString("foo"));
|
78
|
+
fail();
|
79
|
+
}
|
80
|
+
catch (Throwable t) {
|
81
|
+
assertTrue(t instanceof DataException);
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
@Test
|
86
|
+
public void asBooleanFromJson()
|
87
|
+
{
|
88
|
+
try {
|
89
|
+
ColumnCaster.asBoolean(mapValue);
|
90
|
+
fail();
|
91
|
+
}
|
92
|
+
catch (Throwable t) {
|
93
|
+
assertTrue(t instanceof DataException);
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
97
|
+
@Test
|
98
|
+
public void asLongFromBoolean()
|
99
|
+
{
|
100
|
+
assertEquals(1, ColumnCaster.asLong(ValueFactory.newBoolean(true)));
|
101
|
+
}
|
102
|
+
|
103
|
+
@Test
|
104
|
+
public void asLongFromInteger()
|
105
|
+
{
|
106
|
+
assertEquals(1, ColumnCaster.asLong(ValueFactory.newInteger(1)));
|
107
|
+
}
|
108
|
+
|
109
|
+
@Test
|
110
|
+
public void asLongFromFloat()
|
111
|
+
{
|
112
|
+
assertEquals(1, ColumnCaster.asLong(ValueFactory.newFloat(1.5)));
|
113
|
+
}
|
114
|
+
|
115
|
+
@Test
|
116
|
+
public void asLongFromString()
|
117
|
+
{
|
118
|
+
assertEquals(1, ColumnCaster.asLong(ValueFactory.newString("1")));
|
119
|
+
try {
|
120
|
+
ColumnCaster.asLong(ValueFactory.newString("foo"));
|
121
|
+
fail();
|
122
|
+
}
|
123
|
+
catch (Throwable t) {
|
124
|
+
assertTrue(t instanceof DataException);
|
125
|
+
}
|
126
|
+
}
|
127
|
+
|
128
|
+
@Test
|
129
|
+
public void asLongFromJson()
|
130
|
+
{
|
131
|
+
try {
|
132
|
+
ColumnCaster.asLong(mapValue);
|
133
|
+
fail();
|
134
|
+
}
|
135
|
+
catch (Throwable t) {
|
136
|
+
assertTrue(t instanceof DataException);
|
137
|
+
}
|
138
|
+
}
|
139
|
+
|
140
|
+
@Test
|
141
|
+
public void asDoubleFromBoolean()
|
142
|
+
{
|
143
|
+
assertEquals(1, ColumnCaster.asLong(ValueFactory.newBoolean(true)));
|
144
|
+
}
|
145
|
+
|
146
|
+
@Test
|
147
|
+
public void asDoubleFromInteger()
|
148
|
+
{
|
149
|
+
assertEquals(1, ColumnCaster.asLong(ValueFactory.newInteger(1)));
|
150
|
+
}
|
151
|
+
|
152
|
+
@Test
|
153
|
+
public void asDoubleFromFloat()
|
154
|
+
{
|
155
|
+
assertEquals(1, ColumnCaster.asLong(ValueFactory.newFloat(1.5)));
|
156
|
+
}
|
157
|
+
|
158
|
+
@Test
|
159
|
+
public void asDoubleFromString()
|
160
|
+
{
|
161
|
+
assertEquals(1, ColumnCaster.asLong(ValueFactory.newString("1")));
|
162
|
+
try {
|
163
|
+
ColumnCaster.asLong(ValueFactory.newString("foo"));
|
164
|
+
fail();
|
165
|
+
}
|
166
|
+
catch (Throwable t) {
|
167
|
+
assertTrue(t instanceof DataException);
|
168
|
+
}
|
169
|
+
}
|
170
|
+
|
171
|
+
@Test
|
172
|
+
public void asDoubleFromJson()
|
173
|
+
{
|
174
|
+
try {
|
175
|
+
ColumnCaster.asLong(mapValue);
|
176
|
+
fail();
|
177
|
+
}
|
178
|
+
catch (Throwable t) {
|
179
|
+
assertTrue(t instanceof DataException);
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
@Test
|
184
|
+
public void asStringFromBoolean()
|
185
|
+
{
|
186
|
+
assertEquals("true", ColumnCaster.asString(ValueFactory.newBoolean(true)));
|
187
|
+
}
|
188
|
+
|
189
|
+
@Test
|
190
|
+
public void asStringFromInteger()
|
191
|
+
{
|
192
|
+
assertEquals("1", ColumnCaster.asString(ValueFactory.newInteger(1)));
|
193
|
+
}
|
194
|
+
|
195
|
+
@Test
|
196
|
+
public void asStringFromFloat()
|
197
|
+
{
|
198
|
+
assertEquals("1.5", ColumnCaster.asString(ValueFactory.newFloat(1.5)));
|
199
|
+
}
|
200
|
+
|
201
|
+
@Test
|
202
|
+
public void asStringFromString()
|
203
|
+
{
|
204
|
+
assertEquals("1", ColumnCaster.asString(ValueFactory.newString("1")));
|
205
|
+
}
|
206
|
+
|
207
|
+
@Test
|
208
|
+
public void asStringFromJson()
|
209
|
+
{
|
210
|
+
assertEquals("{\"k\":\"v\"}", ColumnCaster.asString(mapValue));
|
211
|
+
}
|
212
|
+
|
213
|
+
@Test
|
214
|
+
public void asTimestampFromBoolean()
|
215
|
+
{
|
216
|
+
try {
|
217
|
+
ColumnCaster.asTimestamp(ValueFactory.newBoolean(true), parser);
|
218
|
+
fail();
|
219
|
+
}
|
220
|
+
catch (Throwable t) {
|
221
|
+
assertTrue(t instanceof DataException);
|
222
|
+
}
|
223
|
+
}
|
224
|
+
|
225
|
+
@Test
|
226
|
+
public void asTimestampFromInteger()
|
227
|
+
{
|
228
|
+
assertEquals(1, ColumnCaster.asTimestamp(ValueFactory.newInteger(1), parser).getEpochSecond());
|
229
|
+
}
|
230
|
+
|
231
|
+
@Test
|
232
|
+
public void asTimestampFromFloat()
|
233
|
+
{
|
234
|
+
Timestamp expected = Timestamp.ofEpochSecond(1463084053, 500000000);
|
235
|
+
assertEquals(expected, ColumnCaster.asTimestamp(ValueFactory.newFloat(1463084053.5), parser));
|
236
|
+
}
|
237
|
+
|
238
|
+
@Test
|
239
|
+
public void asTimestampFromString()
|
240
|
+
{
|
241
|
+
Timestamp expected = Timestamp.ofEpochSecond(1463084053, 500000000);
|
242
|
+
assertEquals(expected, ColumnCaster.asTimestamp(ValueFactory.newString("2016-05-12 20:14:13.5"), parser));
|
243
|
+
}
|
244
|
+
|
245
|
+
@Test
|
246
|
+
public void asTimestampFromJson()
|
247
|
+
{
|
248
|
+
try {
|
249
|
+
ColumnCaster.asTimestamp(mapValue, parser);
|
250
|
+
fail();
|
251
|
+
}
|
252
|
+
catch (Throwable t) {
|
253
|
+
assertTrue(t instanceof DataException);
|
254
|
+
}
|
255
|
+
}
|
256
|
+
}
|
@@ -0,0 +1,278 @@
|
|
1
|
+
package org.embulk.parser.jsonl;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableList;
|
4
|
+
import com.google.common.collect.Lists;
|
5
|
+
import org.embulk.EmbulkTestRuntime;
|
6
|
+
import org.embulk.config.ConfigLoader;
|
7
|
+
import org.embulk.config.ConfigSource;
|
8
|
+
import org.embulk.config.TaskSource;
|
9
|
+
import org.embulk.spi.ColumnConfig;
|
10
|
+
import org.embulk.spi.DataException;
|
11
|
+
import org.embulk.spi.Exec;
|
12
|
+
import org.embulk.spi.FileInput;
|
13
|
+
import org.embulk.spi.ParserPlugin;
|
14
|
+
import org.embulk.spi.Schema;
|
15
|
+
import org.embulk.spi.SchemaConfig;
|
16
|
+
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
17
|
+
import org.embulk.spi.time.Timestamp;
|
18
|
+
import org.embulk.spi.type.Type;
|
19
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
20
|
+
import org.embulk.spi.util.Pages;
|
21
|
+
import org.junit.Before;
|
22
|
+
import org.junit.Rule;
|
23
|
+
import org.junit.Test;
|
24
|
+
|
25
|
+
import java.io.ByteArrayInputStream;
|
26
|
+
import java.io.File;
|
27
|
+
import java.io.IOException;
|
28
|
+
import java.io.InputStream;
|
29
|
+
import java.util.List;
|
30
|
+
|
31
|
+
import static org.embulk.spi.type.Types.BOOLEAN;
|
32
|
+
import static org.embulk.spi.type.Types.DOUBLE;
|
33
|
+
import static org.embulk.spi.type.Types.JSON;
|
34
|
+
import static org.embulk.spi.type.Types.LONG;
|
35
|
+
import static org.embulk.spi.type.Types.STRING;
|
36
|
+
import static org.embulk.spi.type.Types.TIMESTAMP;
|
37
|
+
import static org.junit.Assert.assertEquals;
|
38
|
+
import static org.junit.Assert.assertNull;
|
39
|
+
import static org.junit.Assert.assertTrue;
|
40
|
+
import static org.junit.Assert.fail;
|
41
|
+
import static org.msgpack.value.ValueFactory.newArray;
|
42
|
+
import static org.msgpack.value.ValueFactory.newMap;
|
43
|
+
import static org.msgpack.value.ValueFactory.newString;
|
44
|
+
|
45
|
+
public class TestJsonlParserPlugin
|
46
|
+
{
|
47
|
+
@Rule
|
48
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
49
|
+
|
50
|
+
private ConfigSource config;
|
51
|
+
private JsonlParserPlugin plugin;
|
52
|
+
private MockPageOutput output;
|
53
|
+
|
54
|
+
@Before
|
55
|
+
public void createResource()
|
56
|
+
{
|
57
|
+
config = config().set("type", "jsonl");
|
58
|
+
plugin = new JsonlParserPlugin();
|
59
|
+
recreatePageOutput();
|
60
|
+
}
|
61
|
+
|
62
|
+
private void recreatePageOutput()
|
63
|
+
{
|
64
|
+
output = new MockPageOutput();
|
65
|
+
}
|
66
|
+
|
67
|
+
@Test
|
68
|
+
public void skipRecords()
|
69
|
+
throws Exception
|
70
|
+
{
|
71
|
+
SchemaConfig schema = schema(
|
72
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
|
73
|
+
column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
|
74
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema);
|
75
|
+
|
76
|
+
transaction(config, fileInput(
|
77
|
+
"[]",
|
78
|
+
"\"embulk\"",
|
79
|
+
"10",
|
80
|
+
"true",
|
81
|
+
"false",
|
82
|
+
"null",
|
83
|
+
" "
|
84
|
+
));
|
85
|
+
|
86
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
87
|
+
assertEquals(0, records.size());
|
88
|
+
}
|
89
|
+
|
90
|
+
@Test
|
91
|
+
public void throwDataException()
|
92
|
+
throws Exception
|
93
|
+
{
|
94
|
+
SchemaConfig schema = schema(
|
95
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
|
96
|
+
column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
|
97
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema).set("stop_on_invalid_record", true);
|
98
|
+
|
99
|
+
try {
|
100
|
+
transaction(config, fileInput(
|
101
|
+
"\"not_map_value\""
|
102
|
+
));
|
103
|
+
fail();
|
104
|
+
}
|
105
|
+
catch (Throwable t) {
|
106
|
+
assertTrue(t instanceof DataException);
|
107
|
+
}
|
108
|
+
}
|
109
|
+
|
110
|
+
@Test
|
111
|
+
public void writeNils()
|
112
|
+
throws Exception
|
113
|
+
{
|
114
|
+
SchemaConfig schema = schema(
|
115
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
|
116
|
+
column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
|
117
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema);
|
118
|
+
|
119
|
+
transaction(config, fileInput(
|
120
|
+
"{}",
|
121
|
+
"{\"_c0\":null,\"_c1\":null,\"_c2\":null}",
|
122
|
+
"{\"_c3\":null,\"_c4\":null,\"_c5\":null}",
|
123
|
+
"{}"
|
124
|
+
));
|
125
|
+
|
126
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
127
|
+
assertEquals(4, records.size());
|
128
|
+
|
129
|
+
for (Object[] record : records) {
|
130
|
+
for (int i = 0; i < 6; i++) {
|
131
|
+
assertNull(record[i]);
|
132
|
+
}
|
133
|
+
}
|
134
|
+
}
|
135
|
+
|
136
|
+
@Test
|
137
|
+
public void useNormal()
|
138
|
+
throws Exception
|
139
|
+
{
|
140
|
+
SchemaConfig schema = schema(
|
141
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
|
142
|
+
column("_c3", STRING), column("_c4", TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S %Z")), column("_c5", JSON));
|
143
|
+
List<ConfigSource> configs = Lists.newArrayList(
|
144
|
+
this.config.deepCopy().set("columns", schema),
|
145
|
+
this.config.deepCopy().set("schema", schema)
|
146
|
+
);
|
147
|
+
|
148
|
+
for (ConfigSource config : configs) {
|
149
|
+
transaction(config, fileInput(
|
150
|
+
"{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}}",
|
151
|
+
"[1, 2, 3]",
|
152
|
+
"{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}"
|
153
|
+
));
|
154
|
+
|
155
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
156
|
+
assertEquals(2, records.size());
|
157
|
+
|
158
|
+
Object[] record;
|
159
|
+
{
|
160
|
+
record = records.get(0);
|
161
|
+
assertEquals(true, record[0]);
|
162
|
+
assertEquals(10L, record[1]);
|
163
|
+
assertEquals(0.1, (Double) record[2], 0.0001);
|
164
|
+
assertEquals("embulk", record[3]);
|
165
|
+
assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
|
166
|
+
assertEquals(newMap(newString("k"), newString("v")), record[5]);
|
167
|
+
}
|
168
|
+
{
|
169
|
+
record = records.get(1);
|
170
|
+
assertEquals(false, record[0]);
|
171
|
+
assertEquals(-10L, record[1]);
|
172
|
+
assertEquals(1.0, (Double) record[2], 0.0001);
|
173
|
+
assertEquals("エンバルク", record[3]);
|
174
|
+
assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
|
175
|
+
assertEquals(newArray(newString("e0"), newString("e1")), record[5]);
|
176
|
+
}
|
177
|
+
|
178
|
+
recreatePageOutput();
|
179
|
+
}
|
180
|
+
}
|
181
|
+
|
182
|
+
@Test
|
183
|
+
public void useColumnOptions()
|
184
|
+
throws Exception
|
185
|
+
{
|
186
|
+
|
187
|
+
SchemaConfig schema = schema(
|
188
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE));
|
189
|
+
File yamlFile = getResourceFile("use_column_options.yml");
|
190
|
+
ConfigSource config = getConfigFromYamlFile(yamlFile);
|
191
|
+
|
192
|
+
transaction(config, fileInput(
|
193
|
+
"{\"_c0\":\"true\",\"_c1\":\"10\",\"_c2\":\"0.1\"}",
|
194
|
+
"{\"_c0\":\"false\",\"_c1\":\"-10\",\"_c2\":\"1.0\"}"
|
195
|
+
));
|
196
|
+
|
197
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
198
|
+
assertEquals(2, records.size());
|
199
|
+
|
200
|
+
Object[] record;
|
201
|
+
{
|
202
|
+
record = records.get(0);
|
203
|
+
assertEquals(true, record[0]);
|
204
|
+
assertEquals(10L, record[1]);
|
205
|
+
assertEquals(0.1, (Double) record[2], 0.0001);
|
206
|
+
}
|
207
|
+
{
|
208
|
+
record = records.get(1);
|
209
|
+
assertEquals(false, record[0]);
|
210
|
+
assertEquals(-10L, record[1]);
|
211
|
+
assertEquals(1.0, (Double) record[2], 0.0001);
|
212
|
+
}
|
213
|
+
}
|
214
|
+
|
215
|
+
private ConfigSource config()
|
216
|
+
{
|
217
|
+
return runtime.getExec().newConfigSource();
|
218
|
+
}
|
219
|
+
|
220
|
+
private File getResourceFile(String resourceName)
|
221
|
+
throws IOException
|
222
|
+
{
|
223
|
+
return new File(this.getClass().getResource(resourceName).getFile());
|
224
|
+
}
|
225
|
+
|
226
|
+
private ConfigSource getConfigFromYamlFile(File yamlFile)
|
227
|
+
throws IOException
|
228
|
+
{
|
229
|
+
ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
|
230
|
+
return loader.fromYamlFile(yamlFile);
|
231
|
+
}
|
232
|
+
|
233
|
+
private void transaction(ConfigSource config, final FileInput input)
|
234
|
+
{
|
235
|
+
plugin.transaction(config, new ParserPlugin.Control()
|
236
|
+
{
|
237
|
+
@Override
|
238
|
+
public void run(TaskSource taskSource, Schema schema)
|
239
|
+
{
|
240
|
+
plugin.run(taskSource, schema, input, output);
|
241
|
+
}
|
242
|
+
});
|
243
|
+
}
|
244
|
+
|
245
|
+
private FileInput fileInput(String... lines)
|
246
|
+
throws Exception
|
247
|
+
{
|
248
|
+
StringBuilder sb = new StringBuilder();
|
249
|
+
for (String line : lines) {
|
250
|
+
sb.append(line).append("\n");
|
251
|
+
}
|
252
|
+
|
253
|
+
ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes());
|
254
|
+
return new InputStreamFileInput(runtime.getBufferAllocator(), provider(in));
|
255
|
+
}
|
256
|
+
|
257
|
+
private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
|
258
|
+
throws IOException
|
259
|
+
{
|
260
|
+
return new InputStreamFileInput.IteratorProvider(
|
261
|
+
ImmutableList.copyOf(inputStreams));
|
262
|
+
}
|
263
|
+
|
264
|
+
private SchemaConfig schema(ColumnConfig... columns)
|
265
|
+
{
|
266
|
+
return new SchemaConfig(Lists.newArrayList(columns));
|
267
|
+
}
|
268
|
+
|
269
|
+
private ColumnConfig column(String name, Type type)
|
270
|
+
{
|
271
|
+
return column(name, type, config());
|
272
|
+
}
|
273
|
+
|
274
|
+
private ColumnConfig column(String name, Type type, ConfigSource option)
|
275
|
+
{
|
276
|
+
return new ColumnConfig(name, type, option);
|
277
|
+
}
|
278
|
+
}
|
@@ -0,0 +1,56 @@
|
|
1
|
+
package org.embulk.parser.jsonl.cast;
|
2
|
+
|
3
|
+
import org.embulk.spi.DataException;
|
4
|
+
import org.junit.Test;
|
5
|
+
|
6
|
+
import static org.junit.Assert.assertEquals;
|
7
|
+
import static org.junit.Assert.assertTrue;
|
8
|
+
import static org.junit.Assert.fail;
|
9
|
+
|
10
|
+
public class TestBooleanCast
|
11
|
+
{
|
12
|
+
@Test
|
13
|
+
public void asBoolean()
|
14
|
+
{
|
15
|
+
assertEquals(true, BooleanCast.asBoolean(true));
|
16
|
+
assertEquals(false, BooleanCast.asBoolean(false));
|
17
|
+
}
|
18
|
+
|
19
|
+
@Test
|
20
|
+
public void asLong()
|
21
|
+
{
|
22
|
+
assertEquals(1, BooleanCast.asLong(true));
|
23
|
+
assertEquals(0, BooleanCast.asLong(false));
|
24
|
+
}
|
25
|
+
|
26
|
+
@Test
|
27
|
+
public void asDouble()
|
28
|
+
{
|
29
|
+
try {
|
30
|
+
BooleanCast.asDouble(true);
|
31
|
+
fail();
|
32
|
+
}
|
33
|
+
catch (Throwable t) {
|
34
|
+
assertTrue(t instanceof DataException);
|
35
|
+
}
|
36
|
+
}
|
37
|
+
|
38
|
+
@Test
|
39
|
+
public void asString()
|
40
|
+
{
|
41
|
+
assertEquals("true", BooleanCast.asString(true));
|
42
|
+
assertEquals("false", BooleanCast.asString(false));
|
43
|
+
}
|
44
|
+
|
45
|
+
@Test
|
46
|
+
public void asTimestamp()
|
47
|
+
{
|
48
|
+
try {
|
49
|
+
BooleanCast.asTimestamp(true);
|
50
|
+
fail();
|
51
|
+
}
|
52
|
+
catch (Throwable t) {
|
53
|
+
assertTrue(t instanceof DataException);
|
54
|
+
}
|
55
|
+
}
|
56
|
+
}
|