embulk-parser-msgpack 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +6 -0
- data/ChangeLog +5 -0
- data/README.md +1 -1
- data/build.gradle +17 -3
- data/src/main/java/org/embulk/parser/msgpack/MsgpackParserPlugin.java +125 -41
- data/src/test/java/org/embulk/parser/msgpack/TestMsgpackParserPlugin.java +465 -0
- metadata +15 -14
- data/src/test/java/org/embulk/parser/TestMsgpackParserPlugin.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b4c775c2942e56f3df0d9a6af992220c4fa1d4a
|
4
|
+
data.tar.gz: ef29ab00cffc8e5f5df887586cfd83e0bfafd955
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e52e2c9ffdfaa491eb1298e31bbbe55661c06ef0b3b41f1230713dfaa8bb1c46984ffa907c4ae9f445229eeb91fb9ee17af10e43e0844d12bda2598130a4a5d3
|
7
|
+
data.tar.gz: aa5690f19cc16469e8935e8599e61eb68602ffd1512152ec78be437bdb70a7f3018f020d8aaa5c14eefe6d720b09d7c2d3da93528c430752e42439edf69ef7d0
|
data/.travis.yml
ADDED
data/ChangeLog
CHANGED
data/README.md
CHANGED
@@ -11,7 +11,7 @@ Parses files encoded in MessagePack.
|
|
11
11
|
|
12
12
|
- **row_encoding**: type of a row. "array" or "map" (enum, default: map)
|
13
13
|
- **file_encoding**: if a file includes a big array, set "array". Otherwise, if a file includes sequence of rows, set "sequence" (enum, default: sequence)
|
14
|
-
- **columns**: description (schema,
|
14
|
+
- **columns**: description (schema, default: a single Json typed column)
|
15
15
|
|
16
16
|
## Example
|
17
17
|
|
data/build.gradle
CHANGED
@@ -3,6 +3,8 @@ plugins {
|
|
3
3
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
4
|
id "java"
|
5
5
|
id "checkstyle"
|
6
|
+
id "findbugs"
|
7
|
+
id "jacoco"
|
6
8
|
}
|
7
9
|
import com.github.jrubygradle.JRubyExec
|
8
10
|
repositories {
|
@@ -14,15 +16,17 @@ configurations {
|
|
14
16
|
provided
|
15
17
|
}
|
16
18
|
|
17
|
-
version = "0.2.1"
|
19
|
+
version = "0.2.2"
|
18
20
|
|
19
21
|
sourceCompatibility = 1.7
|
20
22
|
targetCompatibility = 1.7
|
21
23
|
|
22
24
|
dependencies {
|
23
|
-
compile "org.embulk:embulk-core:0.8.
|
24
|
-
provided "org.embulk:embulk-core:0.8.
|
25
|
+
compile "org.embulk:embulk-core:0.8.14"
|
26
|
+
provided "org.embulk:embulk-core:0.8.14"
|
25
27
|
testCompile "junit:junit:4.+"
|
28
|
+
testCompile "org.embulk:embulk-core:0.8.14:tests"
|
29
|
+
testCompile "org.embulk:embulk-standards:0.8.14"
|
26
30
|
}
|
27
31
|
|
28
32
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
@@ -49,6 +53,16 @@ task checkstyle(type: Checkstyle) {
|
|
49
53
|
source = sourceSets.main.allJava + sourceSets.test.allJava
|
50
54
|
}
|
51
55
|
|
56
|
+
tasks.withType(FindBugs) {
|
57
|
+
reports {
|
58
|
+
xml.enabled = false
|
59
|
+
html.enabled = true
|
60
|
+
}
|
61
|
+
}
|
62
|
+
findbugs {
|
63
|
+
ignoreFailures = true
|
64
|
+
}
|
65
|
+
|
52
66
|
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
53
67
|
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
|
54
68
|
script "${project.name}.gemspec"
|
@@ -6,16 +6,23 @@ import java.util.TreeMap;
|
|
6
6
|
import java.util.Comparator;
|
7
7
|
import java.io.IOException;
|
8
8
|
import java.io.EOFException;
|
9
|
+
|
10
|
+
import com.google.common.annotations.VisibleForTesting;
|
9
11
|
import com.google.common.base.Optional;
|
12
|
+
import com.google.common.collect.ImmutableList;
|
10
13
|
import com.google.common.collect.ImmutableMap;
|
11
14
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
12
15
|
import com.fasterxml.jackson.annotation.JsonValue;
|
16
|
+
import com.google.common.collect.Lists;
|
17
|
+
import org.embulk.spi.Exec;
|
18
|
+
import org.embulk.spi.type.Types;
|
13
19
|
import org.msgpack.core.MessagePack;
|
14
20
|
import org.msgpack.core.MessageFormat;
|
15
21
|
import org.msgpack.core.MessageUnpacker;
|
16
22
|
import org.msgpack.core.MessageInsufficientBufferException;
|
17
23
|
import org.msgpack.core.buffer.MessageBuffer;
|
18
24
|
import org.msgpack.core.buffer.MessageBufferInput;
|
25
|
+
import org.msgpack.value.Value;
|
19
26
|
import org.msgpack.value.ValueType;
|
20
27
|
import org.embulk.config.Config;
|
21
28
|
import org.embulk.config.ConfigException;
|
@@ -58,6 +65,9 @@ import org.embulk.spi.util.dynamic.JsonColumnSetter;
|
|
58
65
|
import org.embulk.spi.util.dynamic.DefaultValueSetter;
|
59
66
|
import org.embulk.spi.util.dynamic.NullDefaultValueSetter;
|
60
67
|
|
68
|
+
import static org.embulk.spi.Exec.newConfigSource;
|
69
|
+
import static org.embulk.spi.type.Types.*;
|
70
|
+
|
61
71
|
public class MsgpackParserPlugin
|
62
72
|
implements ParserPlugin
|
63
73
|
{
|
@@ -73,10 +83,14 @@ public class MsgpackParserPlugin
|
|
73
83
|
public RowEncoding getRowEncoding();
|
74
84
|
|
75
85
|
@Config("columns")
|
76
|
-
|
86
|
+
@ConfigDefault("null")
|
87
|
+
public Optional<SchemaConfig> getSchemaConfig();
|
77
88
|
|
78
89
|
@ConfigInject
|
79
90
|
public BufferAllocator getBufferAllocator();
|
91
|
+
|
92
|
+
public void setSchemafulMode(boolean v);
|
93
|
+
public boolean getSchemafulMode();
|
80
94
|
}
|
81
95
|
|
82
96
|
public static enum FileEncoding
|
@@ -195,7 +209,30 @@ public class MsgpackParserPlugin
|
|
195
209
|
{
|
196
210
|
PluginTask task = config.loadConfig(PluginTask.class);
|
197
211
|
|
198
|
-
|
212
|
+
if (!task.getSchemaConfig().isPresent()) {
|
213
|
+
// If columns: is not set, the parser runs in non-schemaful mode and does not care about the row encoding.
|
214
|
+
if (config.has("row_encoding")) {
|
215
|
+
throw new ConfigException("Setting row_encoding: is invalid if columns: is not set.");
|
216
|
+
}
|
217
|
+
task.setSchemafulMode(false);
|
218
|
+
}
|
219
|
+
else {
|
220
|
+
task.setSchemafulMode(true);
|
221
|
+
}
|
222
|
+
|
223
|
+
control.run(task.dump(), getSchemaConfig(task).toSchema());
|
224
|
+
}
|
225
|
+
|
226
|
+
@VisibleForTesting
|
227
|
+
SchemaConfig getSchemaConfig(PluginTask task)
|
228
|
+
{
|
229
|
+
Optional<SchemaConfig> schemaConfig = task.getSchemaConfig();
|
230
|
+
if (schemaConfig.isPresent()) {
|
231
|
+
return schemaConfig.get();
|
232
|
+
}
|
233
|
+
else {
|
234
|
+
return new SchemaConfig(ImmutableList.of(new ColumnConfig("record", JSON, newConfigSource())));
|
235
|
+
}
|
199
236
|
}
|
200
237
|
|
201
238
|
@Override
|
@@ -204,41 +241,75 @@ public class MsgpackParserPlugin
|
|
204
241
|
{
|
205
242
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
206
243
|
|
207
|
-
|
244
|
+
boolean schemafulMode = task.getSchemafulMode();
|
208
245
|
FileEncoding fileEncoding = task.getFileEncoding();
|
209
246
|
|
210
247
|
try (MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(new FileInputMessageBufferInput(input));
|
211
248
|
PageBuilder pageBuilder = new PageBuilder(task.getBufferAllocator(), schema, output)) {
|
212
249
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
switch (rowEncoding) {
|
219
|
-
case ARRAY:
|
220
|
-
reader = new ArrayRowReader(setters);
|
221
|
-
break;
|
222
|
-
case MAP:
|
223
|
-
reader = new MapRowReader(setters);
|
224
|
-
break;
|
225
|
-
default:
|
226
|
-
throw new IllegalArgumentException("Unexpected row encoding");
|
227
|
-
}
|
250
|
+
if (schemafulMode) {
|
251
|
+
RowEncoding rowEncoding = task.getRowEncoding();
|
252
|
+
TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, getSchemaConfig(task));
|
253
|
+
Map<Column, DynamicColumnSetter> setters = newColumnSetters(pageBuilder,
|
254
|
+
getSchemaConfig(task), timestampParsers, taskSource.loadTask(PluginTaskFormatter.class));
|
228
255
|
|
229
|
-
|
230
|
-
switch (
|
231
|
-
case SEQUENCE:
|
232
|
-
// do nothing
|
233
|
-
break;
|
256
|
+
RowReader reader;
|
257
|
+
switch (rowEncoding) {
|
234
258
|
case ARRAY:
|
235
|
-
|
236
|
-
|
259
|
+
reader = new ArrayRowReader(setters);
|
260
|
+
break;
|
261
|
+
case MAP:
|
262
|
+
reader = new MapRowReader(setters);
|
237
263
|
break;
|
264
|
+
default:
|
265
|
+
throw new IllegalArgumentException("Unexpected row encoding");
|
266
|
+
}
|
267
|
+
|
268
|
+
while (input.nextFile()) {
|
269
|
+
switch (fileEncoding) {
|
270
|
+
case SEQUENCE:
|
271
|
+
// do nothing
|
272
|
+
break;
|
273
|
+
case ARRAY:
|
274
|
+
// skip array header to convert array to sequence
|
275
|
+
unpacker.unpackArrayHeader();
|
276
|
+
break;
|
277
|
+
}
|
278
|
+
|
279
|
+
while (reader.next(unpacker)) {
|
280
|
+
pageBuilder.addRecord();
|
281
|
+
}
|
238
282
|
}
|
283
|
+
}
|
284
|
+
else {
|
285
|
+
// In non-schemaful mode, no column setters are created.
|
286
|
+
while (input.nextFile()) {
|
287
|
+
switch (fileEncoding) {
|
288
|
+
case SEQUENCE:
|
289
|
+
// do nothing
|
290
|
+
break;
|
291
|
+
case ARRAY:
|
292
|
+
// skip array header to convert array to sequence
|
293
|
+
unpacker.unpackArrayHeader();
|
294
|
+
break;
|
295
|
+
}
|
239
296
|
|
240
|
-
|
241
|
-
|
297
|
+
while (true) {
|
298
|
+
Value v;
|
299
|
+
try {
|
300
|
+
v = unpacker.unpackValue();
|
301
|
+
if (v == null) {
|
302
|
+
break;
|
303
|
+
}
|
304
|
+
}
|
305
|
+
catch (MessageInsufficientBufferException e) {
|
306
|
+
break;
|
307
|
+
}
|
308
|
+
|
309
|
+
// The unpacked Value object is set to a page as a Json column value.
|
310
|
+
pageBuilder.setJson(0, v);
|
311
|
+
pageBuilder.addRecord();
|
312
|
+
}
|
242
313
|
}
|
243
314
|
}
|
244
315
|
|
@@ -264,29 +335,35 @@ public class MsgpackParserPlugin
|
|
264
335
|
if (type instanceof BooleanType) {
|
265
336
|
setter = new BooleanColumnSetter(pageBuilder, column, defaultValue);
|
266
337
|
|
267
|
-
}
|
338
|
+
}
|
339
|
+
else if (type instanceof LongType) {
|
268
340
|
setter = new LongColumnSetter(pageBuilder, column, defaultValue);
|
269
341
|
|
270
|
-
}
|
342
|
+
}
|
343
|
+
else if (type instanceof DoubleType) {
|
271
344
|
setter = new DoubleColumnSetter(pageBuilder, column, defaultValue);
|
272
345
|
|
273
|
-
}
|
346
|
+
}
|
347
|
+
else if (type instanceof StringType) {
|
274
348
|
TimestampFormatter formatter = new TimestampFormatter(formatterTask,
|
275
349
|
Optional.of(c.getOption().loadConfig(TimestampColumnOption.class)));
|
276
350
|
setter = new StringColumnSetter(pageBuilder, column, defaultValue, formatter);
|
277
351
|
|
278
|
-
}
|
352
|
+
}
|
353
|
+
else if (type instanceof TimestampType) {
|
279
354
|
// TODO use flexible time format like Ruby's Time.parse
|
280
355
|
TimestampParser parser = timestampParsers[column.getIndex()];
|
281
356
|
setter = new TimestampColumnSetter(pageBuilder, column, defaultValue, parser);
|
282
357
|
|
283
|
-
}
|
358
|
+
}
|
359
|
+
else if (type instanceof JsonType) {
|
284
360
|
TimestampFormatter formatter = new TimestampFormatter(formatterTask,
|
285
361
|
Optional.of(c.getOption().loadConfig(TimestampColumnOption.class)));
|
286
362
|
setter = new JsonColumnSetter(pageBuilder, column, defaultValue, formatter);
|
287
363
|
|
288
|
-
}
|
289
|
-
|
364
|
+
}
|
365
|
+
else {
|
366
|
+
throw new ConfigException("Unknown column type: " + type);
|
290
367
|
}
|
291
368
|
|
292
369
|
builder.put(column, setter);
|
@@ -317,10 +394,12 @@ public class MsgpackParserPlugin
|
|
317
394
|
BigInteger bi = unpacker.unpackBigInteger();
|
318
395
|
if (0 <= bi.compareTo(LONG_MIN) && bi.compareTo(LONG_MAX) <= 0) {
|
319
396
|
setter.set(bi.longValue());
|
320
|
-
}
|
397
|
+
}
|
398
|
+
else {
|
321
399
|
setter.setNull(); // TODO set default value
|
322
400
|
}
|
323
|
-
}
|
401
|
+
}
|
402
|
+
else {
|
324
403
|
setter.set(unpacker.unpackLong());
|
325
404
|
}
|
326
405
|
break;
|
@@ -372,14 +451,16 @@ public class MsgpackParserPlugin
|
|
372
451
|
int n;
|
373
452
|
try {
|
374
453
|
n = unpacker.unpackArrayHeader();
|
375
|
-
}
|
454
|
+
}
|
455
|
+
catch (MessageInsufficientBufferException ex) {
|
376
456
|
// TODO EOFException?
|
377
457
|
return false;
|
378
458
|
}
|
379
459
|
for (int i = 0; i < n; i++) {
|
380
460
|
if (i < columnSetters.length) {
|
381
461
|
unpackToSetter(unpacker, columnSetters[i]);
|
382
|
-
}
|
462
|
+
}
|
463
|
+
else {
|
383
464
|
unpacker.skipValue();
|
384
465
|
}
|
385
466
|
}
|
@@ -405,7 +486,8 @@ public class MsgpackParserPlugin
|
|
405
486
|
int n;
|
406
487
|
try {
|
407
488
|
n = unpacker.unpackMapHeader();
|
408
|
-
}
|
489
|
+
}
|
490
|
+
catch (MessageInsufficientBufferException ex) {
|
409
491
|
// TODO EOFException?
|
410
492
|
return false;
|
411
493
|
}
|
@@ -421,7 +503,8 @@ public class MsgpackParserPlugin
|
|
421
503
|
DynamicColumnSetter setter = columnSetters.get(key);
|
422
504
|
if (setter != null) {
|
423
505
|
unpackToSetter(unpacker, setter);
|
424
|
-
}
|
506
|
+
}
|
507
|
+
else {
|
425
508
|
unpacker.skipValue();
|
426
509
|
}
|
427
510
|
}
|
@@ -455,7 +538,8 @@ public class MsgpackParserPlugin
|
|
455
538
|
offset += 1;
|
456
539
|
}
|
457
540
|
return 0;
|
458
|
-
}
|
541
|
+
}
|
542
|
+
else {
|
459
543
|
return o1.size() - o2.size();
|
460
544
|
}
|
461
545
|
}
|
@@ -0,0 +1,465 @@
|
|
1
|
+
package org.embulk.parser.msgpack;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableList;
|
4
|
+
import com.google.common.collect.Lists;
|
5
|
+
import org.embulk.EmbulkTestRuntime;
|
6
|
+
import org.embulk.config.ConfigException;
|
7
|
+
import org.embulk.config.ConfigSource;
|
8
|
+
import org.embulk.config.TaskSource;
|
9
|
+
import org.embulk.parser.msgpack.MsgpackParserPlugin;
|
10
|
+
import org.embulk.parser.msgpack.MsgpackParserPlugin.FileEncoding;
|
11
|
+
import org.embulk.parser.msgpack.MsgpackParserPlugin.PluginTask;
|
12
|
+
import org.embulk.parser.msgpack.MsgpackParserPlugin.RowEncoding;
|
13
|
+
import org.embulk.spi.ColumnConfig;
|
14
|
+
import org.embulk.spi.FileInput;
|
15
|
+
import org.embulk.spi.FileInputRunner;
|
16
|
+
import org.embulk.spi.ParserPlugin;
|
17
|
+
import org.embulk.spi.Schema;
|
18
|
+
import org.embulk.spi.SchemaConfig;
|
19
|
+
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
20
|
+
import org.embulk.spi.time.Timestamp;
|
21
|
+
import org.embulk.spi.type.Type;
|
22
|
+
import org.embulk.spi.type.Types;
|
23
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
24
|
+
import org.embulk.spi.util.Pages;
|
25
|
+
import org.embulk.standards.LocalFileInputPlugin;
|
26
|
+
import org.junit.Before;
|
27
|
+
import org.junit.Rule;
|
28
|
+
import org.junit.Test;
|
29
|
+
import org.msgpack.core.MessagePack;
|
30
|
+
import org.msgpack.core.MessagePacker;
|
31
|
+
import org.msgpack.value.ArrayValue;
|
32
|
+
import org.msgpack.value.Value;
|
33
|
+
|
34
|
+
import java.io.ByteArrayInputStream;
|
35
|
+
import java.io.ByteArrayOutputStream;
|
36
|
+
import java.io.IOException;
|
37
|
+
import java.io.InputStream;
|
38
|
+
import java.util.List;
|
39
|
+
import java.util.Random;
|
40
|
+
|
41
|
+
import static org.junit.Assert.assertEquals;
|
42
|
+
import static org.junit.Assert.assertTrue;
|
43
|
+
|
44
|
+
public class TestMsgpackParserPlugin
|
45
|
+
{
|
46
|
+
@Rule
|
47
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
48
|
+
|
49
|
+
private ConfigSource config;
|
50
|
+
private Random random;
|
51
|
+
private MsgpackParserPlugin plugin;
|
52
|
+
private FileInputRunner runner;
|
53
|
+
private MockPageOutput output;
|
54
|
+
|
55
|
+
@Before
|
56
|
+
public void createResources()
|
57
|
+
{
|
58
|
+
config = config().set("type", "msgpack");
|
59
|
+
random = runtime.getRandom();
|
60
|
+
plugin = new MsgpackParserPlugin();
|
61
|
+
runner = new FileInputRunner(new LocalFileInputPlugin());
|
62
|
+
output = new MockPageOutput();
|
63
|
+
}
|
64
|
+
|
65
|
+
@Test
|
66
|
+
public void checkDefaultValues()
|
67
|
+
{
|
68
|
+
ConfigSource config = this.config.deepCopy();
|
69
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
70
|
+
assertEquals(FileEncoding.SEQUENCE, task.getFileEncoding());
|
71
|
+
assertEquals(RowEncoding.MAP, task.getRowEncoding());
|
72
|
+
|
73
|
+
// columns
|
74
|
+
SchemaConfig schemaConfig = plugin.getSchemaConfig(task);
|
75
|
+
assertEquals(1, schemaConfig.getColumnCount());
|
76
|
+
assertEquals(Types.JSON, schemaConfig.getColumnType(0));
|
77
|
+
}
|
78
|
+
|
79
|
+
@Test(expected = ConfigException.class)
|
80
|
+
public void throwConfigErrorByInvalidFileEncoding()
|
81
|
+
{
|
82
|
+
ConfigSource config = this.config.deepCopy()
|
83
|
+
.set("columns", sampleSchema())
|
84
|
+
.set("file_encoding", "invalid");
|
85
|
+
config.loadConfig(PluginTask.class);
|
86
|
+
}
|
87
|
+
|
88
|
+
@Test(expected = ConfigException.class)
|
89
|
+
public void throwConfigErrorByInvalidRowEncoding()
|
90
|
+
{
|
91
|
+
ConfigSource config = this.config.deepCopy()
|
92
|
+
.set("columns", sampleSchema())
|
93
|
+
.set("row_encoding", "invalid");
|
94
|
+
config.loadConfig(PluginTask.class);
|
95
|
+
}
|
96
|
+
|
97
|
+
@Test(expected = ConfigException.class)
|
98
|
+
public void throwConfigErrorIfSchemalessWithInvalidRowEncoding()
|
99
|
+
{
|
100
|
+
ConfigSource config = this.config.deepCopy()
|
101
|
+
.set("row_encoding", "invalid");
|
102
|
+
config.loadConfig(PluginTask.class);
|
103
|
+
}
|
104
|
+
|
105
|
+
@Test
|
106
|
+
public void parseArrayArray()
|
107
|
+
throws IOException
|
108
|
+
{
|
109
|
+
SchemaConfig schema = schema(
|
110
|
+
column("_c_boolean", Types.BOOLEAN),
|
111
|
+
column("_c_string", Types.STRING),
|
112
|
+
column("_c_json", Types.JSON),
|
113
|
+
column("_c_double", Types.DOUBLE),
|
114
|
+
column("_c_long", Types.LONG),
|
115
|
+
column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
|
116
|
+
);
|
117
|
+
ConfigSource config = this.config.deepCopy()
|
118
|
+
.set("columns", schema)
|
119
|
+
.set("file_encoding", "array")
|
120
|
+
.set("row_encoding", "array");
|
121
|
+
|
122
|
+
boolean vBoolean = random.nextBoolean();
|
123
|
+
String vString = nextString(random, random.nextInt(100));
|
124
|
+
double vDouble = random.nextDouble();
|
125
|
+
long vLong = random.nextLong();
|
126
|
+
String vJson = nextString(random, random.nextInt(100));
|
127
|
+
long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
|
128
|
+
|
129
|
+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
130
|
+
try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
|
131
|
+
pk.packArrayHeader(1)
|
132
|
+
.packArrayHeader(schema.getColumnCount()) // 1 record
|
133
|
+
.packBoolean(vBoolean)
|
134
|
+
.packString(vString)
|
135
|
+
.packString(vJson)
|
136
|
+
.packDouble(vDouble)
|
137
|
+
.packLong(vLong)
|
138
|
+
.packLong(vTimestamp);
|
139
|
+
}
|
140
|
+
|
141
|
+
try (FileInput in = input(out.toByteArray())) {
|
142
|
+
transaction(config, input(out.toByteArray()), output);
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
147
|
+
assertEquals(1, records.size());
|
148
|
+
for (Object[] record : records) {
|
149
|
+
assertEquals(schema.getColumnCount(), record.length);
|
150
|
+
assertEquals(vBoolean, record[0]);
|
151
|
+
assertEquals(vString, record[1]);
|
152
|
+
assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
|
153
|
+
assertEquals(vDouble, (double) record[3], 0.001);
|
154
|
+
assertEquals(vLong, record[4]);
|
155
|
+
assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
159
|
+
@Test
|
160
|
+
public void parseSequenceArray()
|
161
|
+
throws IOException
|
162
|
+
{
|
163
|
+
SchemaConfig schema = schema(
|
164
|
+
column("_c_boolean", Types.BOOLEAN),
|
165
|
+
column("_c_string", Types.STRING),
|
166
|
+
column("_c_json", Types.JSON),
|
167
|
+
column("_c_double", Types.DOUBLE),
|
168
|
+
column("_c_long", Types.LONG),
|
169
|
+
column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
|
170
|
+
);
|
171
|
+
ConfigSource config = this.config.deepCopy()
|
172
|
+
.set("columns", schema)
|
173
|
+
.set("file_encoding", "sequence")
|
174
|
+
.set("row_encoding", "array");
|
175
|
+
|
176
|
+
boolean vBoolean = random.nextBoolean();
|
177
|
+
String vString = nextString(random, random.nextInt(100));
|
178
|
+
double vDouble = random.nextDouble();
|
179
|
+
long vLong = random.nextLong();
|
180
|
+
String vJson = nextString(random, random.nextInt(100));
|
181
|
+
long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
|
182
|
+
|
183
|
+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
184
|
+
try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
|
185
|
+
pk.packArrayHeader(schema.getColumnCount()) // 1 record
|
186
|
+
.packBoolean(vBoolean)
|
187
|
+
.packString(vString)
|
188
|
+
.packString(vJson)
|
189
|
+
.packDouble(vDouble)
|
190
|
+
.packLong(vLong)
|
191
|
+
.packLong(vTimestamp);
|
192
|
+
}
|
193
|
+
|
194
|
+
try (FileInput in = input(out.toByteArray())) {
|
195
|
+
transaction(config, input(out.toByteArray()), output);
|
196
|
+
}
|
197
|
+
}
|
198
|
+
|
199
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
200
|
+
assertEquals(1, records.size());
|
201
|
+
for (Object[] record : records) {
|
202
|
+
assertEquals(schema.getColumnCount(), record.length);
|
203
|
+
assertEquals(vBoolean, record[0]);
|
204
|
+
assertEquals(vString, record[1]);
|
205
|
+
assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
|
206
|
+
assertEquals(vDouble, (double) record[3], 0.001);
|
207
|
+
assertEquals(vLong, record[4]);
|
208
|
+
assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
|
209
|
+
}
|
210
|
+
}
|
211
|
+
|
212
|
+
@Test
|
213
|
+
public void parseSequentialSchemalessData()
|
214
|
+
throws IOException
|
215
|
+
{
|
216
|
+
SchemaConfig schema = schema(column("record", Types.JSON));
|
217
|
+
ConfigSource config = this.config.deepCopy().set("file_encoding", "sequence");
|
218
|
+
|
219
|
+
boolean vBoolean = random.nextBoolean();
|
220
|
+
String vString = nextString(random, random.nextInt(100));
|
221
|
+
double vDouble = random.nextDouble();
|
222
|
+
long vLong = random.nextLong();
|
223
|
+
String vJson = nextString(random, random.nextInt(100));
|
224
|
+
|
225
|
+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
226
|
+
try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
|
227
|
+
pk.packArrayHeader(5) // 1 record
|
228
|
+
.packBoolean(vBoolean)
|
229
|
+
.packString(vString)
|
230
|
+
.packString(vJson)
|
231
|
+
.packDouble(vDouble)
|
232
|
+
.packLong(vLong);
|
233
|
+
}
|
234
|
+
|
235
|
+
try (FileInput in = input(out.toByteArray())) {
|
236
|
+
transaction(config, input(out.toByteArray()), output);
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
241
|
+
assertEquals(1, records.size());
|
242
|
+
for (Object[] record : records) {
|
243
|
+
assertEquals(1, record.length);
|
244
|
+
assertTrue(((Value) record[0]).isArrayValue());
|
245
|
+
ArrayValue v = ((Value) record[0]).asArrayValue();
|
246
|
+
assertEquals(vBoolean, v.get(0).asBooleanValue().getBoolean());
|
247
|
+
assertEquals(vString, v.get(1).asStringValue().asString());
|
248
|
+
assertEquals(vJson, v.get(2).asStringValue().asString());
|
249
|
+
assertEquals(vDouble, v.get(3).asFloatValue().toDouble(), 0.001);
|
250
|
+
assertEquals(vLong, v.get(4).asIntegerValue().toLong());
|
251
|
+
}
|
252
|
+
}
|
253
|
+
|
254
|
+
@Test
|
255
|
+
public void parseSequenceMap()
|
256
|
+
throws IOException
|
257
|
+
{
|
258
|
+
SchemaConfig schema = schema(
|
259
|
+
column("_c_boolean", Types.BOOLEAN),
|
260
|
+
column("_c_string", Types.STRING),
|
261
|
+
column("_c_json", Types.JSON),
|
262
|
+
column("_c_double", Types.DOUBLE),
|
263
|
+
column("_c_long", Types.LONG),
|
264
|
+
column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
|
265
|
+
);
|
266
|
+
ConfigSource config = this.config.deepCopy()
|
267
|
+
.set("columns", schema)
|
268
|
+
.set("file_encoding", "sequence")
|
269
|
+
.set("row_encoding", "map");
|
270
|
+
|
271
|
+
boolean vBoolean = random.nextBoolean();
|
272
|
+
String vString = nextString(random, random.nextInt(100));
|
273
|
+
double vDouble = random.nextDouble();
|
274
|
+
long vLong = random.nextLong();
|
275
|
+
String vJson = nextString(random, random.nextInt(100));
|
276
|
+
long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
|
277
|
+
|
278
|
+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
279
|
+
try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
|
280
|
+
pk.packMapHeader(schema.getColumnCount()) // 1 record
|
281
|
+
.packString(schema.getColumnName(0)).packBoolean(vBoolean)
|
282
|
+
.packString(schema.getColumnName(1)).packString(vString)
|
283
|
+
.packString(schema.getColumnName(2)).packString(vJson)
|
284
|
+
.packString(schema.getColumnName(3)).packDouble(vDouble)
|
285
|
+
.packString(schema.getColumnName(4)).packLong(vLong)
|
286
|
+
.packString(schema.getColumnName(5)).packLong(vTimestamp);
|
287
|
+
}
|
288
|
+
|
289
|
+
try (FileInput in = input(out.toByteArray())) {
|
290
|
+
transaction(config, input(out.toByteArray()), output);
|
291
|
+
}
|
292
|
+
}
|
293
|
+
|
294
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
295
|
+
assertEquals(1, records.size());
|
296
|
+
for (Object[] record : records) {
|
297
|
+
assertEquals(schema.getColumnCount(), record.length);
|
298
|
+
assertEquals(vBoolean, record[0]);
|
299
|
+
assertEquals(vString, record[1]);
|
300
|
+
assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
|
301
|
+
assertEquals(vDouble, (double) record[3], 0.001);
|
302
|
+
assertEquals(vLong, record[4]);
|
303
|
+
assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
|
304
|
+
}
|
305
|
+
}
|
306
|
+
|
307
|
+
@Test
|
308
|
+
public void parseArrayMap()
|
309
|
+
throws IOException
|
310
|
+
{
|
311
|
+
SchemaConfig schema = schema(
|
312
|
+
column("_c_boolean", Types.BOOLEAN),
|
313
|
+
column("_c_string", Types.STRING),
|
314
|
+
column("_c_json", Types.JSON),
|
315
|
+
column("_c_double", Types.DOUBLE),
|
316
|
+
column("_c_long", Types.LONG),
|
317
|
+
column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
|
318
|
+
);
|
319
|
+
ConfigSource config = this.config.deepCopy()
|
320
|
+
.set("columns", schema)
|
321
|
+
.set("file_encoding", "array")
|
322
|
+
.set("row_encoding", "map");
|
323
|
+
|
324
|
+
boolean vBoolean = random.nextBoolean();
|
325
|
+
String vString = nextString(random, random.nextInt(100));
|
326
|
+
double vDouble = random.nextDouble();
|
327
|
+
long vLong = random.nextLong();
|
328
|
+
String vJson = nextString(random, random.nextInt(100));
|
329
|
+
long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
|
330
|
+
|
331
|
+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
332
|
+
try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
|
333
|
+
pk.packArrayHeader(1)
|
334
|
+
.packMapHeader(schema.getColumnCount()) // 1 record
|
335
|
+
.packString(schema.getColumnName(0)).packBoolean(vBoolean)
|
336
|
+
.packString(schema.getColumnName(1)).packString(vString)
|
337
|
+
.packString(schema.getColumnName(2)).packString(vJson)
|
338
|
+
.packString(schema.getColumnName(3)).packDouble(vDouble)
|
339
|
+
.packString(schema.getColumnName(4)).packLong(vLong)
|
340
|
+
.packString(schema.getColumnName(5)).packLong(vTimestamp);
|
341
|
+
}
|
342
|
+
|
343
|
+
try (FileInput in = input(out.toByteArray())) {
|
344
|
+
transaction(config, input(out.toByteArray()), output);
|
345
|
+
}
|
346
|
+
}
|
347
|
+
|
348
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
349
|
+
assertEquals(1, records.size());
|
350
|
+
for (Object[] record : records) {
|
351
|
+
assertEquals(schema.getColumnCount(), record.length);
|
352
|
+
assertEquals(vBoolean, record[0]);
|
353
|
+
assertEquals(vString, record[1]);
|
354
|
+
assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
|
355
|
+
assertEquals(vDouble, (double) record[3], 0.001);
|
356
|
+
assertEquals(vLong, record[4]);
|
357
|
+
assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
|
358
|
+
}
|
359
|
+
}
|
360
|
+
|
361
|
+
@Test
|
362
|
+
public void parseArraySchemalessData()
|
363
|
+
throws IOException
|
364
|
+
{
|
365
|
+
SchemaConfig schema = schema(column("record", Types.JSON));
|
366
|
+
ConfigSource config = this.config.deepCopy().set("file_encoding", "array");
|
367
|
+
|
368
|
+
boolean vBoolean = random.nextBoolean();
|
369
|
+
String vString = nextString(random, random.nextInt(100));
|
370
|
+
double vDouble = random.nextDouble();
|
371
|
+
long vLong = random.nextLong();
|
372
|
+
String vJson = nextString(random, random.nextInt(100));
|
373
|
+
|
374
|
+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
375
|
+
try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
|
376
|
+
pk.packArrayHeader(1)
|
377
|
+
.packArrayHeader(5) // 1 record
|
378
|
+
.packBoolean(vBoolean)
|
379
|
+
.packString(vString)
|
380
|
+
.packString(vJson)
|
381
|
+
.packDouble(vDouble)
|
382
|
+
.packLong(vLong);
|
383
|
+
}
|
384
|
+
|
385
|
+
try (FileInput in = input(out.toByteArray())) {
|
386
|
+
transaction(config, input(out.toByteArray()), output);
|
387
|
+
}
|
388
|
+
}
|
389
|
+
|
390
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
391
|
+
assertEquals(1, records.size());
|
392
|
+
for (Object[] record : records) {
|
393
|
+
assertEquals(1, record.length);
|
394
|
+
assertTrue(((Value) record[0]).isArrayValue());
|
395
|
+
ArrayValue v = ((Value) record[0]).asArrayValue();
|
396
|
+
assertEquals(vBoolean, v.get(0).asBooleanValue().getBoolean());
|
397
|
+
assertEquals(vString, v.get(1).asStringValue().asString());
|
398
|
+
assertEquals(vJson, v.get(2).asStringValue().asString());
|
399
|
+
assertEquals(vDouble, v.get(3).asFloatValue().toDouble(), 0.001);
|
400
|
+
assertEquals(vLong, v.get(4).asIntegerValue().toLong());
|
401
|
+
}
|
402
|
+
}
|
403
|
+
|
404
|
+
private ConfigSource config()
|
405
|
+
{
|
406
|
+
return runtime.getExec().newConfigSource();
|
407
|
+
}
|
408
|
+
|
409
|
+
private SchemaConfig sampleSchema()
|
410
|
+
{
|
411
|
+
return schema(column("_c0", Types.STRING));
|
412
|
+
}
|
413
|
+
|
414
|
+
private SchemaConfig schema(ColumnConfig... columns)
|
415
|
+
{
|
416
|
+
return new SchemaConfig(Lists.newArrayList(columns));
|
417
|
+
}
|
418
|
+
|
419
|
+
private ColumnConfig column(String name, Type type)
|
420
|
+
{
|
421
|
+
return column(name, type, config());
|
422
|
+
}
|
423
|
+
|
424
|
+
private ColumnConfig column(String name, Type type, ConfigSource config)
|
425
|
+
{
|
426
|
+
return new ColumnConfig(name, type, config);
|
427
|
+
}
|
428
|
+
|
429
|
+
private void transaction(ConfigSource config, final FileInput input, final MockPageOutput output)
|
430
|
+
{
|
431
|
+
plugin.transaction(config, new ParserPlugin.Control()
|
432
|
+
{
|
433
|
+
@Override
|
434
|
+
public void run(TaskSource taskSource, Schema schema)
|
435
|
+
{
|
436
|
+
plugin.run(taskSource, schema, input, output);
|
437
|
+
}
|
438
|
+
});
|
439
|
+
}
|
440
|
+
|
441
|
+
private FileInput input(byte[] bytes)
|
442
|
+
{
|
443
|
+
return new InputStreamFileInput(runtime.getBufferAllocator(), provider(new ByteArrayInputStream(bytes)));
|
444
|
+
}
|
445
|
+
|
446
|
+
private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
|
447
|
+
{
|
448
|
+
return new InputStreamFileInput.IteratorProvider(ImmutableList.copyOf(inputStreams));
|
449
|
+
}
|
450
|
+
|
451
|
+
private static String nextString(Random random, int lengthBound)
|
452
|
+
{
|
453
|
+
char[] text = new char[lengthBound];
|
454
|
+
for (int i = 0; i < text.length; i++) {
|
455
|
+
text[i] = (char) random.nextInt(255);
|
456
|
+
}
|
457
|
+
return new String(text);
|
458
|
+
}
|
459
|
+
|
460
|
+
private static long nextUnixtime(Random random, String baseTime, int bound)
|
461
|
+
{
|
462
|
+
long baseUnixtime = java.sql.Timestamp.valueOf(baseTime).getTime();
|
463
|
+
return baseUnixtime + random.nextInt(bound);
|
464
|
+
}
|
465
|
+
}
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-msgpack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-11-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ~>
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.0'
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
16
|
- - ~>
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: '1.0'
|
19
|
+
name: bundler
|
25
20
|
prerelease: false
|
26
21
|
type: :development
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
|
-
- - '>='
|
24
|
+
- - ~>
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version: '10.0'
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
30
|
- - '>='
|
37
31
|
- !ruby/object:Gem::Version
|
38
32
|
version: '10.0'
|
33
|
+
name: rake
|
39
34
|
prerelease: false
|
40
35
|
type: :development
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
41
|
description: Parses files encoded in MessagePack.
|
42
42
|
email:
|
43
43
|
- frsyuki@gmail.com
|
@@ -46,6 +46,7 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- .gitignore
|
49
|
+
- .travis.yml
|
49
50
|
- COPYING
|
50
51
|
- ChangeLog
|
51
52
|
- README.md
|
@@ -59,8 +60,8 @@ files:
|
|
59
60
|
- lib/embulk/guess/msgpack.rb
|
60
61
|
- lib/embulk/parser/msgpack.rb
|
61
62
|
- src/main/java/org/embulk/parser/msgpack/MsgpackParserPlugin.java
|
62
|
-
- src/test/java/org/embulk/parser/TestMsgpackParserPlugin.java
|
63
|
-
- classpath/embulk-parser-msgpack-0.2.1.jar
|
63
|
+
- src/test/java/org/embulk/parser/msgpack/TestMsgpackParserPlugin.java
|
64
|
+
- classpath/embulk-parser-msgpack-0.2.2.jar
|
64
65
|
homepage: https://github.com/frsyuki/embulk-parser-msgpack
|
65
66
|
licenses:
|
66
67
|
- Apache 2.0
|