embulk-parser-msgpack 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +6 -0
- data/ChangeLog +5 -0
- data/README.md +1 -1
- data/build.gradle +17 -3
- data/src/main/java/org/embulk/parser/msgpack/MsgpackParserPlugin.java +125 -41
- data/src/test/java/org/embulk/parser/msgpack/TestMsgpackParserPlugin.java +465 -0
- metadata +15 -14
- data/src/test/java/org/embulk/parser/TestMsgpackParserPlugin.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b4c775c2942e56f3df0d9a6af992220c4fa1d4a
|
4
|
+
data.tar.gz: ef29ab00cffc8e5f5df887586cfd83e0bfafd955
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e52e2c9ffdfaa491eb1298e31bbbe55661c06ef0b3b41f1230713dfaa8bb1c46984ffa907c4ae9f445229eeb91fb9ee17af10e43e0844d12bda2598130a4a5d3
|
7
|
+
data.tar.gz: aa5690f19cc16469e8935e8599e61eb68602ffd1512152ec78be437bdb70a7f3018f020d8aaa5c14eefe6d720b09d7c2d3da93528c430752e42439edf69ef7d0
|
data/.travis.yml
ADDED
data/ChangeLog
CHANGED
data/README.md
CHANGED
@@ -11,7 +11,7 @@ Parses files encoded in MessagePack.
|
|
11
11
|
|
12
12
|
- **row_encoding**: type of a row. "array" or "map" (enum, default: map)
|
13
13
|
- **file_encoding**: if a file includes a big array, set "array". Otherwise, if a file includes sequence of rows, set "sequence" (enum, default: sequence)
|
14
|
-
- **columns**: description (schema,
|
14
|
+
- **columns**: description (schema, default: a single Json typed column)
|
15
15
|
|
16
16
|
## Example
|
17
17
|
|
data/build.gradle
CHANGED
@@ -3,6 +3,8 @@ plugins {
|
|
3
3
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
4
|
id "java"
|
5
5
|
id "checkstyle"
|
6
|
+
id "findbugs"
|
7
|
+
id "jacoco"
|
6
8
|
}
|
7
9
|
import com.github.jrubygradle.JRubyExec
|
8
10
|
repositories {
|
@@ -14,15 +16,17 @@ configurations {
|
|
14
16
|
provided
|
15
17
|
}
|
16
18
|
|
17
|
-
version = "0.2.1"
|
19
|
+
version = "0.2.2"
|
18
20
|
|
19
21
|
sourceCompatibility = 1.7
|
20
22
|
targetCompatibility = 1.7
|
21
23
|
|
22
24
|
dependencies {
|
23
|
-
compile "org.embulk:embulk-core:0.8.
|
24
|
-
provided "org.embulk:embulk-core:0.8.
|
25
|
+
compile "org.embulk:embulk-core:0.8.14"
|
26
|
+
provided "org.embulk:embulk-core:0.8.14"
|
25
27
|
testCompile "junit:junit:4.+"
|
28
|
+
testCompile "org.embulk:embulk-core:0.8.14:tests"
|
29
|
+
testCompile "org.embulk:embulk-standards:0.8.14"
|
26
30
|
}
|
27
31
|
|
28
32
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
@@ -49,6 +53,16 @@ task checkstyle(type: Checkstyle) {
|
|
49
53
|
source = sourceSets.main.allJava + sourceSets.test.allJava
|
50
54
|
}
|
51
55
|
|
56
|
+
tasks.withType(FindBugs) {
|
57
|
+
reports {
|
58
|
+
xml.enabled = false
|
59
|
+
html.enabled = true
|
60
|
+
}
|
61
|
+
}
|
62
|
+
findbugs {
|
63
|
+
ignoreFailures = true
|
64
|
+
}
|
65
|
+
|
52
66
|
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
53
67
|
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
|
54
68
|
script "${project.name}.gemspec"
|
@@ -6,16 +6,23 @@ import java.util.TreeMap;
|
|
6
6
|
import java.util.Comparator;
|
7
7
|
import java.io.IOException;
|
8
8
|
import java.io.EOFException;
|
9
|
+
|
10
|
+
import com.google.common.annotations.VisibleForTesting;
|
9
11
|
import com.google.common.base.Optional;
|
12
|
+
import com.google.common.collect.ImmutableList;
|
10
13
|
import com.google.common.collect.ImmutableMap;
|
11
14
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
12
15
|
import com.fasterxml.jackson.annotation.JsonValue;
|
16
|
+
import com.google.common.collect.Lists;
|
17
|
+
import org.embulk.spi.Exec;
|
18
|
+
import org.embulk.spi.type.Types;
|
13
19
|
import org.msgpack.core.MessagePack;
|
14
20
|
import org.msgpack.core.MessageFormat;
|
15
21
|
import org.msgpack.core.MessageUnpacker;
|
16
22
|
import org.msgpack.core.MessageInsufficientBufferException;
|
17
23
|
import org.msgpack.core.buffer.MessageBuffer;
|
18
24
|
import org.msgpack.core.buffer.MessageBufferInput;
|
25
|
+
import org.msgpack.value.Value;
|
19
26
|
import org.msgpack.value.ValueType;
|
20
27
|
import org.embulk.config.Config;
|
21
28
|
import org.embulk.config.ConfigException;
|
@@ -58,6 +65,9 @@ import org.embulk.spi.util.dynamic.JsonColumnSetter;
|
|
58
65
|
import org.embulk.spi.util.dynamic.DefaultValueSetter;
|
59
66
|
import org.embulk.spi.util.dynamic.NullDefaultValueSetter;
|
60
67
|
|
68
|
+
import static org.embulk.spi.Exec.newConfigSource;
|
69
|
+
import static org.embulk.spi.type.Types.*;
|
70
|
+
|
61
71
|
public class MsgpackParserPlugin
|
62
72
|
implements ParserPlugin
|
63
73
|
{
|
@@ -73,10 +83,14 @@ public class MsgpackParserPlugin
|
|
73
83
|
public RowEncoding getRowEncoding();
|
74
84
|
|
75
85
|
@Config("columns")
|
76
|
-
|
86
|
+
@ConfigDefault("null")
|
87
|
+
public Optional<SchemaConfig> getSchemaConfig();
|
77
88
|
|
78
89
|
@ConfigInject
|
79
90
|
public BufferAllocator getBufferAllocator();
|
91
|
+
|
92
|
+
public void setSchemafulMode(boolean v);
|
93
|
+
public boolean getSchemafulMode();
|
80
94
|
}
|
81
95
|
|
82
96
|
public static enum FileEncoding
|
@@ -195,7 +209,30 @@ public class MsgpackParserPlugin
|
|
195
209
|
{
|
196
210
|
PluginTask task = config.loadConfig(PluginTask.class);
|
197
211
|
|
198
|
-
|
212
|
+
if (!task.getSchemaConfig().isPresent()) {
|
213
|
+
// If columns: is not set, the parser behaves as non-schemaful mode. It doesn't care of row encoding.
|
214
|
+
if (config.has("row_encoding")) {
|
215
|
+
throw new ConfigException("Setting row_encoding: is invalid if columns: is not set.");
|
216
|
+
}
|
217
|
+
task.setSchemafulMode(false);
|
218
|
+
}
|
219
|
+
else {
|
220
|
+
task.setSchemafulMode(true);
|
221
|
+
}
|
222
|
+
|
223
|
+
control.run(task.dump(), getSchemaConfig(task).toSchema());
|
224
|
+
}
|
225
|
+
|
226
|
+
@VisibleForTesting
|
227
|
+
SchemaConfig getSchemaConfig(PluginTask task)
|
228
|
+
{
|
229
|
+
Optional<SchemaConfig> schemaConfig = task.getSchemaConfig();
|
230
|
+
if (schemaConfig.isPresent()) {
|
231
|
+
return schemaConfig.get();
|
232
|
+
}
|
233
|
+
else {
|
234
|
+
return new SchemaConfig(ImmutableList.of(new ColumnConfig("record", JSON, newConfigSource())));
|
235
|
+
}
|
199
236
|
}
|
200
237
|
|
201
238
|
@Override
|
@@ -204,41 +241,75 @@ public class MsgpackParserPlugin
|
|
204
241
|
{
|
205
242
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
206
243
|
|
207
|
-
|
244
|
+
boolean schemafulMode = task.getSchemafulMode();
|
208
245
|
FileEncoding fileEncoding = task.getFileEncoding();
|
209
246
|
|
210
247
|
try (MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(new FileInputMessageBufferInput(input));
|
211
248
|
PageBuilder pageBuilder = new PageBuilder(task.getBufferAllocator(), schema, output)) {
|
212
249
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
switch (rowEncoding) {
|
219
|
-
case ARRAY:
|
220
|
-
reader = new ArrayRowReader(setters);
|
221
|
-
break;
|
222
|
-
case MAP:
|
223
|
-
reader = new MapRowReader(setters);
|
224
|
-
break;
|
225
|
-
default:
|
226
|
-
throw new IllegalArgumentException("Unexpected row encoding");
|
227
|
-
}
|
250
|
+
if (schemafulMode) {
|
251
|
+
RowEncoding rowEncoding = task.getRowEncoding();
|
252
|
+
TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, getSchemaConfig(task));
|
253
|
+
Map<Column, DynamicColumnSetter> setters = newColumnSetters(pageBuilder,
|
254
|
+
getSchemaConfig(task), timestampParsers, taskSource.loadTask(PluginTaskFormatter.class));
|
228
255
|
|
229
|
-
|
230
|
-
switch (
|
231
|
-
case SEQUENCE:
|
232
|
-
// do nothing
|
233
|
-
break;
|
256
|
+
RowReader reader;
|
257
|
+
switch (rowEncoding) {
|
234
258
|
case ARRAY:
|
235
|
-
|
236
|
-
|
259
|
+
reader = new ArrayRowReader(setters);
|
260
|
+
break;
|
261
|
+
case MAP:
|
262
|
+
reader = new MapRowReader(setters);
|
237
263
|
break;
|
264
|
+
default:
|
265
|
+
throw new IllegalArgumentException("Unexpected row encoding");
|
266
|
+
}
|
267
|
+
|
268
|
+
while (input.nextFile()) {
|
269
|
+
switch (fileEncoding) {
|
270
|
+
case SEQUENCE:
|
271
|
+
// do nothing
|
272
|
+
break;
|
273
|
+
case ARRAY:
|
274
|
+
// skip array header to convert array to sequence
|
275
|
+
unpacker.unpackArrayHeader();
|
276
|
+
break;
|
277
|
+
}
|
278
|
+
|
279
|
+
while (reader.next(unpacker)) {
|
280
|
+
pageBuilder.addRecord();
|
281
|
+
}
|
238
282
|
}
|
283
|
+
}
|
284
|
+
else {
|
285
|
+
// If non-schemaful mode, setters is not created.
|
286
|
+
while (input.nextFile()) {
|
287
|
+
switch (fileEncoding) {
|
288
|
+
case SEQUENCE:
|
289
|
+
// do nothing
|
290
|
+
break;
|
291
|
+
case ARRAY:
|
292
|
+
// skip array header to convert array to sequence
|
293
|
+
unpacker.unpackArrayHeader();
|
294
|
+
break;
|
295
|
+
}
|
239
296
|
|
240
|
-
|
241
|
-
|
297
|
+
while (true) {
|
298
|
+
Value v;
|
299
|
+
try {
|
300
|
+
v = unpacker.unpackValue();
|
301
|
+
if (v == null) {
|
302
|
+
break;
|
303
|
+
}
|
304
|
+
}
|
305
|
+
catch (MessageInsufficientBufferException e) {
|
306
|
+
break;
|
307
|
+
}
|
308
|
+
|
309
|
+
// The unpacked Value object is set to a page as a Json column value.
|
310
|
+
pageBuilder.setJson(0, v);
|
311
|
+
pageBuilder.addRecord();
|
312
|
+
}
|
242
313
|
}
|
243
314
|
}
|
244
315
|
|
@@ -264,29 +335,35 @@ public class MsgpackParserPlugin
|
|
264
335
|
if (type instanceof BooleanType) {
|
265
336
|
setter = new BooleanColumnSetter(pageBuilder, column, defaultValue);
|
266
337
|
|
267
|
-
}
|
338
|
+
}
|
339
|
+
else if (type instanceof LongType) {
|
268
340
|
setter = new LongColumnSetter(pageBuilder, column, defaultValue);
|
269
341
|
|
270
|
-
}
|
342
|
+
}
|
343
|
+
else if (type instanceof DoubleType) {
|
271
344
|
setter = new DoubleColumnSetter(pageBuilder, column, defaultValue);
|
272
345
|
|
273
|
-
}
|
346
|
+
}
|
347
|
+
else if (type instanceof StringType) {
|
274
348
|
TimestampFormatter formatter = new TimestampFormatter(formatterTask,
|
275
349
|
Optional.of(c.getOption().loadConfig(TimestampColumnOption.class)));
|
276
350
|
setter = new StringColumnSetter(pageBuilder, column, defaultValue, formatter);
|
277
351
|
|
278
|
-
}
|
352
|
+
}
|
353
|
+
else if (type instanceof TimestampType) {
|
279
354
|
// TODO use flexible time format like Ruby's Time.parse
|
280
355
|
TimestampParser parser = timestampParsers[column.getIndex()];
|
281
356
|
setter = new TimestampColumnSetter(pageBuilder, column, defaultValue, parser);
|
282
357
|
|
283
|
-
}
|
358
|
+
}
|
359
|
+
else if (type instanceof JsonType) {
|
284
360
|
TimestampFormatter formatter = new TimestampFormatter(formatterTask,
|
285
361
|
Optional.of(c.getOption().loadConfig(TimestampColumnOption.class)));
|
286
362
|
setter = new JsonColumnSetter(pageBuilder, column, defaultValue, formatter);
|
287
363
|
|
288
|
-
}
|
289
|
-
|
364
|
+
}
|
365
|
+
else {
|
366
|
+
throw new ConfigException("Unknown column type: " + type);
|
290
367
|
}
|
291
368
|
|
292
369
|
builder.put(column, setter);
|
@@ -317,10 +394,12 @@ public class MsgpackParserPlugin
|
|
317
394
|
BigInteger bi = unpacker.unpackBigInteger();
|
318
395
|
if (0 <= bi.compareTo(LONG_MIN) && bi.compareTo(LONG_MAX) <= 0) {
|
319
396
|
setter.set(bi.longValue());
|
320
|
-
}
|
397
|
+
}
|
398
|
+
else {
|
321
399
|
setter.setNull(); // TODO set default value
|
322
400
|
}
|
323
|
-
}
|
401
|
+
}
|
402
|
+
else {
|
324
403
|
setter.set(unpacker.unpackLong());
|
325
404
|
}
|
326
405
|
break;
|
@@ -372,14 +451,16 @@ public class MsgpackParserPlugin
|
|
372
451
|
int n;
|
373
452
|
try {
|
374
453
|
n = unpacker.unpackArrayHeader();
|
375
|
-
}
|
454
|
+
}
|
455
|
+
catch (MessageInsufficientBufferException ex) {
|
376
456
|
// TODO EOFException?
|
377
457
|
return false;
|
378
458
|
}
|
379
459
|
for (int i = 0; i < n; i++) {
|
380
460
|
if (i < columnSetters.length) {
|
381
461
|
unpackToSetter(unpacker, columnSetters[i]);
|
382
|
-
}
|
462
|
+
}
|
463
|
+
else {
|
383
464
|
unpacker.skipValue();
|
384
465
|
}
|
385
466
|
}
|
@@ -405,7 +486,8 @@ public class MsgpackParserPlugin
|
|
405
486
|
int n;
|
406
487
|
try {
|
407
488
|
n = unpacker.unpackMapHeader();
|
408
|
-
}
|
489
|
+
}
|
490
|
+
catch (MessageInsufficientBufferException ex) {
|
409
491
|
// TODO EOFException?
|
410
492
|
return false;
|
411
493
|
}
|
@@ -421,7 +503,8 @@ public class MsgpackParserPlugin
|
|
421
503
|
DynamicColumnSetter setter = columnSetters.get(key);
|
422
504
|
if (setter != null) {
|
423
505
|
unpackToSetter(unpacker, setter);
|
424
|
-
}
|
506
|
+
}
|
507
|
+
else {
|
425
508
|
unpacker.skipValue();
|
426
509
|
}
|
427
510
|
}
|
@@ -455,7 +538,8 @@ public class MsgpackParserPlugin
|
|
455
538
|
offset += 1;
|
456
539
|
}
|
457
540
|
return 0;
|
458
|
-
}
|
541
|
+
}
|
542
|
+
else {
|
459
543
|
return o1.size() - o2.size();
|
460
544
|
}
|
461
545
|
}
|
@@ -0,0 +1,465 @@
|
|
1
|
+
package org.embulk.parser.msgpack;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableList;
|
4
|
+
import com.google.common.collect.Lists;
|
5
|
+
import org.embulk.EmbulkTestRuntime;
|
6
|
+
import org.embulk.config.ConfigException;
|
7
|
+
import org.embulk.config.ConfigSource;
|
8
|
+
import org.embulk.config.TaskSource;
|
9
|
+
import org.embulk.parser.msgpack.MsgpackParserPlugin;
|
10
|
+
import org.embulk.parser.msgpack.MsgpackParserPlugin.FileEncoding;
|
11
|
+
import org.embulk.parser.msgpack.MsgpackParserPlugin.PluginTask;
|
12
|
+
import org.embulk.parser.msgpack.MsgpackParserPlugin.RowEncoding;
|
13
|
+
import org.embulk.spi.ColumnConfig;
|
14
|
+
import org.embulk.spi.FileInput;
|
15
|
+
import org.embulk.spi.FileInputRunner;
|
16
|
+
import org.embulk.spi.ParserPlugin;
|
17
|
+
import org.embulk.spi.Schema;
|
18
|
+
import org.embulk.spi.SchemaConfig;
|
19
|
+
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
20
|
+
import org.embulk.spi.time.Timestamp;
|
21
|
+
import org.embulk.spi.type.Type;
|
22
|
+
import org.embulk.spi.type.Types;
|
23
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
24
|
+
import org.embulk.spi.util.Pages;
|
25
|
+
import org.embulk.standards.LocalFileInputPlugin;
|
26
|
+
import org.junit.Before;
|
27
|
+
import org.junit.Rule;
|
28
|
+
import org.junit.Test;
|
29
|
+
import org.msgpack.core.MessagePack;
|
30
|
+
import org.msgpack.core.MessagePacker;
|
31
|
+
import org.msgpack.value.ArrayValue;
|
32
|
+
import org.msgpack.value.Value;
|
33
|
+
|
34
|
+
import java.io.ByteArrayInputStream;
|
35
|
+
import java.io.ByteArrayOutputStream;
|
36
|
+
import java.io.IOException;
|
37
|
+
import java.io.InputStream;
|
38
|
+
import java.util.List;
|
39
|
+
import java.util.Random;
|
40
|
+
|
41
|
+
import static org.junit.Assert.assertEquals;
|
42
|
+
import static org.junit.Assert.assertTrue;
|
43
|
+
|
44
|
+
public class TestMsgpackParserPlugin
|
45
|
+
{
|
46
|
+
@Rule
|
47
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
48
|
+
|
49
|
+
private ConfigSource config;
|
50
|
+
private Random random;
|
51
|
+
private MsgpackParserPlugin plugin;
|
52
|
+
private FileInputRunner runner;
|
53
|
+
private MockPageOutput output;
|
54
|
+
|
55
|
+
@Before
|
56
|
+
public void createResources()
|
57
|
+
{
|
58
|
+
config = config().set("type", "msgpack");
|
59
|
+
random = runtime.getRandom();
|
60
|
+
plugin = new MsgpackParserPlugin();
|
61
|
+
runner = new FileInputRunner(new LocalFileInputPlugin());
|
62
|
+
output = new MockPageOutput();
|
63
|
+
}
|
64
|
+
|
65
|
+
@Test
|
66
|
+
public void checkDefaultValues()
|
67
|
+
{
|
68
|
+
ConfigSource config = this.config.deepCopy();
|
69
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
70
|
+
assertEquals(FileEncoding.SEQUENCE, task.getFileEncoding());
|
71
|
+
assertEquals(RowEncoding.MAP, task.getRowEncoding());
|
72
|
+
|
73
|
+
// columns
|
74
|
+
SchemaConfig schemaConfig = plugin.getSchemaConfig(task);
|
75
|
+
assertEquals(1, schemaConfig.getColumnCount());
|
76
|
+
assertEquals(Types.JSON, schemaConfig.getColumnType(0));
|
77
|
+
}
|
78
|
+
|
79
|
+
@Test(expected = ConfigException.class)
|
80
|
+
public void throwConfigErrorByInvalidFileEncoding()
|
81
|
+
{
|
82
|
+
ConfigSource config = this.config.deepCopy()
|
83
|
+
.set("columns", sampleSchema())
|
84
|
+
.set("file_encoding", "invalid");
|
85
|
+
config.loadConfig(PluginTask.class);
|
86
|
+
}
|
87
|
+
|
88
|
+
@Test(expected = ConfigException.class)
|
89
|
+
public void throwConfigErrorByInvalidRowEncoding()
|
90
|
+
{
|
91
|
+
ConfigSource config = this.config.deepCopy()
|
92
|
+
.set("columns", sampleSchema())
|
93
|
+
.set("row_encoding", "invalid");
|
94
|
+
config.loadConfig(PluginTask.class);
|
95
|
+
}
|
96
|
+
|
97
|
+
@Test(expected = ConfigException.class)
|
98
|
+
public void throwConfigErrorIfSchemalessWithInvalidRowEncoding()
|
99
|
+
{
|
100
|
+
ConfigSource config = this.config.deepCopy()
|
101
|
+
.set("row_encoding", "invalid");
|
102
|
+
config.loadConfig(PluginTask.class);
|
103
|
+
}
|
104
|
+
|
105
|
+
@Test
|
106
|
+
public void parseArrayArray()
|
107
|
+
throws IOException
|
108
|
+
{
|
109
|
+
SchemaConfig schema = schema(
|
110
|
+
column("_c_boolean", Types.BOOLEAN),
|
111
|
+
column("_c_string", Types.STRING),
|
112
|
+
column("_c_json", Types.JSON),
|
113
|
+
column("_c_double", Types.DOUBLE),
|
114
|
+
column("_c_long", Types.LONG),
|
115
|
+
column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
|
116
|
+
);
|
117
|
+
ConfigSource config = this.config.deepCopy()
|
118
|
+
.set("columns", schema)
|
119
|
+
.set("file_encoding", "array")
|
120
|
+
.set("row_encoding", "array");
|
121
|
+
|
122
|
+
boolean vBoolean = random.nextBoolean();
|
123
|
+
String vString = nextString(random, random.nextInt(100));
|
124
|
+
double vDouble = random.nextDouble();
|
125
|
+
long vLong = random.nextLong();
|
126
|
+
String vJson = nextString(random, random.nextInt(100));
|
127
|
+
long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
|
128
|
+
|
129
|
+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
130
|
+
try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
|
131
|
+
pk.packArrayHeader(1)
|
132
|
+
.packArrayHeader(schema.getColumnCount()) // 1 record
|
133
|
+
.packBoolean(vBoolean)
|
134
|
+
.packString(vString)
|
135
|
+
.packString(vJson)
|
136
|
+
.packDouble(vDouble)
|
137
|
+
.packLong(vLong)
|
138
|
+
.packLong(vTimestamp);
|
139
|
+
}
|
140
|
+
|
141
|
+
try (FileInput in = input(out.toByteArray())) {
|
142
|
+
transaction(config, input(out.toByteArray()), output);
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
147
|
+
assertEquals(1, records.size());
|
148
|
+
for (Object[] record : records) {
|
149
|
+
assertEquals(schema.getColumnCount(), record.length);
|
150
|
+
assertEquals(vBoolean, record[0]);
|
151
|
+
assertEquals(vString, record[1]);
|
152
|
+
assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
|
153
|
+
assertEquals(vDouble, (double) record[3], 0.001);
|
154
|
+
assertEquals(vLong, record[4]);
|
155
|
+
assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
159
|
+
@Test
|
160
|
+
public void parseSequenceArray()
|
161
|
+
throws IOException
|
162
|
+
{
|
163
|
+
SchemaConfig schema = schema(
|
164
|
+
column("_c_boolean", Types.BOOLEAN),
|
165
|
+
column("_c_string", Types.STRING),
|
166
|
+
column("_c_json", Types.JSON),
|
167
|
+
column("_c_double", Types.DOUBLE),
|
168
|
+
column("_c_long", Types.LONG),
|
169
|
+
column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
|
170
|
+
);
|
171
|
+
ConfigSource config = this.config.deepCopy()
|
172
|
+
.set("columns", schema)
|
173
|
+
.set("file_encoding", "sequence")
|
174
|
+
.set("row_encoding", "array");
|
175
|
+
|
176
|
+
boolean vBoolean = random.nextBoolean();
|
177
|
+
String vString = nextString(random, random.nextInt(100));
|
178
|
+
double vDouble = random.nextDouble();
|
179
|
+
long vLong = random.nextLong();
|
180
|
+
String vJson = nextString(random, random.nextInt(100));
|
181
|
+
long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
|
182
|
+
|
183
|
+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
184
|
+
try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
|
185
|
+
pk.packArrayHeader(schema.getColumnCount()) // 1 record
|
186
|
+
.packBoolean(vBoolean)
|
187
|
+
.packString(vString)
|
188
|
+
.packString(vJson)
|
189
|
+
.packDouble(vDouble)
|
190
|
+
.packLong(vLong)
|
191
|
+
.packLong(vTimestamp);
|
192
|
+
}
|
193
|
+
|
194
|
+
try (FileInput in = input(out.toByteArray())) {
|
195
|
+
transaction(config, input(out.toByteArray()), output);
|
196
|
+
}
|
197
|
+
}
|
198
|
+
|
199
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
200
|
+
assertEquals(1, records.size());
|
201
|
+
for (Object[] record : records) {
|
202
|
+
assertEquals(schema.getColumnCount(), record.length);
|
203
|
+
assertEquals(vBoolean, record[0]);
|
204
|
+
assertEquals(vString, record[1]);
|
205
|
+
assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
|
206
|
+
assertEquals(vDouble, (double) record[3], 0.001);
|
207
|
+
assertEquals(vLong, record[4]);
|
208
|
+
assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
|
209
|
+
}
|
210
|
+
}
|
211
|
+
|
212
|
+
@Test
|
213
|
+
public void parseSequentialSchemalessData()
|
214
|
+
throws IOException
|
215
|
+
{
|
216
|
+
SchemaConfig schema = schema(column("record", Types.JSON));
|
217
|
+
ConfigSource config = this.config.deepCopy().set("file_encoding", "sequence");
|
218
|
+
|
219
|
+
boolean vBoolean = random.nextBoolean();
|
220
|
+
String vString = nextString(random, random.nextInt(100));
|
221
|
+
double vDouble = random.nextDouble();
|
222
|
+
long vLong = random.nextLong();
|
223
|
+
String vJson = nextString(random, random.nextInt(100));
|
224
|
+
|
225
|
+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
226
|
+
try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
|
227
|
+
pk.packArrayHeader(5) // 1 record
|
228
|
+
.packBoolean(vBoolean)
|
229
|
+
.packString(vString)
|
230
|
+
.packString(vJson)
|
231
|
+
.packDouble(vDouble)
|
232
|
+
.packLong(vLong);
|
233
|
+
}
|
234
|
+
|
235
|
+
try (FileInput in = input(out.toByteArray())) {
|
236
|
+
transaction(config, input(out.toByteArray()), output);
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
241
|
+
assertEquals(1, records.size());
|
242
|
+
for (Object[] record : records) {
|
243
|
+
assertEquals(1, record.length);
|
244
|
+
assertTrue(((Value) record[0]).isArrayValue());
|
245
|
+
ArrayValue v = ((Value) record[0]).asArrayValue();
|
246
|
+
assertEquals(vBoolean, v.get(0).asBooleanValue().getBoolean());
|
247
|
+
assertEquals(vString, v.get(1).asStringValue().asString());
|
248
|
+
assertEquals(vJson, v.get(2).asStringValue().asString());
|
249
|
+
assertEquals(vDouble, v.get(3).asFloatValue().toDouble(), 0.001);
|
250
|
+
assertEquals(vLong, v.get(4).asIntegerValue().toLong());
|
251
|
+
}
|
252
|
+
}
|
253
|
+
|
254
|
+
@Test
|
255
|
+
public void parseSequenceMap()
|
256
|
+
throws IOException
|
257
|
+
{
|
258
|
+
SchemaConfig schema = schema(
|
259
|
+
column("_c_boolean", Types.BOOLEAN),
|
260
|
+
column("_c_string", Types.STRING),
|
261
|
+
column("_c_json", Types.JSON),
|
262
|
+
column("_c_double", Types.DOUBLE),
|
263
|
+
column("_c_long", Types.LONG),
|
264
|
+
column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
|
265
|
+
);
|
266
|
+
ConfigSource config = this.config.deepCopy()
|
267
|
+
.set("columns", schema)
|
268
|
+
.set("file_encoding", "sequence")
|
269
|
+
.set("row_encoding", "map");
|
270
|
+
|
271
|
+
boolean vBoolean = random.nextBoolean();
|
272
|
+
String vString = nextString(random, random.nextInt(100));
|
273
|
+
double vDouble = random.nextDouble();
|
274
|
+
long vLong = random.nextLong();
|
275
|
+
String vJson = nextString(random, random.nextInt(100));
|
276
|
+
long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
|
277
|
+
|
278
|
+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
279
|
+
try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
|
280
|
+
pk.packMapHeader(schema.getColumnCount()) // 1 record
|
281
|
+
.packString(schema.getColumnName(0)).packBoolean(vBoolean)
|
282
|
+
.packString(schema.getColumnName(1)).packString(vString)
|
283
|
+
.packString(schema.getColumnName(2)).packString(vJson)
|
284
|
+
.packString(schema.getColumnName(3)).packDouble(vDouble)
|
285
|
+
.packString(schema.getColumnName(4)).packLong(vLong)
|
286
|
+
.packString(schema.getColumnName(5)).packLong(vTimestamp);
|
287
|
+
}
|
288
|
+
|
289
|
+
try (FileInput in = input(out.toByteArray())) {
|
290
|
+
transaction(config, input(out.toByteArray()), output);
|
291
|
+
}
|
292
|
+
}
|
293
|
+
|
294
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
295
|
+
assertEquals(1, records.size());
|
296
|
+
for (Object[] record : records) {
|
297
|
+
assertEquals(schema.getColumnCount(), record.length);
|
298
|
+
assertEquals(vBoolean, record[0]);
|
299
|
+
assertEquals(vString, record[1]);
|
300
|
+
assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
|
301
|
+
assertEquals(vDouble, (double) record[3], 0.001);
|
302
|
+
assertEquals(vLong, record[4]);
|
303
|
+
assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
|
304
|
+
}
|
305
|
+
}
|
306
|
+
|
307
|
+
@Test
|
308
|
+
public void parseArrayMap()
|
309
|
+
throws IOException
|
310
|
+
{
|
311
|
+
SchemaConfig schema = schema(
|
312
|
+
column("_c_boolean", Types.BOOLEAN),
|
313
|
+
column("_c_string", Types.STRING),
|
314
|
+
column("_c_json", Types.JSON),
|
315
|
+
column("_c_double", Types.DOUBLE),
|
316
|
+
column("_c_long", Types.LONG),
|
317
|
+
column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
|
318
|
+
);
|
319
|
+
ConfigSource config = this.config.deepCopy()
|
320
|
+
.set("columns", schema)
|
321
|
+
.set("file_encoding", "array")
|
322
|
+
.set("row_encoding", "map");
|
323
|
+
|
324
|
+
boolean vBoolean = random.nextBoolean();
|
325
|
+
String vString = nextString(random, random.nextInt(100));
|
326
|
+
double vDouble = random.nextDouble();
|
327
|
+
long vLong = random.nextLong();
|
328
|
+
String vJson = nextString(random, random.nextInt(100));
|
329
|
+
long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
|
330
|
+
|
331
|
+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
332
|
+
try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
|
333
|
+
pk.packArrayHeader(1)
|
334
|
+
.packMapHeader(schema.getColumnCount()) // 1 record
|
335
|
+
.packString(schema.getColumnName(0)).packBoolean(vBoolean)
|
336
|
+
.packString(schema.getColumnName(1)).packString(vString)
|
337
|
+
.packString(schema.getColumnName(2)).packString(vJson)
|
338
|
+
.packString(schema.getColumnName(3)).packDouble(vDouble)
|
339
|
+
.packString(schema.getColumnName(4)).packLong(vLong)
|
340
|
+
.packString(schema.getColumnName(5)).packLong(vTimestamp);
|
341
|
+
}
|
342
|
+
|
343
|
+
try (FileInput in = input(out.toByteArray())) {
|
344
|
+
transaction(config, input(out.toByteArray()), output);
|
345
|
+
}
|
346
|
+
}
|
347
|
+
|
348
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
349
|
+
assertEquals(1, records.size());
|
350
|
+
for (Object[] record : records) {
|
351
|
+
assertEquals(schema.getColumnCount(), record.length);
|
352
|
+
assertEquals(vBoolean, record[0]);
|
353
|
+
assertEquals(vString, record[1]);
|
354
|
+
assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
|
355
|
+
assertEquals(vDouble, (double) record[3], 0.001);
|
356
|
+
assertEquals(vLong, record[4]);
|
357
|
+
assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
|
358
|
+
}
|
359
|
+
}
|
360
|
+
|
361
|
+
@Test
|
362
|
+
public void parseArraySchemalessData()
|
363
|
+
throws IOException
|
364
|
+
{
|
365
|
+
SchemaConfig schema = schema(column("record", Types.JSON));
|
366
|
+
ConfigSource config = this.config.deepCopy().set("file_encoding", "array");
|
367
|
+
|
368
|
+
boolean vBoolean = random.nextBoolean();
|
369
|
+
String vString = nextString(random, random.nextInt(100));
|
370
|
+
double vDouble = random.nextDouble();
|
371
|
+
long vLong = random.nextLong();
|
372
|
+
String vJson = nextString(random, random.nextInt(100));
|
373
|
+
|
374
|
+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
375
|
+
try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
|
376
|
+
pk.packArrayHeader(1)
|
377
|
+
.packArrayHeader(5) // 1 record
|
378
|
+
.packBoolean(vBoolean)
|
379
|
+
.packString(vString)
|
380
|
+
.packString(vJson)
|
381
|
+
.packDouble(vDouble)
|
382
|
+
.packLong(vLong);
|
383
|
+
}
|
384
|
+
|
385
|
+
try (FileInput in = input(out.toByteArray())) {
|
386
|
+
transaction(config, input(out.toByteArray()), output);
|
387
|
+
}
|
388
|
+
}
|
389
|
+
|
390
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
391
|
+
assertEquals(1, records.size());
|
392
|
+
for (Object[] record : records) {
|
393
|
+
assertEquals(1, record.length);
|
394
|
+
assertTrue(((Value) record[0]).isArrayValue());
|
395
|
+
ArrayValue v = ((Value) record[0]).asArrayValue();
|
396
|
+
assertEquals(vBoolean, v.get(0).asBooleanValue().getBoolean());
|
397
|
+
assertEquals(vString, v.get(1).asStringValue().asString());
|
398
|
+
assertEquals(vJson, v.get(2).asStringValue().asString());
|
399
|
+
assertEquals(vDouble, v.get(3).asFloatValue().toDouble(), 0.001);
|
400
|
+
assertEquals(vLong, v.get(4).asIntegerValue().toLong());
|
401
|
+
}
|
402
|
+
}
|
403
|
+
|
404
|
+
private ConfigSource config()
|
405
|
+
{
|
406
|
+
return runtime.getExec().newConfigSource();
|
407
|
+
}
|
408
|
+
|
409
|
+
private SchemaConfig sampleSchema()
|
410
|
+
{
|
411
|
+
return schema(column("_c0", Types.STRING));
|
412
|
+
}
|
413
|
+
|
414
|
+
private SchemaConfig schema(ColumnConfig... columns)
|
415
|
+
{
|
416
|
+
return new SchemaConfig(Lists.newArrayList(columns));
|
417
|
+
}
|
418
|
+
|
419
|
+
private ColumnConfig column(String name, Type type)
|
420
|
+
{
|
421
|
+
return column(name, type, config());
|
422
|
+
}
|
423
|
+
|
424
|
+
private ColumnConfig column(String name, Type type, ConfigSource config)
|
425
|
+
{
|
426
|
+
return new ColumnConfig(name, type, config);
|
427
|
+
}
|
428
|
+
|
429
|
+
private void transaction(ConfigSource config, final FileInput input, final MockPageOutput output)
|
430
|
+
{
|
431
|
+
plugin.transaction(config, new ParserPlugin.Control()
|
432
|
+
{
|
433
|
+
@Override
|
434
|
+
public void run(TaskSource taskSource, Schema schema)
|
435
|
+
{
|
436
|
+
plugin.run(taskSource, schema, input, output);
|
437
|
+
}
|
438
|
+
});
|
439
|
+
}
|
440
|
+
|
441
|
+
private FileInput input(byte[] bytes)
|
442
|
+
{
|
443
|
+
return new InputStreamFileInput(runtime.getBufferAllocator(), provider(new ByteArrayInputStream(bytes)));
|
444
|
+
}
|
445
|
+
|
446
|
+
private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
|
447
|
+
{
|
448
|
+
return new InputStreamFileInput.IteratorProvider(ImmutableList.copyOf(inputStreams));
|
449
|
+
}
|
450
|
+
|
451
|
+
private static String nextString(Random random, int lengthBound)
|
452
|
+
{
|
453
|
+
char[] text = new char[lengthBound];
|
454
|
+
for (int i = 0; i < text.length; i++) {
|
455
|
+
text[i] = (char) random.nextInt(255);
|
456
|
+
}
|
457
|
+
return new String(text);
|
458
|
+
}
|
459
|
+
|
460
|
+
private static long nextUnixtime(Random random, String baseTime, int bound)
|
461
|
+
{
|
462
|
+
long baseUnixtime = java.sql.Timestamp.valueOf(baseTime).getTime();
|
463
|
+
return baseUnixtime + random.nextInt(bound);
|
464
|
+
}
|
465
|
+
}
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-msgpack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-11-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ~>
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.0'
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
16
|
- - ~>
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: '1.0'
|
19
|
+
name: bundler
|
25
20
|
prerelease: false
|
26
21
|
type: :development
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
|
-
- -
|
24
|
+
- - ~>
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
30
|
- - '>='
|
37
31
|
- !ruby/object:Gem::Version
|
38
32
|
version: '10.0'
|
33
|
+
name: rake
|
39
34
|
prerelease: false
|
40
35
|
type: :development
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
41
|
description: Parses files encoded in MessagePack.
|
42
42
|
email:
|
43
43
|
- frsyuki@gmail.com
|
@@ -46,6 +46,7 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- .gitignore
|
49
|
+
- .travis.yml
|
49
50
|
- COPYING
|
50
51
|
- ChangeLog
|
51
52
|
- README.md
|
@@ -59,8 +60,8 @@ files:
|
|
59
60
|
- lib/embulk/guess/msgpack.rb
|
60
61
|
- lib/embulk/parser/msgpack.rb
|
61
62
|
- src/main/java/org/embulk/parser/msgpack/MsgpackParserPlugin.java
|
62
|
-
- src/test/java/org/embulk/parser/TestMsgpackParserPlugin.java
|
63
|
-
- classpath/embulk-parser-msgpack-0.2.1.jar
|
63
|
+
- src/test/java/org/embulk/parser/msgpack/TestMsgpackParserPlugin.java
|
64
|
+
- classpath/embulk-parser-msgpack-0.2.2.jar
|
64
65
|
homepage: https://github.com/frsyuki/embulk-parser-msgpack
|
65
66
|
licenses:
|
66
67
|
- Apache 2.0
|