embulk-parser-msgpack 0.2.1 → 0.2.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 90cbac3b997c6ed3d27a9f6bb4f7f534f8e4a1b8
4
- data.tar.gz: 2d9be2d25cdb4fc7ab221c468a790d303ac99df2
3
+ metadata.gz: 5b4c775c2942e56f3df0d9a6af992220c4fa1d4a
4
+ data.tar.gz: ef29ab00cffc8e5f5df887586cfd83e0bfafd955
5
5
  SHA512:
6
- metadata.gz: f8268bf872a5dcb689f595f223102f4b761cd0e55e9678b9af47ba63c434cdedc2c37396efdb9a88442170f89d4d90422d09e4fda68e1374928baec68ac5af02
7
- data.tar.gz: 57ddd8e8b72d2a35c375c165aeb3c20fac5f554022fa8f710e405c1c81b5bd554dc0fb9a74aaf429bdf0f9c27ead223aba2cc967c2dc598a46e315d03f5e34f4
6
+ metadata.gz: e52e2c9ffdfaa491eb1298e31bbbe55661c06ef0b3b41f1230713dfaa8bb1c46984ffa907c4ae9f445229eeb91fb9ee17af10e43e0844d12bda2598130a4a5d3
7
+ data.tar.gz: aa5690f19cc16469e8935e8599e61eb68602ffd1512152ec78be437bdb70a7f3018f020d8aaa5c14eefe6d720b09d7c2d3da93528c430752e42439edf69ef7d0
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ language: java
2
+ jdk:
3
+ - oraclejdk7
4
+ script:
5
+ - ./gradlew gem
6
+ - ./gradlew --info check jacocoTestReport
data/ChangeLog CHANGED
@@ -1,3 +1,8 @@
1
+ Release 0.2.2 - 2016-11-03
2
+
3
+ * Make 'columns' config optional and enable schemaless data parsing [#5, #6, #7]
4
+ * Upgraded Embulk version to v0.8.14 [#3]
5
+
1
6
  Release 0.2.1 - 2016-02-24
2
7
 
3
8
  * Upgraded msgpack-java version to v0.8.3
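The headline change in 0.2.2 is schemaless parsing: when columns: is omitted, every top-level MessagePack value in the input becomes one row in a single Json-typed column named "record" (see the default schema built in MsgpackParserPlugin below). A minimal sketch of producing such input with msgpack-java; the file name and field values are illustrative only, not part of this gem:

import org.msgpack.core.MessagePack;
import org.msgpack.core.MessagePacker;

import java.io.FileOutputStream;
import java.io.IOException;

public class WriteSchemalessSample
{
    public static void main(String[] args)
            throws IOException
    {
        // Each top-level value packed here becomes one row in the "record" column
        // when the parser runs without a columns: setting.
        try (MessagePacker pk = MessagePack.newDefaultPacker(new FileOutputStream("sample.msgpack"))) {
            pk.packMapHeader(2)
                    .packString("id").packLong(1)
                    .packString("name").packString("embulk");
            pk.packMapHeader(2)
                    .packString("id").packLong(2)
                    .packString("name").packString("msgpack");
        }
    }
}

Maps and arrays alike land as JSON values in that single column; the new tests below pack arrays and assert the resulting record is an ArrayValue.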
data/README.md CHANGED
@@ -11,7 +11,7 @@ Parses files encoded in MessagePack.
11
11
 
12
12
  - **row_encoding**: type of a row. "array" or "map" (enum, default: map)
13
13
  - **file_encoding**: if a file includes a big array, set "array". Otherwise, if a file includes sequence of rows, set "sequence" (enum, default: sequence)
14
- - **columns**: description (schema, required)
14
+ - **columns**: description (schema, default: a single Json typed column)
15
15
 
16
16
  ## Example
17
17
 
data/build.gradle CHANGED
@@ -3,6 +3,8 @@ plugins {
3
3
  id "com.github.jruby-gradle.base" version "0.1.5"
4
4
  id "java"
5
5
  id "checkstyle"
6
+ id "findbugs"
7
+ id "jacoco"
6
8
  }
7
9
  import com.github.jrubygradle.JRubyExec
8
10
  repositories {
@@ -14,15 +16,17 @@ configurations {
14
16
  provided
15
17
  }
16
18
 
17
- version = "0.2.1"
19
+ version = "0.2.2"
18
20
 
19
21
  sourceCompatibility = 1.7
20
22
  targetCompatibility = 1.7
21
23
 
22
24
  dependencies {
23
- compile "org.embulk:embulk-core:0.8.3"
24
- provided "org.embulk:embulk-core:0.8.3"
25
+ compile "org.embulk:embulk-core:0.8.14"
26
+ provided "org.embulk:embulk-core:0.8.14"
25
27
  testCompile "junit:junit:4.+"
28
+ testCompile "org.embulk:embulk-core:0.8.14:tests"
29
+ testCompile "org.embulk:embulk-standards:0.8.14"
26
30
  }
27
31
 
28
32
  task classpath(type: Copy, dependsOn: ["jar"]) {
@@ -49,6 +53,16 @@ task checkstyle(type: Checkstyle) {
49
53
  source = sourceSets.main.allJava + sourceSets.test.allJava
50
54
  }
51
55
 
56
+ tasks.withType(FindBugs) {
57
+ reports {
58
+ xml.enabled = false
59
+ html.enabled = true
60
+ }
61
+ }
62
+ findbugs {
63
+ ignoreFailures = true
64
+ }
65
+
52
66
  task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
53
67
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
54
68
  script "${project.name}.gemspec"
data/src/main/java/org/embulk/parser/msgpack/MsgpackParserPlugin.java CHANGED
@@ -6,16 +6,23 @@ import java.util.TreeMap;
6
6
  import java.util.Comparator;
7
7
  import java.io.IOException;
8
8
  import java.io.EOFException;
9
+
10
+ import com.google.common.annotations.VisibleForTesting;
9
11
  import com.google.common.base.Optional;
12
+ import com.google.common.collect.ImmutableList;
10
13
  import com.google.common.collect.ImmutableMap;
11
14
  import com.fasterxml.jackson.annotation.JsonCreator;
12
15
  import com.fasterxml.jackson.annotation.JsonValue;
16
+ import com.google.common.collect.Lists;
17
+ import org.embulk.spi.Exec;
18
+ import org.embulk.spi.type.Types;
13
19
  import org.msgpack.core.MessagePack;
14
20
  import org.msgpack.core.MessageFormat;
15
21
  import org.msgpack.core.MessageUnpacker;
16
22
  import org.msgpack.core.MessageInsufficientBufferException;
17
23
  import org.msgpack.core.buffer.MessageBuffer;
18
24
  import org.msgpack.core.buffer.MessageBufferInput;
25
+ import org.msgpack.value.Value;
19
26
  import org.msgpack.value.ValueType;
20
27
  import org.embulk.config.Config;
21
28
  import org.embulk.config.ConfigException;
@@ -58,6 +65,9 @@ import org.embulk.spi.util.dynamic.JsonColumnSetter;
58
65
  import org.embulk.spi.util.dynamic.DefaultValueSetter;
59
66
  import org.embulk.spi.util.dynamic.NullDefaultValueSetter;
60
67
 
68
+ import static org.embulk.spi.Exec.newConfigSource;
69
+ import static org.embulk.spi.type.Types.*;
70
+
61
71
  public class MsgpackParserPlugin
62
72
  implements ParserPlugin
63
73
  {
@@ -73,10 +83,14 @@ public class MsgpackParserPlugin
73
83
  public RowEncoding getRowEncoding();
74
84
 
75
85
  @Config("columns")
76
- public SchemaConfig getSchemaConfig();
86
+ @ConfigDefault("null")
87
+ public Optional<SchemaConfig> getSchemaConfig();
77
88
 
78
89
  @ConfigInject
79
90
  public BufferAllocator getBufferAllocator();
91
+
92
+ public void setSchemafulMode(boolean v);
93
+ public boolean getSchemafulMode();
80
94
  }
81
95
 
82
96
  public static enum FileEncoding
@@ -195,7 +209,30 @@ public class MsgpackParserPlugin
195
209
  {
196
210
  PluginTask task = config.loadConfig(PluginTask.class);
197
211
 
198
- control.run(task.dump(), task.getSchemaConfig().toSchema());
212
+ if (!task.getSchemaConfig().isPresent()) {
213
+ // If columns: is not set, the parser runs in non-schemaful mode and ignores row encoding.
214
+ if (config.has("row_encoding")) {
215
+ throw new ConfigException("Setting row_encoding: is invalid if columns: is not set.");
216
+ }
217
+ task.setSchemafulMode(false);
218
+ }
219
+ else {
220
+ task.setSchemafulMode(true);
221
+ }
222
+
223
+ control.run(task.dump(), getSchemaConfig(task).toSchema());
224
+ }
225
+
226
+ @VisibleForTesting
227
+ SchemaConfig getSchemaConfig(PluginTask task)
228
+ {
229
+ Optional<SchemaConfig> schemaConfig = task.getSchemaConfig();
230
+ if (schemaConfig.isPresent()) {
231
+ return schemaConfig.get();
232
+ }
233
+ else {
234
+ return new SchemaConfig(ImmutableList.of(new ColumnConfig("record", JSON, newConfigSource())));
235
+ }
199
236
  }
200
237
 
201
238
  @Override
@@ -204,41 +241,75 @@ public class MsgpackParserPlugin
204
241
  {
205
242
  PluginTask task = taskSource.loadTask(PluginTask.class);
206
243
 
207
- RowEncoding rowEncoding = task.getRowEncoding();
244
+ boolean schemafulMode = task.getSchemafulMode();
208
245
  FileEncoding fileEncoding = task.getFileEncoding();
209
246
 
210
247
  try (MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(new FileInputMessageBufferInput(input));
211
248
  PageBuilder pageBuilder = new PageBuilder(task.getBufferAllocator(), schema, output)) {
212
249
 
213
- TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
214
- Map<Column, DynamicColumnSetter> setters = newColumnSetters(pageBuilder,
215
- task.getSchemaConfig(), timestampParsers, taskSource.loadTask(PluginTaskFormatter.class));
216
-
217
- RowReader reader;
218
- switch (rowEncoding) {
219
- case ARRAY:
220
- reader = new ArrayRowReader(setters);
221
- break;
222
- case MAP:
223
- reader = new MapRowReader(setters);
224
- break;
225
- default:
226
- throw new IllegalArgumentException("Unexpected row encoding");
227
- }
250
+ if (schemafulMode) {
251
+ RowEncoding rowEncoding = task.getRowEncoding();
252
+ TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, getSchemaConfig(task));
253
+ Map<Column, DynamicColumnSetter> setters = newColumnSetters(pageBuilder,
254
+ getSchemaConfig(task), timestampParsers, taskSource.loadTask(PluginTaskFormatter.class));
228
255
 
229
- while (input.nextFile()) {
230
- switch (fileEncoding) {
231
- case SEQUENCE:
232
- // do nothing
233
- break;
256
+ RowReader reader;
257
+ switch (rowEncoding) {
234
258
  case ARRAY:
235
- // skip array header to convert array to sequence
236
- unpacker.unpackArrayHeader();
259
+ reader = new ArrayRowReader(setters);
260
+ break;
261
+ case MAP:
262
+ reader = new MapRowReader(setters);
237
263
  break;
264
+ default:
265
+ throw new IllegalArgumentException("Unexpected row encoding");
266
+ }
267
+
268
+ while (input.nextFile()) {
269
+ switch (fileEncoding) {
270
+ case SEQUENCE:
271
+ // do nothing
272
+ break;
273
+ case ARRAY:
274
+ // skip array header to convert array to sequence
275
+ unpacker.unpackArrayHeader();
276
+ break;
277
+ }
278
+
279
+ while (reader.next(unpacker)) {
280
+ pageBuilder.addRecord();
281
+ }
238
282
  }
283
+ }
284
+ else {
285
+ // In non-schemaful mode, no column setters are created.
286
+ while (input.nextFile()) {
287
+ switch (fileEncoding) {
288
+ case SEQUENCE:
289
+ // do nothing
290
+ break;
291
+ case ARRAY:
292
+ // skip array header to convert array to sequence
293
+ unpacker.unpackArrayHeader();
294
+ break;
295
+ }
239
296
 
240
- while (reader.next(unpacker)) {
241
- pageBuilder.addRecord();
297
+ while (true) {
298
+ Value v;
299
+ try {
300
+ v = unpacker.unpackValue();
301
+ if (v == null) {
302
+ break;
303
+ }
304
+ }
305
+ catch (MessageInsufficientBufferException e) {
306
+ break;
307
+ }
308
+
309
+ // The unpacked Value object is set to a page as a Json column value.
310
+ pageBuilder.setJson(0, v);
311
+ pageBuilder.addRecord();
312
+ }
242
313
  }
243
314
  }
244
315
 
@@ -264,29 +335,35 @@ public class MsgpackParserPlugin
264
335
  if (type instanceof BooleanType) {
265
336
  setter = new BooleanColumnSetter(pageBuilder, column, defaultValue);
266
337
 
267
- } else if (type instanceof LongType) {
338
+ }
339
+ else if (type instanceof LongType) {
268
340
  setter = new LongColumnSetter(pageBuilder, column, defaultValue);
269
341
 
270
- } else if (type instanceof DoubleType) {
342
+ }
343
+ else if (type instanceof DoubleType) {
271
344
  setter = new DoubleColumnSetter(pageBuilder, column, defaultValue);
272
345
 
273
- } else if (type instanceof StringType) {
346
+ }
347
+ else if (type instanceof StringType) {
274
348
  TimestampFormatter formatter = new TimestampFormatter(formatterTask,
275
349
  Optional.of(c.getOption().loadConfig(TimestampColumnOption.class)));
276
350
  setter = new StringColumnSetter(pageBuilder, column, defaultValue, formatter);
277
351
 
278
- } else if (type instanceof TimestampType) {
352
+ }
353
+ else if (type instanceof TimestampType) {
279
354
  // TODO use flexible time format like Ruby's Time.parse
280
355
  TimestampParser parser = timestampParsers[column.getIndex()];
281
356
  setter = new TimestampColumnSetter(pageBuilder, column, defaultValue, parser);
282
357
 
283
- } else if (type instanceof JsonType) {
358
+ }
359
+ else if (type instanceof JsonType) {
284
360
  TimestampFormatter formatter = new TimestampFormatter(formatterTask,
285
361
  Optional.of(c.getOption().loadConfig(TimestampColumnOption.class)));
286
362
  setter = new JsonColumnSetter(pageBuilder, column, defaultValue, formatter);
287
363
 
288
- } else {
289
- throw new ConfigException("Unknown column type: "+type);
364
+ }
365
+ else {
366
+ throw new ConfigException("Unknown column type: " + type);
290
367
  }
291
368
 
292
369
  builder.put(column, setter);
@@ -317,10 +394,12 @@ public class MsgpackParserPlugin
317
394
  BigInteger bi = unpacker.unpackBigInteger();
318
395
  if (0 <= bi.compareTo(LONG_MIN) && bi.compareTo(LONG_MAX) <= 0) {
319
396
  setter.set(bi.longValue());
320
- } else {
397
+ }
398
+ else {
321
399
  setter.setNull(); // TODO set default value
322
400
  }
323
- } else {
401
+ }
402
+ else {
324
403
  setter.set(unpacker.unpackLong());
325
404
  }
326
405
  break;
@@ -372,14 +451,16 @@ public class MsgpackParserPlugin
372
451
  int n;
373
452
  try {
374
453
  n = unpacker.unpackArrayHeader();
375
- } catch (MessageInsufficientBufferException ex) {
454
+ }
455
+ catch (MessageInsufficientBufferException ex) {
376
456
  // TODO EOFException?
377
457
  return false;
378
458
  }
379
459
  for (int i = 0; i < n; i++) {
380
460
  if (i < columnSetters.length) {
381
461
  unpackToSetter(unpacker, columnSetters[i]);
382
- } else {
462
+ }
463
+ else {
383
464
  unpacker.skipValue();
384
465
  }
385
466
  }
@@ -405,7 +486,8 @@ public class MsgpackParserPlugin
405
486
  int n;
406
487
  try {
407
488
  n = unpacker.unpackMapHeader();
408
- } catch (MessageInsufficientBufferException ex) {
489
+ }
490
+ catch (MessageInsufficientBufferException ex) {
409
491
  // TODO EOFException?
410
492
  return false;
411
493
  }
@@ -421,7 +503,8 @@ public class MsgpackParserPlugin
421
503
  DynamicColumnSetter setter = columnSetters.get(key);
422
504
  if (setter != null) {
423
505
  unpackToSetter(unpacker, setter);
424
- } else {
506
+ }
507
+ else {
425
508
  unpacker.skipValue();
426
509
  }
427
510
  }
@@ -455,7 +538,8 @@ public class MsgpackParserPlugin
455
538
  offset += 1;
456
539
  }
457
540
  return 0;
458
- } else {
541
+ }
542
+ else {
459
543
  return o1.size() - o2.size();
460
544
  }
461
545
  }
data/src/test/java/org/embulk/parser/msgpack/TestMsgpackParserPlugin.java ADDED
@@ -0,0 +1,465 @@
1
+ package org.embulk.parser.msgpack;
2
+
3
+ import com.google.common.collect.ImmutableList;
4
+ import com.google.common.collect.Lists;
5
+ import org.embulk.EmbulkTestRuntime;
6
+ import org.embulk.config.ConfigException;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.config.TaskSource;
9
+ import org.embulk.parser.msgpack.MsgpackParserPlugin;
10
+ import org.embulk.parser.msgpack.MsgpackParserPlugin.FileEncoding;
11
+ import org.embulk.parser.msgpack.MsgpackParserPlugin.PluginTask;
12
+ import org.embulk.parser.msgpack.MsgpackParserPlugin.RowEncoding;
13
+ import org.embulk.spi.ColumnConfig;
14
+ import org.embulk.spi.FileInput;
15
+ import org.embulk.spi.FileInputRunner;
16
+ import org.embulk.spi.ParserPlugin;
17
+ import org.embulk.spi.Schema;
18
+ import org.embulk.spi.SchemaConfig;
19
+ import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
20
+ import org.embulk.spi.time.Timestamp;
21
+ import org.embulk.spi.type.Type;
22
+ import org.embulk.spi.type.Types;
23
+ import org.embulk.spi.util.InputStreamFileInput;
24
+ import org.embulk.spi.util.Pages;
25
+ import org.embulk.standards.LocalFileInputPlugin;
26
+ import org.junit.Before;
27
+ import org.junit.Rule;
28
+ import org.junit.Test;
29
+ import org.msgpack.core.MessagePack;
30
+ import org.msgpack.core.MessagePacker;
31
+ import org.msgpack.value.ArrayValue;
32
+ import org.msgpack.value.Value;
33
+
34
+ import java.io.ByteArrayInputStream;
35
+ import java.io.ByteArrayOutputStream;
36
+ import java.io.IOException;
37
+ import java.io.InputStream;
38
+ import java.util.List;
39
+ import java.util.Random;
40
+
41
+ import static org.junit.Assert.assertEquals;
42
+ import static org.junit.Assert.assertTrue;
43
+
44
+ public class TestMsgpackParserPlugin
45
+ {
46
+ @Rule
47
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
48
+
49
+ private ConfigSource config;
50
+ private Random random;
51
+ private MsgpackParserPlugin plugin;
52
+ private FileInputRunner runner;
53
+ private MockPageOutput output;
54
+
55
+ @Before
56
+ public void createResources()
57
+ {
58
+ config = config().set("type", "msgpack");
59
+ random = runtime.getRandom();
60
+ plugin = new MsgpackParserPlugin();
61
+ runner = new FileInputRunner(new LocalFileInputPlugin());
62
+ output = new MockPageOutput();
63
+ }
64
+
65
+ @Test
66
+ public void checkDefaultValues()
67
+ {
68
+ ConfigSource config = this.config.deepCopy();
69
+ PluginTask task = config.loadConfig(PluginTask.class);
70
+ assertEquals(FileEncoding.SEQUENCE, task.getFileEncoding());
71
+ assertEquals(RowEncoding.MAP, task.getRowEncoding());
72
+
73
+ // columns
74
+ SchemaConfig schemaConfig = plugin.getSchemaConfig(task);
75
+ assertEquals(1, schemaConfig.getColumnCount());
76
+ assertEquals(Types.JSON, schemaConfig.getColumnType(0));
77
+ }
78
+
79
+ @Test(expected = ConfigException.class)
80
+ public void throwConfigErrorByInvalidFileEncoding()
81
+ {
82
+ ConfigSource config = this.config.deepCopy()
83
+ .set("columns", sampleSchema())
84
+ .set("file_encoding", "invalid");
85
+ config.loadConfig(PluginTask.class);
86
+ }
87
+
88
+ @Test(expected = ConfigException.class)
89
+ public void throwConfigErrorByInvalidRowEncoding()
90
+ {
91
+ ConfigSource config = this.config.deepCopy()
92
+ .set("columns", sampleSchema())
93
+ .set("row_encoding", "invalid");
94
+ config.loadConfig(PluginTask.class);
95
+ }
96
+
97
+ @Test(expected = ConfigException.class)
98
+ public void throwConfigErrorIfSchemalessWithInvalidRowEncoding()
99
+ {
100
+ ConfigSource config = this.config.deepCopy()
101
+ .set("row_encoding", "invalid");
102
+ config.loadConfig(PluginTask.class);
103
+ }
104
+
105
+ @Test
106
+ public void parseArrayArray()
107
+ throws IOException
108
+ {
109
+ SchemaConfig schema = schema(
110
+ column("_c_boolean", Types.BOOLEAN),
111
+ column("_c_string", Types.STRING),
112
+ column("_c_json", Types.JSON),
113
+ column("_c_double", Types.DOUBLE),
114
+ column("_c_long", Types.LONG),
115
+ column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
116
+ );
117
+ ConfigSource config = this.config.deepCopy()
118
+ .set("columns", schema)
119
+ .set("file_encoding", "array")
120
+ .set("row_encoding", "array");
121
+
122
+ boolean vBoolean = random.nextBoolean();
123
+ String vString = nextString(random, random.nextInt(100));
124
+ double vDouble = random.nextDouble();
125
+ long vLong = random.nextLong();
126
+ String vJson = nextString(random, random.nextInt(100));
127
+ long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
128
+
129
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
130
+ try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
131
+ pk.packArrayHeader(1)
132
+ .packArrayHeader(schema.getColumnCount()) // 1 record
133
+ .packBoolean(vBoolean)
134
+ .packString(vString)
135
+ .packString(vJson)
136
+ .packDouble(vDouble)
137
+ .packLong(vLong)
138
+ .packLong(vTimestamp);
139
+ }
140
+
141
+ try (FileInput in = input(out.toByteArray())) {
142
+ transaction(config, input(out.toByteArray()), output);
143
+ }
144
+ }
145
+
146
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
147
+ assertEquals(1, records.size());
148
+ for (Object[] record : records) {
149
+ assertEquals(schema.getColumnCount(), record.length);
150
+ assertEquals(vBoolean, record[0]);
151
+ assertEquals(vString, record[1]);
152
+ assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
153
+ assertEquals(vDouble, (double) record[3], 0.001);
154
+ assertEquals(vLong, record[4]);
155
+ assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
156
+ }
157
+ }
158
+
159
+ @Test
160
+ public void parseSequenceArray()
161
+ throws IOException
162
+ {
163
+ SchemaConfig schema = schema(
164
+ column("_c_boolean", Types.BOOLEAN),
165
+ column("_c_string", Types.STRING),
166
+ column("_c_json", Types.JSON),
167
+ column("_c_double", Types.DOUBLE),
168
+ column("_c_long", Types.LONG),
169
+ column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
170
+ );
171
+ ConfigSource config = this.config.deepCopy()
172
+ .set("columns", schema)
173
+ .set("file_encoding", "sequence")
174
+ .set("row_encoding", "array");
175
+
176
+ boolean vBoolean = random.nextBoolean();
177
+ String vString = nextString(random, random.nextInt(100));
178
+ double vDouble = random.nextDouble();
179
+ long vLong = random.nextLong();
180
+ String vJson = nextString(random, random.nextInt(100));
181
+ long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
182
+
183
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
184
+ try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
185
+ pk.packArrayHeader(schema.getColumnCount()) // 1 record
186
+ .packBoolean(vBoolean)
187
+ .packString(vString)
188
+ .packString(vJson)
189
+ .packDouble(vDouble)
190
+ .packLong(vLong)
191
+ .packLong(vTimestamp);
192
+ }
193
+
194
+ try (FileInput in = input(out.toByteArray())) {
195
+ transaction(config, input(out.toByteArray()), output);
196
+ }
197
+ }
198
+
199
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
200
+ assertEquals(1, records.size());
201
+ for (Object[] record : records) {
202
+ assertEquals(schema.getColumnCount(), record.length);
203
+ assertEquals(vBoolean, record[0]);
204
+ assertEquals(vString, record[1]);
205
+ assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
206
+ assertEquals(vDouble, (double) record[3], 0.001);
207
+ assertEquals(vLong, record[4]);
208
+ assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
209
+ }
210
+ }
211
+
212
+ @Test
213
+ public void parseSequentialSchemalessData()
214
+ throws IOException
215
+ {
216
+ SchemaConfig schema = schema(column("record", Types.JSON));
217
+ ConfigSource config = this.config.deepCopy().set("file_encoding", "sequence");
218
+
219
+ boolean vBoolean = random.nextBoolean();
220
+ String vString = nextString(random, random.nextInt(100));
221
+ double vDouble = random.nextDouble();
222
+ long vLong = random.nextLong();
223
+ String vJson = nextString(random, random.nextInt(100));
224
+
225
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
226
+ try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
227
+ pk.packArrayHeader(5) // 1 record
228
+ .packBoolean(vBoolean)
229
+ .packString(vString)
230
+ .packString(vJson)
231
+ .packDouble(vDouble)
232
+ .packLong(vLong);
233
+ }
234
+
235
+ try (FileInput in = input(out.toByteArray())) {
236
+ transaction(config, input(out.toByteArray()), output);
237
+ }
238
+ }
239
+
240
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
241
+ assertEquals(1, records.size());
242
+ for (Object[] record : records) {
243
+ assertEquals(1, record.length);
244
+ assertTrue(((Value) record[0]).isArrayValue());
245
+ ArrayValue v = ((Value) record[0]).asArrayValue();
246
+ assertEquals(vBoolean, v.get(0).asBooleanValue().getBoolean());
247
+ assertEquals(vString, v.get(1).asStringValue().asString());
248
+ assertEquals(vJson, v.get(2).asStringValue().asString());
249
+ assertEquals(vDouble, v.get(3).asFloatValue().toDouble(), 0.001);
250
+ assertEquals(vLong, v.get(4).asIntegerValue().toLong());
251
+ }
252
+ }
253
+
254
+ @Test
255
+ public void parseSequenceMap()
256
+ throws IOException
257
+ {
258
+ SchemaConfig schema = schema(
259
+ column("_c_boolean", Types.BOOLEAN),
260
+ column("_c_string", Types.STRING),
261
+ column("_c_json", Types.JSON),
262
+ column("_c_double", Types.DOUBLE),
263
+ column("_c_long", Types.LONG),
264
+ column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
265
+ );
266
+ ConfigSource config = this.config.deepCopy()
267
+ .set("columns", schema)
268
+ .set("file_encoding", "sequence")
269
+ .set("row_encoding", "map");
270
+
271
+ boolean vBoolean = random.nextBoolean();
272
+ String vString = nextString(random, random.nextInt(100));
273
+ double vDouble = random.nextDouble();
274
+ long vLong = random.nextLong();
275
+ String vJson = nextString(random, random.nextInt(100));
276
+ long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
277
+
278
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
279
+ try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
280
+ pk.packMapHeader(schema.getColumnCount()) // 1 record
281
+ .packString(schema.getColumnName(0)).packBoolean(vBoolean)
282
+ .packString(schema.getColumnName(1)).packString(vString)
283
+ .packString(schema.getColumnName(2)).packString(vJson)
284
+ .packString(schema.getColumnName(3)).packDouble(vDouble)
285
+ .packString(schema.getColumnName(4)).packLong(vLong)
286
+ .packString(schema.getColumnName(5)).packLong(vTimestamp);
287
+ }
288
+
289
+ try (FileInput in = input(out.toByteArray())) {
290
+ transaction(config, input(out.toByteArray()), output);
291
+ }
292
+ }
293
+
294
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
295
+ assertEquals(1, records.size());
296
+ for (Object[] record : records) {
297
+ assertEquals(schema.getColumnCount(), record.length);
298
+ assertEquals(vBoolean, record[0]);
299
+ assertEquals(vString, record[1]);
300
+ assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
301
+ assertEquals(vDouble, (double) record[3], 0.001);
302
+ assertEquals(vLong, record[4]);
303
+ assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
304
+ }
305
+ }
306
+
307
+ @Test
308
+ public void parseArrayMap()
309
+ throws IOException
310
+ {
311
+ SchemaConfig schema = schema(
312
+ column("_c_boolean", Types.BOOLEAN),
313
+ column("_c_string", Types.STRING),
314
+ column("_c_json", Types.JSON),
315
+ column("_c_double", Types.DOUBLE),
316
+ column("_c_long", Types.LONG),
317
+ column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
318
+ );
319
+ ConfigSource config = this.config.deepCopy()
320
+ .set("columns", schema)
321
+ .set("file_encoding", "array")
322
+ .set("row_encoding", "map");
323
+
324
+ boolean vBoolean = random.nextBoolean();
325
+ String vString = nextString(random, random.nextInt(100));
326
+ double vDouble = random.nextDouble();
327
+ long vLong = random.nextLong();
328
+ String vJson = nextString(random, random.nextInt(100));
329
+ long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
330
+
331
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
332
+ try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
333
+ pk.packArrayHeader(1)
334
+ .packMapHeader(schema.getColumnCount()) // 1 record
335
+ .packString(schema.getColumnName(0)).packBoolean(vBoolean)
336
+ .packString(schema.getColumnName(1)).packString(vString)
337
+ .packString(schema.getColumnName(2)).packString(vJson)
338
+ .packString(schema.getColumnName(3)).packDouble(vDouble)
339
+ .packString(schema.getColumnName(4)).packLong(vLong)
340
+ .packString(schema.getColumnName(5)).packLong(vTimestamp);
341
+ }
342
+
343
+ try (FileInput in = input(out.toByteArray())) {
344
+ transaction(config, input(out.toByteArray()), output);
345
+ }
346
+ }
347
+
348
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
349
+ assertEquals(1, records.size());
350
+ for (Object[] record : records) {
351
+ assertEquals(schema.getColumnCount(), record.length);
352
+ assertEquals(vBoolean, record[0]);
353
+ assertEquals(vString, record[1]);
354
+ assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
355
+ assertEquals(vDouble, (double) record[3], 0.001);
356
+ assertEquals(vLong, record[4]);
357
+ assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
358
+ }
359
+ }
360
+
361
+ @Test
362
+ public void parseArraySchemalessData()
363
+ throws IOException
364
+ {
365
+ SchemaConfig schema = schema(column("record", Types.JSON));
366
+ ConfigSource config = this.config.deepCopy().set("file_encoding", "array");
367
+
368
+ boolean vBoolean = random.nextBoolean();
369
+ String vString = nextString(random, random.nextInt(100));
370
+ double vDouble = random.nextDouble();
371
+ long vLong = random.nextLong();
372
+ String vJson = nextString(random, random.nextInt(100));
373
+
374
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
375
+ try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
376
+ pk.packArrayHeader(1)
377
+ .packArrayHeader(5) // 1 record
378
+ .packBoolean(vBoolean)
379
+ .packString(vString)
380
+ .packString(vJson)
381
+ .packDouble(vDouble)
382
+ .packLong(vLong);
383
+ }
384
+
385
+ try (FileInput in = input(out.toByteArray())) {
386
+ transaction(config, input(out.toByteArray()), output);
387
+ }
388
+ }
389
+
390
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
391
+ assertEquals(1, records.size());
392
+ for (Object[] record : records) {
393
+ assertEquals(1, record.length);
394
+ assertTrue(((Value) record[0]).isArrayValue());
395
+ ArrayValue v = ((Value) record[0]).asArrayValue();
396
+ assertEquals(vBoolean, v.get(0).asBooleanValue().getBoolean());
397
+ assertEquals(vString, v.get(1).asStringValue().asString());
398
+ assertEquals(vJson, v.get(2).asStringValue().asString());
399
+ assertEquals(vDouble, v.get(3).asFloatValue().toDouble(), 0.001);
400
+ assertEquals(vLong, v.get(4).asIntegerValue().toLong());
401
+ }
402
+ }
403
+
404
+ private ConfigSource config()
405
+ {
406
+ return runtime.getExec().newConfigSource();
407
+ }
408
+
409
+ private SchemaConfig sampleSchema()
410
+ {
411
+ return schema(column("_c0", Types.STRING));
412
+ }
413
+
414
+ private SchemaConfig schema(ColumnConfig... columns)
415
+ {
416
+ return new SchemaConfig(Lists.newArrayList(columns));
417
+ }
418
+
419
+ private ColumnConfig column(String name, Type type)
420
+ {
421
+ return column(name, type, config());
422
+ }
423
+
424
+ private ColumnConfig column(String name, Type type, ConfigSource config)
425
+ {
426
+ return new ColumnConfig(name, type, config);
427
+ }
428
+
429
+ private void transaction(ConfigSource config, final FileInput input, final MockPageOutput output)
430
+ {
431
+ plugin.transaction(config, new ParserPlugin.Control()
432
+ {
433
+ @Override
434
+ public void run(TaskSource taskSource, Schema schema)
435
+ {
436
+ plugin.run(taskSource, schema, input, output);
437
+ }
438
+ });
439
+ }
440
+
441
+ private FileInput input(byte[] bytes)
442
+ {
443
+ return new InputStreamFileInput(runtime.getBufferAllocator(), provider(new ByteArrayInputStream(bytes)));
444
+ }
445
+
446
+ private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
447
+ {
448
+ return new InputStreamFileInput.IteratorProvider(ImmutableList.copyOf(inputStreams));
449
+ }
450
+
451
+ private static String nextString(Random random, int lengthBound)
452
+ {
453
+ char[] text = new char[lengthBound];
454
+ for (int i = 0; i < text.length; i++) {
455
+ text[i] = (char) random.nextInt(255);
456
+ }
457
+ return new String(text);
458
+ }
459
+
460
+ private static long nextUnixtime(Random random, String baseTime, int bound)
461
+ {
462
+ long baseUnixtime = java.sql.Timestamp.valueOf(baseTime).getTime();
463
+ return baseUnixtime + random.nextInt(bound);
464
+ }
465
+ }
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-msgpack
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-24 00:00:00.000000000 Z
11
+ date: 2016-11-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
14
  requirement: !ruby/object:Gem::Requirement
21
15
  requirements:
22
16
  - - ~>
23
17
  - !ruby/object:Gem::Version
24
18
  version: '1.0'
19
+ name: bundler
25
20
  prerelease: false
26
21
  type: :development
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - '>='
24
+ - - ~>
32
25
  - !ruby/object:Gem::Version
33
- version: '10.0'
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
34
28
  requirement: !ruby/object:Gem::Requirement
35
29
  requirements:
36
30
  - - '>='
37
31
  - !ruby/object:Gem::Version
38
32
  version: '10.0'
33
+ name: rake
39
34
  prerelease: false
40
35
  type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
41
  description: Parses files encoded in MessagePack.
42
42
  email:
43
43
  - frsyuki@gmail.com
@@ -46,6 +46,7 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
+ - .travis.yml
49
50
  - COPYING
50
51
  - ChangeLog
51
52
  - README.md
@@ -59,8 +60,8 @@ files:
59
60
  - lib/embulk/guess/msgpack.rb
60
61
  - lib/embulk/parser/msgpack.rb
61
62
  - src/main/java/org/embulk/parser/msgpack/MsgpackParserPlugin.java
62
- - src/test/java/org/embulk/parser/TestMsgpackParserPlugin.java
63
- - classpath/embulk-parser-msgpack-0.2.1.jar
63
+ - src/test/java/org/embulk/parser/msgpack/TestMsgpackParserPlugin.java
64
+ - classpath/embulk-parser-msgpack-0.2.2.jar
64
65
  homepage: https://github.com/frsyuki/embulk-parser-msgpack
65
66
  licenses:
66
67
  - Apache 2.0
data/src/test/java/org/embulk/parser/TestMsgpackParserPlugin.java DELETED
@@ -1,5 +0,0 @@
1
- package org.embulk.parser;
2
-
3
- public class TestMsgpackParserPlugin
4
- {
5
- }