embulk-parser-msgpack 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 90cbac3b997c6ed3d27a9f6bb4f7f534f8e4a1b8
4
- data.tar.gz: 2d9be2d25cdb4fc7ab221c468a790d303ac99df2
3
+ metadata.gz: 5b4c775c2942e56f3df0d9a6af992220c4fa1d4a
4
+ data.tar.gz: ef29ab00cffc8e5f5df887586cfd83e0bfafd955
5
5
  SHA512:
6
- metadata.gz: f8268bf872a5dcb689f595f223102f4b761cd0e55e9678b9af47ba63c434cdedc2c37396efdb9a88442170f89d4d90422d09e4fda68e1374928baec68ac5af02
7
- data.tar.gz: 57ddd8e8b72d2a35c375c165aeb3c20fac5f554022fa8f710e405c1c81b5bd554dc0fb9a74aaf429bdf0f9c27ead223aba2cc967c2dc598a46e315d03f5e34f4
6
+ metadata.gz: e52e2c9ffdfaa491eb1298e31bbbe55661c06ef0b3b41f1230713dfaa8bb1c46984ffa907c4ae9f445229eeb91fb9ee17af10e43e0844d12bda2598130a4a5d3
7
+ data.tar.gz: aa5690f19cc16469e8935e8599e61eb68602ffd1512152ec78be437bdb70a7f3018f020d8aaa5c14eefe6d720b09d7c2d3da93528c430752e42439edf69ef7d0
@@ -0,0 +1,6 @@
1
+ language: java
2
+ jdk:
3
+ - oraclejdk7
4
+ script:
5
+ - ./gradlew gem
6
+ - ./gradlew --info check jacocoTestReport
data/ChangeLog CHANGED
@@ -1,3 +1,8 @@
1
+ Release 0.2.2 - 2016-11-03
2
+
3
+ * Make 'columns' config optional and enable schemaless data parsing [#5, #6, #7]
4
+ * Upgraded Embulk version to v0.8.14 [#3]
5
+
1
6
  Release 0.2.1 - 2016-02-24
2
7
 
3
8
  * Upgraded msgpack-java version to v0.8.3
data/README.md CHANGED
@@ -11,7 +11,7 @@ Parses files encoded in MessagePack.
11
11
 
12
12
  - **row_encoding**: type of a row. "array" or "map" (enum, default: map)
13
13
  - **file_encoding**: if a file includes a big array, set "array". Otherwise, if a file includes sequence of rows, set "sequence" (enum, default: sequence)
14
- - **columns**: description (schema, required)
14
+ - **columns**: description (schema, default: a single JSON-typed column)
15
15
 
16
16
  ## Example
17
17
 
@@ -3,6 +3,8 @@ plugins {
3
3
  id "com.github.jruby-gradle.base" version "0.1.5"
4
4
  id "java"
5
5
  id "checkstyle"
6
+ id "findbugs"
7
+ id "jacoco"
6
8
  }
7
9
  import com.github.jrubygradle.JRubyExec
8
10
  repositories {
@@ -14,15 +16,17 @@ configurations {
14
16
  provided
15
17
  }
16
18
 
17
- version = "0.2.1"
19
+ version = "0.2.2"
18
20
 
19
21
  sourceCompatibility = 1.7
20
22
  targetCompatibility = 1.7
21
23
 
22
24
  dependencies {
23
- compile "org.embulk:embulk-core:0.8.3"
24
- provided "org.embulk:embulk-core:0.8.3"
25
+ compile "org.embulk:embulk-core:0.8.14"
26
+ provided "org.embulk:embulk-core:0.8.14"
25
27
  testCompile "junit:junit:4.+"
28
+ testCompile "org.embulk:embulk-core:0.8.14:tests"
29
+ testCompile "org.embulk:embulk-standards:0.8.14"
26
30
  }
27
31
 
28
32
  task classpath(type: Copy, dependsOn: ["jar"]) {
@@ -49,6 +53,16 @@ task checkstyle(type: Checkstyle) {
49
53
  source = sourceSets.main.allJava + sourceSets.test.allJava
50
54
  }
51
55
 
56
+ tasks.withType(FindBugs) {
57
+ reports {
58
+ xml.enabled = false
59
+ html.enabled = true
60
+ }
61
+ }
62
+ findbugs {
63
+ ignoreFailures = true
64
+ }
65
+
52
66
  task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
53
67
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
54
68
  script "${project.name}.gemspec"
@@ -6,16 +6,23 @@ import java.util.TreeMap;
6
6
  import java.util.Comparator;
7
7
  import java.io.IOException;
8
8
  import java.io.EOFException;
9
+
10
+ import com.google.common.annotations.VisibleForTesting;
9
11
  import com.google.common.base.Optional;
12
+ import com.google.common.collect.ImmutableList;
10
13
  import com.google.common.collect.ImmutableMap;
11
14
  import com.fasterxml.jackson.annotation.JsonCreator;
12
15
  import com.fasterxml.jackson.annotation.JsonValue;
16
+ import com.google.common.collect.Lists;
17
+ import org.embulk.spi.Exec;
18
+ import org.embulk.spi.type.Types;
13
19
  import org.msgpack.core.MessagePack;
14
20
  import org.msgpack.core.MessageFormat;
15
21
  import org.msgpack.core.MessageUnpacker;
16
22
  import org.msgpack.core.MessageInsufficientBufferException;
17
23
  import org.msgpack.core.buffer.MessageBuffer;
18
24
  import org.msgpack.core.buffer.MessageBufferInput;
25
+ import org.msgpack.value.Value;
19
26
  import org.msgpack.value.ValueType;
20
27
  import org.embulk.config.Config;
21
28
  import org.embulk.config.ConfigException;
@@ -58,6 +65,9 @@ import org.embulk.spi.util.dynamic.JsonColumnSetter;
58
65
  import org.embulk.spi.util.dynamic.DefaultValueSetter;
59
66
  import org.embulk.spi.util.dynamic.NullDefaultValueSetter;
60
67
 
68
+ import static org.embulk.spi.Exec.newConfigSource;
69
+ import static org.embulk.spi.type.Types.*;
70
+
61
71
  public class MsgpackParserPlugin
62
72
  implements ParserPlugin
63
73
  {
@@ -73,10 +83,14 @@ public class MsgpackParserPlugin
73
83
  public RowEncoding getRowEncoding();
74
84
 
75
85
  @Config("columns")
76
- public SchemaConfig getSchemaConfig();
86
+ @ConfigDefault("null")
87
+ public Optional<SchemaConfig> getSchemaConfig();
77
88
 
78
89
  @ConfigInject
79
90
  public BufferAllocator getBufferAllocator();
91
+
92
+ public void setSchemafulMode(boolean v);
93
+ public boolean getSchemafulMode();
80
94
  }
81
95
 
82
96
  public static enum FileEncoding
@@ -195,7 +209,30 @@ public class MsgpackParserPlugin
195
209
  {
196
210
  PluginTask task = config.loadConfig(PluginTask.class);
197
211
 
198
- control.run(task.dump(), task.getSchemaConfig().toSchema());
212
+ if (!task.getSchemaConfig().isPresent()) {
213
+ // If columns: is not set, the parser behaves in non-schemaful mode. It doesn't care about row encoding.
214
+ if (config.has("row_encoding")) {
215
+ throw new ConfigException("Setting row_encoding: is invalid if columns: is not set.");
216
+ }
217
+ task.setSchemafulMode(false);
218
+ }
219
+ else {
220
+ task.setSchemafulMode(true);
221
+ }
222
+
223
+ control.run(task.dump(), getSchemaConfig(task).toSchema());
224
+ }
225
+
226
+ @VisibleForTesting
227
+ SchemaConfig getSchemaConfig(PluginTask task)
228
+ {
229
+ Optional<SchemaConfig> schemaConfig = task.getSchemaConfig();
230
+ if (schemaConfig.isPresent()) {
231
+ return schemaConfig.get();
232
+ }
233
+ else {
234
+ return new SchemaConfig(ImmutableList.of(new ColumnConfig("record", JSON, newConfigSource())));
235
+ }
199
236
  }
200
237
 
201
238
  @Override
@@ -204,41 +241,75 @@ public class MsgpackParserPlugin
204
241
  {
205
242
  PluginTask task = taskSource.loadTask(PluginTask.class);
206
243
 
207
- RowEncoding rowEncoding = task.getRowEncoding();
244
+ boolean schemafulMode = task.getSchemafulMode();
208
245
  FileEncoding fileEncoding = task.getFileEncoding();
209
246
 
210
247
  try (MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(new FileInputMessageBufferInput(input));
211
248
  PageBuilder pageBuilder = new PageBuilder(task.getBufferAllocator(), schema, output)) {
212
249
 
213
- TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
214
- Map<Column, DynamicColumnSetter> setters = newColumnSetters(pageBuilder,
215
- task.getSchemaConfig(), timestampParsers, taskSource.loadTask(PluginTaskFormatter.class));
216
-
217
- RowReader reader;
218
- switch (rowEncoding) {
219
- case ARRAY:
220
- reader = new ArrayRowReader(setters);
221
- break;
222
- case MAP:
223
- reader = new MapRowReader(setters);
224
- break;
225
- default:
226
- throw new IllegalArgumentException("Unexpected row encoding");
227
- }
250
+ if (schemafulMode) {
251
+ RowEncoding rowEncoding = task.getRowEncoding();
252
+ TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, getSchemaConfig(task));
253
+ Map<Column, DynamicColumnSetter> setters = newColumnSetters(pageBuilder,
254
+ getSchemaConfig(task), timestampParsers, taskSource.loadTask(PluginTaskFormatter.class));
228
255
 
229
- while (input.nextFile()) {
230
- switch (fileEncoding) {
231
- case SEQUENCE:
232
- // do nothing
233
- break;
256
+ RowReader reader;
257
+ switch (rowEncoding) {
234
258
  case ARRAY:
235
- // skip array header to convert array to sequence
236
- unpacker.unpackArrayHeader();
259
+ reader = new ArrayRowReader(setters);
260
+ break;
261
+ case MAP:
262
+ reader = new MapRowReader(setters);
237
263
  break;
264
+ default:
265
+ throw new IllegalArgumentException("Unexpected row encoding");
266
+ }
267
+
268
+ while (input.nextFile()) {
269
+ switch (fileEncoding) {
270
+ case SEQUENCE:
271
+ // do nothing
272
+ break;
273
+ case ARRAY:
274
+ // skip array header to convert array to sequence
275
+ unpacker.unpackArrayHeader();
276
+ break;
277
+ }
278
+
279
+ while (reader.next(unpacker)) {
280
+ pageBuilder.addRecord();
281
+ }
238
282
  }
283
+ }
284
+ else {
285
+ // In non-schemaful mode, setters are not created.
286
+ while (input.nextFile()) {
287
+ switch (fileEncoding) {
288
+ case SEQUENCE:
289
+ // do nothing
290
+ break;
291
+ case ARRAY:
292
+ // skip array header to convert array to sequence
293
+ unpacker.unpackArrayHeader();
294
+ break;
295
+ }
239
296
 
240
- while (reader.next(unpacker)) {
241
- pageBuilder.addRecord();
297
+ while (true) {
298
+ Value v;
299
+ try {
300
+ v = unpacker.unpackValue();
301
+ if (v == null) {
302
+ break;
303
+ }
304
+ }
305
+ catch (MessageInsufficientBufferException e) {
306
+ break;
307
+ }
308
+
309
+ // The unpacked Value object is set to a page as a Json column value.
310
+ pageBuilder.setJson(0, v);
311
+ pageBuilder.addRecord();
312
+ }
242
313
  }
243
314
  }
244
315
 
@@ -264,29 +335,35 @@ public class MsgpackParserPlugin
264
335
  if (type instanceof BooleanType) {
265
336
  setter = new BooleanColumnSetter(pageBuilder, column, defaultValue);
266
337
 
267
- } else if (type instanceof LongType) {
338
+ }
339
+ else if (type instanceof LongType) {
268
340
  setter = new LongColumnSetter(pageBuilder, column, defaultValue);
269
341
 
270
- } else if (type instanceof DoubleType) {
342
+ }
343
+ else if (type instanceof DoubleType) {
271
344
  setter = new DoubleColumnSetter(pageBuilder, column, defaultValue);
272
345
 
273
- } else if (type instanceof StringType) {
346
+ }
347
+ else if (type instanceof StringType) {
274
348
  TimestampFormatter formatter = new TimestampFormatter(formatterTask,
275
349
  Optional.of(c.getOption().loadConfig(TimestampColumnOption.class)));
276
350
  setter = new StringColumnSetter(pageBuilder, column, defaultValue, formatter);
277
351
 
278
- } else if (type instanceof TimestampType) {
352
+ }
353
+ else if (type instanceof TimestampType) {
279
354
  // TODO use flexible time format like Ruby's Time.parse
280
355
  TimestampParser parser = timestampParsers[column.getIndex()];
281
356
  setter = new TimestampColumnSetter(pageBuilder, column, defaultValue, parser);
282
357
 
283
- } else if (type instanceof JsonType) {
358
+ }
359
+ else if (type instanceof JsonType) {
284
360
  TimestampFormatter formatter = new TimestampFormatter(formatterTask,
285
361
  Optional.of(c.getOption().loadConfig(TimestampColumnOption.class)));
286
362
  setter = new JsonColumnSetter(pageBuilder, column, defaultValue, formatter);
287
363
 
288
- } else {
289
- throw new ConfigException("Unknown column type: "+type);
364
+ }
365
+ else {
366
+ throw new ConfigException("Unknown column type: " + type);
290
367
  }
291
368
 
292
369
  builder.put(column, setter);
@@ -317,10 +394,12 @@ public class MsgpackParserPlugin
317
394
  BigInteger bi = unpacker.unpackBigInteger();
318
395
  if (0 <= bi.compareTo(LONG_MIN) && bi.compareTo(LONG_MAX) <= 0) {
319
396
  setter.set(bi.longValue());
320
- } else {
397
+ }
398
+ else {
321
399
  setter.setNull(); // TODO set default value
322
400
  }
323
- } else {
401
+ }
402
+ else {
324
403
  setter.set(unpacker.unpackLong());
325
404
  }
326
405
  break;
@@ -372,14 +451,16 @@ public class MsgpackParserPlugin
372
451
  int n;
373
452
  try {
374
453
  n = unpacker.unpackArrayHeader();
375
- } catch (MessageInsufficientBufferException ex) {
454
+ }
455
+ catch (MessageInsufficientBufferException ex) {
376
456
  // TODO EOFException?
377
457
  return false;
378
458
  }
379
459
  for (int i = 0; i < n; i++) {
380
460
  if (i < columnSetters.length) {
381
461
  unpackToSetter(unpacker, columnSetters[i]);
382
- } else {
462
+ }
463
+ else {
383
464
  unpacker.skipValue();
384
465
  }
385
466
  }
@@ -405,7 +486,8 @@ public class MsgpackParserPlugin
405
486
  int n;
406
487
  try {
407
488
  n = unpacker.unpackMapHeader();
408
- } catch (MessageInsufficientBufferException ex) {
489
+ }
490
+ catch (MessageInsufficientBufferException ex) {
409
491
  // TODO EOFException?
410
492
  return false;
411
493
  }
@@ -421,7 +503,8 @@ public class MsgpackParserPlugin
421
503
  DynamicColumnSetter setter = columnSetters.get(key);
422
504
  if (setter != null) {
423
505
  unpackToSetter(unpacker, setter);
424
- } else {
506
+ }
507
+ else {
425
508
  unpacker.skipValue();
426
509
  }
427
510
  }
@@ -455,7 +538,8 @@ public class MsgpackParserPlugin
455
538
  offset += 1;
456
539
  }
457
540
  return 0;
458
- } else {
541
+ }
542
+ else {
459
543
  return o1.size() - o2.size();
460
544
  }
461
545
  }
@@ -0,0 +1,465 @@
1
+ package org.embulk.parser.msgpack;
2
+
3
+ import com.google.common.collect.ImmutableList;
4
+ import com.google.common.collect.Lists;
5
+ import org.embulk.EmbulkTestRuntime;
6
+ import org.embulk.config.ConfigException;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.config.TaskSource;
9
+ import org.embulk.parser.msgpack.MsgpackParserPlugin;
10
+ import org.embulk.parser.msgpack.MsgpackParserPlugin.FileEncoding;
11
+ import org.embulk.parser.msgpack.MsgpackParserPlugin.PluginTask;
12
+ import org.embulk.parser.msgpack.MsgpackParserPlugin.RowEncoding;
13
+ import org.embulk.spi.ColumnConfig;
14
+ import org.embulk.spi.FileInput;
15
+ import org.embulk.spi.FileInputRunner;
16
+ import org.embulk.spi.ParserPlugin;
17
+ import org.embulk.spi.Schema;
18
+ import org.embulk.spi.SchemaConfig;
19
+ import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
20
+ import org.embulk.spi.time.Timestamp;
21
+ import org.embulk.spi.type.Type;
22
+ import org.embulk.spi.type.Types;
23
+ import org.embulk.spi.util.InputStreamFileInput;
24
+ import org.embulk.spi.util.Pages;
25
+ import org.embulk.standards.LocalFileInputPlugin;
26
+ import org.junit.Before;
27
+ import org.junit.Rule;
28
+ import org.junit.Test;
29
+ import org.msgpack.core.MessagePack;
30
+ import org.msgpack.core.MessagePacker;
31
+ import org.msgpack.value.ArrayValue;
32
+ import org.msgpack.value.Value;
33
+
34
+ import java.io.ByteArrayInputStream;
35
+ import java.io.ByteArrayOutputStream;
36
+ import java.io.IOException;
37
+ import java.io.InputStream;
38
+ import java.util.List;
39
+ import java.util.Random;
40
+
41
+ import static org.junit.Assert.assertEquals;
42
+ import static org.junit.Assert.assertTrue;
43
+
44
+ public class TestMsgpackParserPlugin
45
+ {
46
+ @Rule
47
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
48
+
49
+ private ConfigSource config;
50
+ private Random random;
51
+ private MsgpackParserPlugin plugin;
52
+ private FileInputRunner runner;
53
+ private MockPageOutput output;
54
+
55
+ @Before
56
+ public void createResources()
57
+ {
58
+ config = config().set("type", "msgpack");
59
+ random = runtime.getRandom();
60
+ plugin = new MsgpackParserPlugin();
61
+ runner = new FileInputRunner(new LocalFileInputPlugin());
62
+ output = new MockPageOutput();
63
+ }
64
+
65
+ @Test
66
+ public void checkDefaultValues()
67
+ {
68
+ ConfigSource config = this.config.deepCopy();
69
+ PluginTask task = config.loadConfig(PluginTask.class);
70
+ assertEquals(FileEncoding.SEQUENCE, task.getFileEncoding());
71
+ assertEquals(RowEncoding.MAP, task.getRowEncoding());
72
+
73
+ // columns
74
+ SchemaConfig schemaConfig = plugin.getSchemaConfig(task);
75
+ assertEquals(1, schemaConfig.getColumnCount());
76
+ assertEquals(Types.JSON, schemaConfig.getColumnType(0));
77
+ }
78
+
79
+ @Test(expected = ConfigException.class)
80
+ public void throwConfigErrorByInvalidFileEncoding()
81
+ {
82
+ ConfigSource config = this.config.deepCopy()
83
+ .set("columns", sampleSchema())
84
+ .set("file_encoding", "invalid");
85
+ config.loadConfig(PluginTask.class);
86
+ }
87
+
88
+ @Test(expected = ConfigException.class)
89
+ public void throwConfigErrorByInvalidRowEncoding()
90
+ {
91
+ ConfigSource config = this.config.deepCopy()
92
+ .set("columns", sampleSchema())
93
+ .set("row_encoding", "invalid");
94
+ config.loadConfig(PluginTask.class);
95
+ }
96
+
97
+ @Test(expected = ConfigException.class)
98
+ public void throwConfigErrorIfSchemalessWithInvalidRowEncoding()
99
+ {
100
+ ConfigSource config = this.config.deepCopy()
101
+ .set("row_encoding", "invalid");
102
+ config.loadConfig(PluginTask.class);
103
+ }
104
+
105
+ @Test
106
+ public void parseArrayArray()
107
+ throws IOException
108
+ {
109
+ SchemaConfig schema = schema(
110
+ column("_c_boolean", Types.BOOLEAN),
111
+ column("_c_string", Types.STRING),
112
+ column("_c_json", Types.JSON),
113
+ column("_c_double", Types.DOUBLE),
114
+ column("_c_long", Types.LONG),
115
+ column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
116
+ );
117
+ ConfigSource config = this.config.deepCopy()
118
+ .set("columns", schema)
119
+ .set("file_encoding", "array")
120
+ .set("row_encoding", "array");
121
+
122
+ boolean vBoolean = random.nextBoolean();
123
+ String vString = nextString(random, random.nextInt(100));
124
+ double vDouble = random.nextDouble();
125
+ long vLong = random.nextLong();
126
+ String vJson = nextString(random, random.nextInt(100));
127
+ long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
128
+
129
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
130
+ try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
131
+ pk.packArrayHeader(1)
132
+ .packArrayHeader(schema.getColumnCount()) // 1 record
133
+ .packBoolean(vBoolean)
134
+ .packString(vString)
135
+ .packString(vJson)
136
+ .packDouble(vDouble)
137
+ .packLong(vLong)
138
+ .packLong(vTimestamp);
139
+ }
140
+
141
+ try (FileInput in = input(out.toByteArray())) {
142
+ transaction(config, input(out.toByteArray()), output);
143
+ }
144
+ }
145
+
146
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
147
+ assertEquals(1, records.size());
148
+ for (Object[] record : records) {
149
+ assertEquals(schema.getColumnCount(), record.length);
150
+ assertEquals(vBoolean, record[0]);
151
+ assertEquals(vString, record[1]);
152
+ assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
153
+ assertEquals(vDouble, (double) record[3], 0.001);
154
+ assertEquals(vLong, record[4]);
155
+ assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
156
+ }
157
+ }
158
+
159
+ @Test
160
+ public void parseSequenceArray()
161
+ throws IOException
162
+ {
163
+ SchemaConfig schema = schema(
164
+ column("_c_boolean", Types.BOOLEAN),
165
+ column("_c_string", Types.STRING),
166
+ column("_c_json", Types.JSON),
167
+ column("_c_double", Types.DOUBLE),
168
+ column("_c_long", Types.LONG),
169
+ column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
170
+ );
171
+ ConfigSource config = this.config.deepCopy()
172
+ .set("columns", schema)
173
+ .set("file_encoding", "sequence")
174
+ .set("row_encoding", "array");
175
+
176
+ boolean vBoolean = random.nextBoolean();
177
+ String vString = nextString(random, random.nextInt(100));
178
+ double vDouble = random.nextDouble();
179
+ long vLong = random.nextLong();
180
+ String vJson = nextString(random, random.nextInt(100));
181
+ long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
182
+
183
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
184
+ try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
185
+ pk.packArrayHeader(schema.getColumnCount()) // 1 record
186
+ .packBoolean(vBoolean)
187
+ .packString(vString)
188
+ .packString(vJson)
189
+ .packDouble(vDouble)
190
+ .packLong(vLong)
191
+ .packLong(vTimestamp);
192
+ }
193
+
194
+ try (FileInput in = input(out.toByteArray())) {
195
+ transaction(config, input(out.toByteArray()), output);
196
+ }
197
+ }
198
+
199
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
200
+ assertEquals(1, records.size());
201
+ for (Object[] record : records) {
202
+ assertEquals(schema.getColumnCount(), record.length);
203
+ assertEquals(vBoolean, record[0]);
204
+ assertEquals(vString, record[1]);
205
+ assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
206
+ assertEquals(vDouble, (double) record[3], 0.001);
207
+ assertEquals(vLong, record[4]);
208
+ assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
209
+ }
210
+ }
211
+
212
+ @Test
213
+ public void parseSequentialSchemalessData()
214
+ throws IOException
215
+ {
216
+ SchemaConfig schema = schema(column("record", Types.JSON));
217
+ ConfigSource config = this.config.deepCopy().set("file_encoding", "sequence");
218
+
219
+ boolean vBoolean = random.nextBoolean();
220
+ String vString = nextString(random, random.nextInt(100));
221
+ double vDouble = random.nextDouble();
222
+ long vLong = random.nextLong();
223
+ String vJson = nextString(random, random.nextInt(100));
224
+
225
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
226
+ try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
227
+ pk.packArrayHeader(5) // 1 record
228
+ .packBoolean(vBoolean)
229
+ .packString(vString)
230
+ .packString(vJson)
231
+ .packDouble(vDouble)
232
+ .packLong(vLong);
233
+ }
234
+
235
+ try (FileInput in = input(out.toByteArray())) {
236
+ transaction(config, input(out.toByteArray()), output);
237
+ }
238
+ }
239
+
240
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
241
+ assertEquals(1, records.size());
242
+ for (Object[] record : records) {
243
+ assertEquals(1, record.length);
244
+ assertTrue(((Value) record[0]).isArrayValue());
245
+ ArrayValue v = ((Value) record[0]).asArrayValue();
246
+ assertEquals(vBoolean, v.get(0).asBooleanValue().getBoolean());
247
+ assertEquals(vString, v.get(1).asStringValue().asString());
248
+ assertEquals(vJson, v.get(2).asStringValue().asString());
249
+ assertEquals(vDouble, v.get(3).asFloatValue().toDouble(), 0.001);
250
+ assertEquals(vLong, v.get(4).asIntegerValue().toLong());
251
+ }
252
+ }
253
+
254
+ @Test
255
+ public void parseSequenceMap()
256
+ throws IOException
257
+ {
258
+ SchemaConfig schema = schema(
259
+ column("_c_boolean", Types.BOOLEAN),
260
+ column("_c_string", Types.STRING),
261
+ column("_c_json", Types.JSON),
262
+ column("_c_double", Types.DOUBLE),
263
+ column("_c_long", Types.LONG),
264
+ column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
265
+ );
266
+ ConfigSource config = this.config.deepCopy()
267
+ .set("columns", schema)
268
+ .set("file_encoding", "sequence")
269
+ .set("row_encoding", "map");
270
+
271
+ boolean vBoolean = random.nextBoolean();
272
+ String vString = nextString(random, random.nextInt(100));
273
+ double vDouble = random.nextDouble();
274
+ long vLong = random.nextLong();
275
+ String vJson = nextString(random, random.nextInt(100));
276
+ long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
277
+
278
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
279
+ try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
280
+ pk.packMapHeader(schema.getColumnCount()) // 1 record
281
+ .packString(schema.getColumnName(0)).packBoolean(vBoolean)
282
+ .packString(schema.getColumnName(1)).packString(vString)
283
+ .packString(schema.getColumnName(2)).packString(vJson)
284
+ .packString(schema.getColumnName(3)).packDouble(vDouble)
285
+ .packString(schema.getColumnName(4)).packLong(vLong)
286
+ .packString(schema.getColumnName(5)).packLong(vTimestamp);
287
+ }
288
+
289
+ try (FileInput in = input(out.toByteArray())) {
290
+ transaction(config, input(out.toByteArray()), output);
291
+ }
292
+ }
293
+
294
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
295
+ assertEquals(1, records.size());
296
+ for (Object[] record : records) {
297
+ assertEquals(schema.getColumnCount(), record.length);
298
+ assertEquals(vBoolean, record[0]);
299
+ assertEquals(vString, record[1]);
300
+ assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
301
+ assertEquals(vDouble, (double) record[3], 0.001);
302
+ assertEquals(vLong, record[4]);
303
+ assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
304
+ }
305
+ }
306
+
307
+ @Test
308
+ public void parseArrayMap()
309
+ throws IOException
310
+ {
311
+ SchemaConfig schema = schema(
312
+ column("_c_boolean", Types.BOOLEAN),
313
+ column("_c_string", Types.STRING),
314
+ column("_c_json", Types.JSON),
315
+ column("_c_double", Types.DOUBLE),
316
+ column("_c_long", Types.LONG),
317
+ column("_c_timestamp", Types.TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S"))
318
+ );
319
+ ConfigSource config = this.config.deepCopy()
320
+ .set("columns", schema)
321
+ .set("file_encoding", "array")
322
+ .set("row_encoding", "map");
323
+
324
+ boolean vBoolean = random.nextBoolean();
325
+ String vString = nextString(random, random.nextInt(100));
326
+ double vDouble = random.nextDouble();
327
+ long vLong = random.nextLong();
328
+ String vJson = nextString(random, random.nextInt(100));
329
+ long vTimestamp = nextUnixtime(random, "2013-01-01 00:00:00", 1000);
330
+
331
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
332
+ try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
333
+ pk.packArrayHeader(1)
334
+ .packMapHeader(schema.getColumnCount()) // 1 record
335
+ .packString(schema.getColumnName(0)).packBoolean(vBoolean)
336
+ .packString(schema.getColumnName(1)).packString(vString)
337
+ .packString(schema.getColumnName(2)).packString(vJson)
338
+ .packString(schema.getColumnName(3)).packDouble(vDouble)
339
+ .packString(schema.getColumnName(4)).packLong(vLong)
340
+ .packString(schema.getColumnName(5)).packLong(vTimestamp);
341
+ }
342
+
343
+ try (FileInput in = input(out.toByteArray())) {
344
+ transaction(config, input(out.toByteArray()), output);
345
+ }
346
+ }
347
+
348
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
349
+ assertEquals(1, records.size());
350
+ for (Object[] record : records) {
351
+ assertEquals(schema.getColumnCount(), record.length);
352
+ assertEquals(vBoolean, record[0]);
353
+ assertEquals(vString, record[1]);
354
+ assertEquals(vJson, ((Value) record[2]).asStringValue().asString());
355
+ assertEquals(vDouble, (double) record[3], 0.001);
356
+ assertEquals(vLong, record[4]);
357
+ assertEquals(vTimestamp, ((Timestamp) record[5]).getEpochSecond());
358
+ }
359
+ }
360
+
361
+ @Test
362
+ public void parseArraySchemalessData()
363
+ throws IOException
364
+ {
365
+ SchemaConfig schema = schema(column("record", Types.JSON));
366
+ ConfigSource config = this.config.deepCopy().set("file_encoding", "array");
367
+
368
+ boolean vBoolean = random.nextBoolean();
369
+ String vString = nextString(random, random.nextInt(100));
370
+ double vDouble = random.nextDouble();
371
+ long vLong = random.nextLong();
372
+ String vJson = nextString(random, random.nextInt(100));
373
+
374
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
375
+ try (MessagePacker pk = MessagePack.newDefaultPacker(out)) {
376
+ pk.packArrayHeader(1)
377
+ .packArrayHeader(5) // 1 record
378
+ .packBoolean(vBoolean)
379
+ .packString(vString)
380
+ .packString(vJson)
381
+ .packDouble(vDouble)
382
+ .packLong(vLong);
383
+ }
384
+
385
+ try (FileInput in = input(out.toByteArray())) {
386
+ transaction(config, input(out.toByteArray()), output);
387
+ }
388
+ }
389
+
390
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
391
+ assertEquals(1, records.size());
392
+ for (Object[] record : records) {
393
+ assertEquals(1, record.length);
394
+ assertTrue(((Value) record[0]).isArrayValue());
395
+ ArrayValue v = ((Value) record[0]).asArrayValue();
396
+ assertEquals(vBoolean, v.get(0).asBooleanValue().getBoolean());
397
+ assertEquals(vString, v.get(1).asStringValue().asString());
398
+ assertEquals(vJson, v.get(2).asStringValue().asString());
399
+ assertEquals(vDouble, v.get(3).asFloatValue().toDouble(), 0.001);
400
+ assertEquals(vLong, v.get(4).asIntegerValue().toLong());
401
+ }
402
+ }
403
+
404
+ private ConfigSource config()
405
+ {
406
+ return runtime.getExec().newConfigSource();
407
+ }
408
+
409
+ private SchemaConfig sampleSchema()
410
+ {
411
+ return schema(column("_c0", Types.STRING));
412
+ }
413
+
414
+ private SchemaConfig schema(ColumnConfig... columns)
415
+ {
416
+ return new SchemaConfig(Lists.newArrayList(columns));
417
+ }
418
+
419
+ private ColumnConfig column(String name, Type type)
420
+ {
421
+ return column(name, type, config());
422
+ }
423
+
424
+ private ColumnConfig column(String name, Type type, ConfigSource config)
425
+ {
426
+ return new ColumnConfig(name, type, config);
427
+ }
428
+
429
+ private void transaction(ConfigSource config, final FileInput input, final MockPageOutput output)
430
+ {
431
+ plugin.transaction(config, new ParserPlugin.Control()
432
+ {
433
+ @Override
434
+ public void run(TaskSource taskSource, Schema schema)
435
+ {
436
+ plugin.run(taskSource, schema, input, output);
437
+ }
438
+ });
439
+ }
440
+
441
+ private FileInput input(byte[] bytes)
442
+ {
443
+ return new InputStreamFileInput(runtime.getBufferAllocator(), provider(new ByteArrayInputStream(bytes)));
444
+ }
445
+
446
+ private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
447
+ {
448
+ return new InputStreamFileInput.IteratorProvider(ImmutableList.copyOf(inputStreams));
449
+ }
450
+
451
+ private static String nextString(Random random, int lengthBound)
452
+ {
453
+ char[] text = new char[lengthBound];
454
+ for (int i = 0; i < text.length; i++) {
455
+ text[i] = (char) random.nextInt(255);
456
+ }
457
+ return new String(text);
458
+ }
459
+
460
+ private static long nextUnixtime(Random random, String baseTime, int bound)
461
+ {
462
+ long baseUnixtime = java.sql.Timestamp.valueOf(baseTime).getTime();
463
+ return baseUnixtime + random.nextInt(bound);
464
+ }
465
+ }
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-msgpack
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-24 00:00:00.000000000 Z
11
+ date: 2016-11-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
14
  requirement: !ruby/object:Gem::Requirement
21
15
  requirements:
22
16
  - - ~>
23
17
  - !ruby/object:Gem::Version
24
18
  version: '1.0'
19
+ name: bundler
25
20
  prerelease: false
26
21
  type: :development
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - '>='
24
+ - - ~>
32
25
  - !ruby/object:Gem::Version
33
- version: '10.0'
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
34
28
  requirement: !ruby/object:Gem::Requirement
35
29
  requirements:
36
30
  - - '>='
37
31
  - !ruby/object:Gem::Version
38
32
  version: '10.0'
33
+ name: rake
39
34
  prerelease: false
40
35
  type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
41
  description: Parses files encoded in MessagePack.
42
42
  email:
43
43
  - frsyuki@gmail.com
@@ -46,6 +46,7 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
+ - .travis.yml
49
50
  - COPYING
50
51
  - ChangeLog
51
52
  - README.md
@@ -59,8 +60,8 @@ files:
59
60
  - lib/embulk/guess/msgpack.rb
60
61
  - lib/embulk/parser/msgpack.rb
61
62
  - src/main/java/org/embulk/parser/msgpack/MsgpackParserPlugin.java
62
- - src/test/java/org/embulk/parser/TestMsgpackParserPlugin.java
63
- - classpath/embulk-parser-msgpack-0.2.1.jar
63
+ - src/test/java/org/embulk/parser/msgpack/TestMsgpackParserPlugin.java
64
+ - classpath/embulk-parser-msgpack-0.2.2.jar
64
65
  homepage: https://github.com/frsyuki/embulk-parser-msgpack
65
66
  licenses:
66
67
  - Apache 2.0
@@ -1,5 +0,0 @@
1
- package org.embulk.parser;
2
-
3
- public class TestMsgpackParserPlugin
4
- {
5
- }