embulk-filter-expand_json 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,9 @@
1
+ package org.embulk.filter.expand_json;
2
+
3
+ import com.jayway.jsonpath.spi.cache.NOOPCache;
4
+
5
+ // Test-only cache class, used by TestExpandJsonFilterPlugin#testUseUserDefiledCacheProvider.
6
+ public class MyNOOPCache
7
+ extends NOOPCache
8
+ {
9
+ }
@@ -6,6 +6,9 @@ import com.google.common.base.Throwables;
6
6
  import com.google.common.collect.ImmutableList;
7
7
  import com.google.common.collect.ImmutableMap;
8
8
  import com.jayway.jsonpath.InvalidJsonException;
9
+ import com.jayway.jsonpath.spi.cache.CacheProvider;
10
+ import com.jayway.jsonpath.spi.cache.LRUCache;
11
+ import com.jayway.jsonpath.spi.cache.NOOPCache;
9
12
  import org.embulk.EmbulkTestRuntime;
10
13
  import org.embulk.config.ConfigException;
11
14
  import org.embulk.config.ConfigLoader;
@@ -31,11 +34,18 @@ import org.junit.rules.ExpectedException;
31
34
  import org.msgpack.value.MapValue;
32
35
  import org.msgpack.value.Value;
33
36
 
37
+ import java.lang.reflect.Field;
34
38
  import java.util.List;
39
+ import java.util.Optional;
35
40
 
36
41
  import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.Control;
37
42
  import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.PluginTask;
38
- import static org.embulk.spi.type.Types.*;
43
+ import static org.embulk.spi.type.Types.BOOLEAN;
44
+ import static org.embulk.spi.type.Types.DOUBLE;
45
+ import static org.embulk.spi.type.Types.JSON;
46
+ import static org.embulk.spi.type.Types.LONG;
47
+ import static org.embulk.spi.type.Types.STRING;
48
+ import static org.embulk.spi.type.Types.TIMESTAMP;
39
49
  import static org.junit.Assert.assertEquals;
40
50
  import static org.junit.Assert.assertNull;
41
51
  import static org.junit.Assert.assertTrue;
@@ -70,6 +80,25 @@ public class TestExpandJsonFilterPlugin
70
80
  expandJsonFilterPlugin = new ExpandJsonFilterPlugin();
71
81
  }
72
82
 
83
+ @Before
84
+ public void clearCacheProvider()
85
+ {
86
+ // NOTE: CacheProvider keeps its cache in private static fields,
87
+ // so reset those fields via reflection before each test runs.
88
+ try {
89
+ Class<?> klass = Class.forName(CacheProvider.class.getName());
90
+ Field cache = klass.getDeclaredField("cache");
91
+ cache.setAccessible(true);
92
+ cache.set(null, null);
93
+ Field cachingEnabled = klass.getDeclaredField("cachingEnabled");
94
+ cachingEnabled.setAccessible(true);
95
+ cachingEnabled.setBoolean(null, false);
96
+ }
97
+ catch (IllegalAccessException | NoSuchFieldException | ClassNotFoundException e) {
98
+ Throwables.propagate(e);
99
+ }
100
+ }
101
+
73
102
  private ConfigSource getConfigFromYaml(String yaml)
74
103
  {
75
104
  ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
@@ -229,6 +258,24 @@ public class TestExpandJsonFilterPlugin
229
258
  });
230
259
  }
231
260
 
261
+ @Test
262
+ public void testThrowExceptionUnsupportedCacheProvider()
263
+ {
264
+ String configYaml = "" +
265
+ "type: expand_json\n" +
266
+ "json_column_name: _c0\n" +
267
+ "cache_provider: unsupported_cache_provider\n" +
268
+ "expanded_columns:\n" +
269
+ " - {name: _e1, type: string}";
270
+ ConfigSource config = getConfigFromYaml(configYaml);
271
+
272
+ exception.expect(ConfigException.class);
273
+ exception.expectMessage("Cache Provider 'unsupported_cache_provider' is not supported: unsupported_cache_provider.");
274
+ expandJsonFilterPlugin.transaction(config, schema, (taskSource, schema) -> {
275
+ // do nothing
276
+ });
277
+ }
278
+
232
279
  @Test
233
280
  public void testDefaultValue()
234
281
  {
@@ -251,6 +298,42 @@ public class TestExpandJsonFilterPlugin
251
298
  assertEquals("%Y-%m-%d %H:%M:%S.%N %z", task.getDefaultTimestampFormat());
252
299
  assertEquals(false, task.getStopOnInvalidRecord());
253
300
  assertEquals(false, task.getKeepExpandingJsonColumn());
301
+ assertEquals(Optional.empty(), task.getCacheProviderName());
302
+ expandJsonFilterPlugin.transaction(config, schema, (taskSource, schema) -> {
303
+ assertEquals(LRUCache.class, CacheProvider.getCache().getClass());
304
+ });
305
+ }
306
+
307
+ @Test
308
+ public void testUseNOOPCacheProvider()
309
+ {
310
+ String configYaml = "" +
311
+ "type: expand_json\n" +
312
+ "json_column_name: _c0\n" +
313
+ "cache_provider: noop\n" +
314
+ "expanded_columns:\n" +
315
+ " - {name: _e0, type: string}";
316
+ ConfigSource config = getConfigFromYaml(configYaml);
317
+
318
+ expandJsonFilterPlugin.transaction(config, schema, (taskSource, schema) -> {
319
+ assertEquals(NOOPCache.class, CacheProvider.getCache().getClass());
320
+ });
321
+ }
322
+
323
+ @Test
324
+ public void testUseUserDefiledCacheProvider()
325
+ {
326
+ String configYaml = "" +
327
+ "type: expand_json\n" +
328
+ "json_column_name: _c0\n" +
329
+ "cache_provider: " + MyNOOPCache.class.getName() + "\n" +
330
+ "expanded_columns:\n" +
331
+ " - {name: _e0, type: string}";
332
+ ConfigSource config = getConfigFromYaml(configYaml);
333
+
334
+ expandJsonFilterPlugin.transaction(config, schema, (taskSource, schema) -> {
335
+ assertEquals(MyNOOPCache.class, CacheProvider.getCache().getClass());
336
+ });
254
337
  }
255
338
 
256
339
  /*
@@ -368,7 +451,7 @@ public class TestExpandJsonFilterPlugin
368
451
  fail();
369
452
  }
370
453
  catch (Throwable t) {
371
- assertTrue(t instanceof DataException);
454
+ assertEquals(DataException.class, t.getClass());
372
455
  }
373
456
  }
374
457
  }
@@ -1015,6 +1098,761 @@ public class TestExpandJsonFilterPlugin
1015
1098
  });
1016
1099
  }
1017
1100
 
1101
+ // with NOOPCacheProvider
1102
+ // NOTE: The tests below are the same as the tests above, except for the 'cache_provider' setting.
1103
+
1104
+ @Test
1105
+ public void testUnchangedColumnValuesWithNOOPCacheProvider()
1106
+ {
1107
+ String configYaml = "" +
1108
+ "type: expand_json\n" +
1109
+ "json_column_name: _c6\n" +
1110
+ "root: $.\n" +
1111
+ "cache_provider: noop\n" +
1112
+ "expanded_columns:\n" +
1113
+ " - {name: _e0, type: string}\n";
1114
+ final ConfigSource config = getConfigFromYaml(configYaml);
1115
+ final Schema schema = schema("_c0", STRING, "_c1", BOOLEAN, "_c2", DOUBLE,
1116
+ "_c3", LONG, "_c4", TIMESTAMP, "_c5", JSON, "_c6", STRING);
1117
+
1118
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1119
+ {
1120
+ @Override
1121
+ public void run(TaskSource taskSource, Schema outputSchema)
1122
+ {
1123
+ MockPageOutput mockPageOutput = new MockPageOutput();
1124
+
1125
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1126
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
1127
+ "_v0", // _c0
1128
+ true, // _c1
1129
+ 0.2, // _c2
1130
+ 3L, // _c3
1131
+ Timestamp.ofEpochSecond(4), // _c4
1132
+ newMapBuilder().put(s("_e0"), s("_v5")).build(), // _c5
1133
+ "{\"_e0\":\"_v6\"}")) {
1134
+ pageOutput.add(page);
1135
+ }
1136
+
1137
+ pageOutput.finish();
1138
+ }
1139
+
1140
+ List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
1141
+ assertEquals(1, records.size());
1142
+
1143
+ Object[] record = records.get(0);
1144
+ assertEquals("_v0", record[0]);
1145
+ assertEquals(true, record[1]);
1146
+ assertEquals(0.2, (double) record[2], 0.0001);
1147
+ assertEquals(3L, record[3]);
1148
+ assertEquals(Timestamp.ofEpochSecond(4), record[4]);
1149
+ assertEquals(newMapBuilder().put(s("_e0"), s("_v5")).build(), record[5]);
1150
+ }
1151
+ });
1152
+ }
1153
+
1154
+ @Test
1155
+ public void testStopOnInvalidRecordOptionWithNOOPCacheProvider()
1156
+ {
1157
+ String configYaml = "" +
1158
+ "type: expand_json\n" +
1159
+ "json_column_name: _c0\n" +
1160
+ "root: $.\n" +
1161
+ "cache_provider: noop\n" +
1162
+ "expanded_columns:\n" +
1163
+ " - {name: _e0, type: json}\n";
1164
+ final ConfigSource conf = getConfigFromYaml(configYaml);
1165
+ final Schema schema = schema("_c0", STRING);
1166
+
1167
+ { // stop_on_invalid_record: false
1168
+ ConfigSource config = conf.deepCopy();
1169
+
1170
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1171
+ {
1172
+ @Override
1173
+ public void run(TaskSource taskSource, Schema outputSchema)
1174
+ {
1175
+ MockPageOutput mockPageOutput = new MockPageOutput();
1176
+
1177
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1178
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
1179
+ "{\"_e0\":\"\"}", "{\"_e0\":{}}")) {
1180
+ pageOutput.add(page);
1181
+ }
1182
+
1183
+ pageOutput.finish();
1184
+ }
1185
+
1186
+ List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
1187
+ assertEquals(1, records.size());
1188
+ assertEquals(0, ((MapValue) records.get(0)[0]).size()); // {}
1189
+ }
1190
+ });
1191
+ }
1192
+
1193
+ // NOTE: CacheProvider was set by the block above, so it must be cleared before the block below runs.
1194
+ clearCacheProvider();
1195
+ { // stop_on_invalid_record: true
1196
+ ConfigSource config = conf.deepCopy().set("stop_on_invalid_record", true);
1197
+ try {
1198
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1199
+ {
1200
+ @Override
1201
+ public void run(TaskSource taskSource, Schema outputSchema)
1202
+ {
1203
+ MockPageOutput mockPageOutput = new MockPageOutput();
1204
+
1205
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1206
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
1207
+ "{\"_e0\":\"\"}", "{\"_e0\":{}}")) {
1208
+ pageOutput.add(page);
1209
+ }
1210
+
1211
+ pageOutput.finish();
1212
+ }
1213
+ }
1214
+ });
1215
+ fail();
1216
+ }
1217
+ catch (Throwable t) {
1218
+ t.printStackTrace();
1219
+ assertEquals(DataException.class, t.getClass());
1220
+ }
1221
+ }
1222
+ }
1223
+
1224
+ @Test
1225
+ public void testExpandJsonKeyToSchemaWithNOOPCacheProvider()
1226
+ {
1227
+ String configYaml = "" +
1228
+ "type: expand_json\n" +
1229
+ "json_column_name: _c0\n" +
1230
+ "root: $.\n" +
1231
+ "cache_provider: noop\n" +
1232
+ "expanded_columns:\n" +
1233
+ " - {name: _j1, type: boolean}\n" +
1234
+ " - {name: _j2, type: long}\n" +
1235
+ " - {name: _j3, type: timestamp}\n" +
1236
+ " - {name: _j4, type: double}\n" +
1237
+ " - {name: _j5, type: string}\n" +
1238
+ " - {name: _j6, type: json}\n" +
1239
+ " - {name: _c0, type: string}\n";
1240
+
1241
+ ConfigSource config = getConfigFromYaml(configYaml);
1242
+
1243
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1244
+ {
1245
+ @Override
1246
+ public void run(TaskSource taskSource, Schema outputSchema)
1247
+ {
1248
+ assertEquals(8, outputSchema.getColumnCount());
1249
+
1250
+ Column new_j1 = outputSchema.getColumn(0);
1251
+ Column new_j2 = outputSchema.getColumn(1);
1252
+ Column new_j3 = outputSchema.getColumn(2);
1253
+ Column new_j4 = outputSchema.getColumn(3);
1254
+ Column new_j5 = outputSchema.getColumn(4);
1255
+ Column new_j6 = outputSchema.getColumn(5);
1256
+ Column new_c0 = outputSchema.getColumn(6);
1257
+ Column old_c1 = outputSchema.getColumn(7);
1258
+
1259
+ assertEquals("_j1", new_j1.getName());
1260
+ assertEquals(BOOLEAN, new_j1.getType());
1261
+ assertEquals("_j2", new_j2.getName());
1262
+ assertEquals(LONG, new_j2.getType());
1263
+ assertEquals("_j3", new_j3.getName());
1264
+ assertEquals(TIMESTAMP, new_j3.getType());
1265
+ assertEquals("_j4", new_j4.getName());
1266
+ assertEquals(DOUBLE, new_j4.getType());
1267
+ assertEquals("_j5", new_j5.getName());
1268
+ assertEquals(STRING, new_j5.getType());
1269
+ assertEquals("_j6", new_j6.getName());
1270
+ assertEquals(JSON, new_j6.getType());
1271
+ assertEquals("_c0", new_c0.getName());
1272
+ assertEquals(STRING, new_c0.getType());
1273
+ assertEquals("_c1", old_c1.getName());
1274
+ assertEquals(STRING, old_c1.getType());
1275
+ }
1276
+ });
1277
+ }
1278
+
1279
+ @Test
1280
+ public void testColumnBasedTimezoneWithNOOPCacheProvider()
1281
+ {
1282
+ String configYaml = "" +
1283
+ "type: expand_json\n" +
1284
+ "json_column_name: _c0\n" +
1285
+ "root: $.\n" +
1286
+ "cache_provider: noop\n" +
1287
+ "expanded_columns:\n" +
1288
+ " - {name: _j0, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
1289
+ " - {name: _j1, type: timestamp, format: '%Y-%m-%d %H:%M:%S', timezone: 'Asia/Tokyo'}\n";
1290
+
1291
+ ConfigSource config = getConfigFromYaml(configYaml);
1292
+ final Schema schema = schema("_c0", JSON, "_c1", STRING);
1293
+
1294
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1295
+ {
1296
+ @Override
1297
+ public void run(TaskSource taskSource, Schema outputSchema)
1298
+ {
1299
+ MockPageOutput mockPageOutput = new MockPageOutput();
1300
+ Value data = newMapBuilder()
1301
+ .put(s("_j0"), s("2014-10-21 04:44:33 +0000"))
1302
+ .put(s("_j1"), s("2014-10-21 04:44:33"))
1303
+ .build();
1304
+
1305
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1306
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
1307
+ pageOutput.add(page);
1308
+ }
1309
+
1310
+ pageOutput.finish();
1311
+ }
1312
+
1313
+ PageReader pageReader = new PageReader(outputSchema);
1314
+
1315
+ for (Page page : mockPageOutput.pages) {
1316
+ pageReader.setPage(page);
1317
+ assertEquals("2014-10-21 04:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(0)).toString());
1318
+ assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(1)).toString());
1319
+ assertEquals(c1Data, pageReader.getString(outputSchema.getColumn(2)));
1320
+ }
1321
+ }
1322
+ });
1323
+ }
1324
+
1325
+ @Test
1326
+ public void testExpandJsonValuesFromJsonWithNOOPCacheProvider()
1327
+ {
1328
+ String configYaml = "" +
1329
+ "type: expand_json\n" +
1330
+ "json_column_name: _c0\n" +
1331
+ "root: $.\n" +
1332
+ "default_timezone: Asia/Tokyo\n" +
1333
+ "cache_provider: noop\n" +
1334
+ "expanded_columns:\n" +
1335
+ " - {name: _j0, type: boolean}\n" +
1336
+ " - {name: _j1, type: long}\n" +
1337
+ " - {name: _j2, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
1338
+ " - {name: _j3, type: double}\n" +
1339
+ " - {name: _j4, type: string}\n" +
1340
+ " - {name: _j5, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
1341
+ " - {name: _j6, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}\n" +
1342
+ // JsonPath: https://github.com/jayway/JsonPath
1343
+ " - {name: '_j7.store.book[*].author', type: string}\n" +
1344
+ " - {name: '_j7..book[?(@.price <= $[''_j7''][''expensive''])].author', type: string}\n" +
1345
+ " - {name: '_j7..book[?(@.isbn)]', type: string}\n" +
1346
+ " - {name: '_j7..book[?(@.author =~ /.*REES/i)].title', type: string}\n" +
1347
+ " - {name: '_j7.store.book[2].author', type: string}\n" +
1348
+ " - {name: _c0, type: string}\n";
1349
+
1350
+ ConfigSource config = getConfigFromYaml(configYaml);
1351
+ final Schema schema = schema("_c0", JSON, "_c1", STRING);
1352
+
1353
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1354
+ {
1355
+ @Override
1356
+ public void run(TaskSource taskSource, Schema outputSchema)
1357
+ {
1358
+ MockPageOutput mockPageOutput = new MockPageOutput();
1359
+ Value data = newMapBuilder()
1360
+ .put(s("_j0"), b(true))
1361
+ .put(s("_j1"), i(2))
1362
+ .put(s("_j2"), s("2014-10-21 04:44:33 +0900"))
1363
+ .put(s("_j3"), f(4.4))
1364
+ .put(s("_j4"), s("v5"))
1365
+ .put(s("_j5"), s("2014-10-21 04:44:33 +0000"))
1366
+ .put(s("_j6"), s("2014-10-21 04:44:33"))
1367
+ .put(s("_j7"), newMapBuilder()
1368
+ .put(s("store"), newMapBuilder()
1369
+ .put(s("book"), newArray(
1370
+ newMap(s("author"), s("Nigel Rees"), s("title"), s("Sayings of the Century"), s("price"), f(8.95)),
1371
+ newMap(s("author"), s("Evelyn Waugh"), s("title"), s("Sword of Honour"), s("price"), f(12.99)),
1372
+ newMap(s("author"), s("Herman Melville"), s("title"), s("Moby Dick"), s("isbn"), s("0-553-21311-3"), s("price"), f(8.99)),
1373
+ newMap(s("author"), s("J. R. R. Tolkien"), s("title"), s("The Lord of the Rings"), s("isbn"), s("0-395-19395-8"), s("price"), f(22.99))
1374
+ ))
1375
+ .put(s("bicycle"), newMap(s("color"), s("red"), s("price"), f(19.95)))
1376
+ .build())
1377
+ .put(s("expensive"), i(10))
1378
+ .build())
1379
+ .put(s("_c0"), s("v12"))
1380
+ .build();
1381
+
1382
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1383
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
1384
+ pageOutput.add(page);
1385
+ }
1386
+
1387
+ pageOutput.finish();
1388
+ }
1389
+
1390
+ PageReader pageReader = new PageReader(outputSchema);
1391
+
1392
+ for (Page page : mockPageOutput.pages) {
1393
+ pageReader.setPage(page);
1394
+ assertEquals(true, pageReader.getBoolean(outputSchema.getColumn(0)));
1395
+ assertEquals(2, pageReader.getLong(outputSchema.getColumn(1)));
1396
+ assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(2)).toString());
1397
+ assertEquals(String.valueOf(4.4), String.valueOf(pageReader.getDouble(outputSchema.getColumn(3))));
1398
+ assertEquals("v5", pageReader.getString(outputSchema.getColumn(4)));
1399
+ assertEquals("2014-10-21 04:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(5)).toString());
1400
+ assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(6)).toString());
1401
+ assertEquals("[\"Nigel Rees\",\"Evelyn Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]",
1402
+ pageReader.getString(outputSchema.getColumn(7)));
1403
+ assertEquals("[\"Nigel Rees\",\"Herman Melville\"]", pageReader.getString(outputSchema.getColumn(8)));
1404
+ assertEquals("" +
1405
+ "[" +
1406
+ "{\"author\":\"Herman Melville\",\"title\":\"Moby Dick\",\"isbn\":\"0-553-21311-3\",\"price\":8.99}," +
1407
+ "{\"author\":\"J. R. R. Tolkien\",\"title\":\"The Lord of the Rings\",\"isbn\":\"0-395-19395-8\",\"price\":22.99}" +
1408
+ "]",
1409
+ pageReader.getString(outputSchema.getColumn(9)));
1410
+ assertEquals("[\"Sayings of the Century\"]", pageReader.getString(outputSchema.getColumn(10)));
1411
+ assertEquals("Herman Melville", pageReader.getString(outputSchema.getColumn(11)));
1412
+ assertEquals("v12", pageReader.getString(outputSchema.getColumn(12)));
1413
+ assertEquals(c1Data, pageReader.getString(outputSchema.getColumn(13)));
1414
+ }
1415
+ }
1416
+ });
1417
+ }
1418
+
1419
+ @Test(expected = DataException.class)
1420
+ public void testSetExpandedJsonColumnsSetInvalidDoubleValueWithNOOPCacheProvider()
1421
+ {
1422
+ setExpandedJsonColumnsWithInvalidValueWithNOOPCacheProvider("double", s("abcde"));
1423
+ }
1424
+
1425
+ @Test(expected = DataException.class)
1426
+ public void testSetExpandedJsonColumnsSetInvalidLongValueWithNOOPCacheProvider()
1427
+ {
1428
+ setExpandedJsonColumnsWithInvalidValueWithNOOPCacheProvider("long", s("abcde"));
1429
+ }
1430
+
1431
+ @Test(expected = DataException.class)
1432
+ public void testSetExpandedJsonColumnsSetInvalidTimestampValueWithNOOPCacheProvider()
1433
+ {
1434
+ setExpandedJsonColumnsWithInvalidValueWithNOOPCacheProvider("timestamp", s("abcde"));
1435
+ }
1436
+
1437
+ @Test(expected = DataException.class)
1438
+ public void testSetExpandedJsonColumnsSetInvalidJsonValueWithNOOPCacheProvider()
1439
+ {
1440
+ setExpandedJsonColumnsWithInvalidValueWithNOOPCacheProvider("json", s("abcde"));
1441
+ }
1442
+
1443
+ public void setExpandedJsonColumnsWithInvalidValueWithNOOPCacheProvider(String ValidType, final Value invalidValue)
1444
+ {
1445
+ String configYaml = "" +
1446
+ "type: expand_json\n" +
1447
+ "stop_on_invalid_record: 1\n" +
1448
+ "json_column_name: _c0\n" +
1449
+ "root: $.\n" +
1450
+ "default_timezone: Asia/Tokyo\n" +
1451
+ "cache_provider: noop\n" +
1452
+ "expanded_columns:\n" +
1453
+ " - {name: _j0, type: " + ValidType + "}\n";
1454
+
1455
+ ConfigSource config = getConfigFromYaml(configYaml);
1456
+ final Schema schema = schema("_c0", JSON, "_c1", STRING);
1457
+
1458
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1459
+ {
1460
+ @Override
1461
+ public void run(TaskSource taskSource, Schema outputSchema)
1462
+ {
1463
+ MockPageOutput mockPageOutput = new MockPageOutput();
1464
+ Value data = newMapBuilder()
1465
+ .put(s("_j0"), invalidValue)
1466
+ .build();
1467
+
1468
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1469
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
1470
+ pageOutput.add(page);
1471
+ }
1472
+
1473
+ pageOutput.finish();
1474
+ }
1475
+ }
1476
+ });
1477
+ }
1478
+
1479
+ @Test
1480
+ public void testExpandedJsonValuesWithKeepJsonColumnsWithNOOPCacheProvider()
1481
+ {
1482
+ final String configYaml = "" +
1483
+ "type: expand_json\n" +
1484
+ "json_column_name: _c1\n" +
1485
+ "root: $.\n" +
1486
+ "cache_provider: noop\n" +
1487
+ "expanded_columns:\n" +
1488
+ " - {name: _e0, type: string}\n" +
1489
+ "keep_expanding_json_column: true\n";
1490
+
1491
+ ConfigSource config = getConfigFromYaml(configYaml);
1492
+ final Schema schema = schema("_c0", STRING, "_c1", STRING);
1493
+
1494
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1495
+ {
1496
+ @Override
1497
+ public void run(TaskSource taskSource, Schema outputSchema)
1498
+ {
1499
+ MockPageOutput mockPageOutput = new MockPageOutput();
1500
+
1501
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1502
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
1503
+ "_v0", "{\"_e0\":\"_ev0\"}")) {
1504
+ pageOutput.add(page);
1505
+ }
1506
+
1507
+ pageOutput.finish();
1508
+ }
1509
+
1510
+ assertEquals(3, outputSchema.getColumnCount());
1511
+ Column column;
1512
+ { // 1st column
1513
+ column = outputSchema.getColumn(0);
1514
+ assertTrue(column.getName().equals("_c0") && column.getType().equals(STRING));
1515
+ }
1516
+ { // 2nd column
1517
+ column = outputSchema.getColumn(1);
1518
+ assertTrue(column.getName().equals("_c1") && column.getType().equals(STRING));
1519
+ }
1520
+ { // 3rd column
1521
+ column = outputSchema.getColumn(2);
1522
+ assertTrue(column.getName().equals("_e0") && column.getType().equals(STRING));
1523
+ }
1524
+
1525
+ for (Object[] record : Pages.toObjects(outputSchema, mockPageOutput.pages)) {
1526
+ assertEquals("_v0", record[0]);
1527
+ assertEquals("{\"_e0\":\"_ev0\"}", record[1]);
1528
+ assertEquals("_ev0", record[2]);
1529
+ }
1530
+ }
1531
+ });
1532
+ }
1533
+
1534
+ @Test
1535
+ public void testExpandSpecialJsonValuesFromStringWithNOOPCacheProvider()
1536
+ {
1537
+ final String configYaml = "" +
1538
+ "type: expand_json\n" +
1539
+ "json_column_name: _c1\n" +
1540
+ "root: $.\n" +
1541
+ "cache_provider: noop\n" +
1542
+ "expanded_columns:\n" +
1543
+ " - {name: _e0, type: string}\n" +
1544
+ " - {name: _e1, type: string}\n"; // the value will be null
1545
+
1546
+ ConfigSource config = getConfigFromYaml(configYaml);
1547
+ final Schema schema = schema("_c0", STRING, "_c1", STRING);
1548
+
1549
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1550
+ {
1551
+ @Override
1552
+ public void run(TaskSource taskSource, Schema outputSchema)
1553
+ {
1554
+ MockPageOutput mockPageOutput = new MockPageOutput();
1555
+
1556
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1557
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
1558
+ "_v0", "")) {
1559
+ pageOutput.add(page);
1560
+ }
1561
+
1562
+ pageOutput.finish();
1563
+ }
1564
+
1565
+ for (Object[] record : Pages.toObjects(outputSchema, mockPageOutput.pages)) {
1566
+ assertEquals("_v0", record[0]);
1567
+ assertNull(record[1]);
1568
+ assertNull(record[2]);
1569
+ }
1570
+ }
1571
+ });
1572
+ }
1573
+
1574
+ @Test
1575
+ public void testExpandJsonValuesFromStringWithNOOPCacheProvider()
1576
+ {
1577
+ String configYaml = "" +
1578
+ "type: expand_json\n" +
1579
+ "json_column_name: _c0\n" +
1580
+ "root: $.\n" +
1581
+ "default_timezone: Asia/Tokyo\n" +
1582
+ "cache_provider: noop\n" +
1583
+ "expanded_columns:\n" +
1584
+ " - {name: _j0, type: boolean}\n" +
1585
+ " - {name: _j1, type: long}\n" +
1586
+ " - {name: _j2, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
1587
+ " - {name: _j3, type: double}\n" +
1588
+ " - {name: _j4, type: string}\n" +
1589
+ " - {name: _j5, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
1590
+ " - {name: _j6, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}\n" +
1591
+ // JsonPath: https://github.com/jayway/JsonPath
1592
+ " - {name: '_j7.store.book[*].author', type: string}\n" +
1593
+ " - {name: '_j7..book[?(@.price <= $[''_j7''][''expensive''])].author', type: string}\n" +
1594
+ " - {name: '_j7..book[?(@.isbn)]', type: string}\n" +
1595
+ " - {name: '_j7..book[?(@.author =~ /.*REES/i)].title', type: string}\n" +
1596
+ " - {name: '_j7.store.book[2].author', type: string}\n" +
1597
+ " - {name: _c0, type: string}\n";
1598
+
1599
+ ConfigSource config = getConfigFromYaml(configYaml);
1600
+
1601
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1602
+ {
1603
+ @Override
1604
+ public void run(TaskSource taskSource, Schema outputSchema)
1605
+ {
1606
+ MockPageOutput mockPageOutput = new MockPageOutput();
1607
+ PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource,
1608
+ schema,
1609
+ outputSchema,
1610
+ mockPageOutput);
1611
+
1612
+ ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
1613
+ builder.put("_j0", true);
1614
+ builder.put("_j1", 2);
1615
+ builder.put("_j2", "2014-10-21 04:44:33 +0900");
1616
+ builder.put("_j3", 4.4);
1617
+ builder.put("_j4", "v5");
1618
+ builder.put("_j5", "2014-10-21 04:44:33 +0000");
1619
+ builder.put("_j6", "2014-10-21 04:44:33");
1620
+ builder.put("_j7",
1621
+ ImmutableMap.of("store",
1622
+ ImmutableMap.of("book",
1623
+ ImmutableList.of(ImmutableMap.of("author",
1624
+ "Nigel Rees",
1625
+ "title",
1626
+ "Sayings of the Century",
1627
+ "price",
1628
+ 8.95),
1629
+ ImmutableMap.of("author",
1630
+ "Evelyn Waugh",
1631
+ "title",
1632
+ "Sword of Honour",
1633
+ "price",
1634
+ 12.99),
1635
+ ImmutableMap.of("author",
1636
+ "Herman Melville",
1637
+ "title",
1638
+ "Moby Dick",
1639
+ "isbn",
1640
+ "0-553-21311-3",
1641
+ "price",
1642
+ 8.99),
1643
+ ImmutableMap.of("author",
1644
+ "J. R. R. Tolkien",
1645
+ "title",
1646
+ "The Lord of the Rings",
1647
+ "isbn",
1648
+ "0-395-19395-8",
1649
+ "price",
1650
+ 22.99)
1651
+ ),
1652
+ "bicycle",
1653
+ ImmutableMap.of("color",
1654
+ "red",
1655
+ "price",
1656
+ 19.95
1657
+ )
1658
+ ),
1659
+ "expensive",
1660
+ 10
1661
+ )
1662
+ /*
1663
+ {
1664
+ "store": {
1665
+ "book": [
1666
+ {
1667
+ "author": "Nigel Rees",
1668
+ "title": "Sayings of the Century",
1669
+ "price": 8.95
1670
+ },
1671
+ {
1672
+ "author": "Evelyn Waugh",
1673
+ "title": "Sword of Honour",
1674
+ "price": 12.99
1675
+ },
1676
+ {
1677
+ "author": "Herman Melville",
1678
+ "title": "Moby Dick",
1679
+ "isbn": "0-553-21311-3",
1680
+ "price": 8.99
1681
+ },
1682
+ {
1683
+ "author": "J. R. R. Tolkien",
1684
+ "title": "The Lord of the Rings",
1685
+ "isbn": "0-395-19395-8",
1686
+ "price": 22.99
1687
+ }
1688
+ ],
1689
+ "bicycle": {
1690
+ "color": "red",
1691
+ "price": 19.95
1692
+ }
1693
+ },
1694
+ "expensive": 10
1695
+ }
1696
+ */
1697
+ );
1698
+ builder.put("_c0", "v12");
1699
+
1700
+ String data = convertToJsonString(builder.build());
1701
+
1702
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(),
1703
+ schema,
1704
+ data, c1Data)) {
1705
+ pageOutput.add(page);
1706
+ }
1707
+
1708
+ pageOutput.finish();
1709
+ pageOutput.close();
1710
+
1711
+ PageReader pageReader = new PageReader(outputSchema);
1712
+
1713
+ for (Page page : mockPageOutput.pages) {
1714
+ pageReader.setPage(page);
1715
+ assertEquals(true, pageReader.getBoolean(outputSchema.getColumn(0)));
1716
+ assertEquals(2, pageReader.getLong(outputSchema.getColumn(1)));
1717
+ assertEquals("2014-10-20 19:44:33 UTC",
1718
+ pageReader.getTimestamp(outputSchema.getColumn(2)).toString());
1719
+ assertEquals(String.valueOf(4.4),
1720
+ String.valueOf(pageReader.getDouble(outputSchema.getColumn(3))));
1721
+ assertEquals("v5", pageReader.getString(outputSchema.getColumn(4)));
1722
+ assertEquals("2014-10-21 04:44:33 UTC",
1723
+ pageReader.getTimestamp(outputSchema.getColumn(5)).toString());
1724
+ assertEquals("2014-10-20 19:44:33 UTC",
1725
+ pageReader.getTimestamp(outputSchema.getColumn(6)).toString());
1726
+ assertEquals("[\"Nigel Rees\",\"Evelyn Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]",
1727
+ pageReader.getString(outputSchema.getColumn(7)));
1728
+ assertEquals("[\"Nigel Rees\",\"Herman Melville\"]",
1729
+ pageReader.getString(outputSchema.getColumn(8)));
1730
+ assertEquals("" +
1731
+ "[" +
1732
+ "{" +
1733
+ "\"author\":\"Herman Melville\"," +
1734
+ "\"title\":\"Moby Dick\"," +
1735
+ "\"isbn\":\"0-553-21311-3\"," +
1736
+ "\"price\":8.99" +
1737
+ "}," +
1738
+ "{" +
1739
+ "\"author\":\"J. R. R. Tolkien\"," +
1740
+ "\"title\":\"The Lord of the Rings\"," +
1741
+ "\"isbn\":\"0-395-19395-8\"," +
1742
+ "\"price\":22.99" +
1743
+ "}" +
1744
+ "]",
1745
+ pageReader.getString(outputSchema.getColumn(9)));
1746
+ assertEquals("[\"Sayings of the Century\"]",
1747
+ pageReader.getString(outputSchema.getColumn(10)));
1748
+ assertEquals("Herman Melville",
1749
+ pageReader.getString(outputSchema.getColumn(11)));
1750
+ assertEquals("v12",
1751
+ pageReader.getString(outputSchema.getColumn(12)));
1752
+ assertEquals(c1Data,
1753
+ pageReader.getString(outputSchema.getColumn(13)));
1754
+ }
1755
+ }
1756
+ });
1757
+ }
1758
+
1759
+ @Test
1760
+ public void testAbortBrokenJsonStringWithNOOPCacheProvider()
1761
+ {
1762
+ String configYaml = "" +
1763
+ "type: expand_json\n" +
1764
+ "json_column_name: _c0\n" +
1765
+ "root: $.\n" +
1766
+ "default_timezone: Asia/Tokyo\n" +
1767
+ "cache_provider: noop\n" +
1768
+ "expanded_columns:\n" +
1769
+ " - {name: _j0, type: string}\n";
1770
+ ConfigSource config = getConfigFromYaml(configYaml);
1771
+
1772
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1773
+ {
1774
+ @Override
1775
+ public void run(TaskSource taskSource, Schema outputSchema)
1776
+ {
1777
+ MockPageOutput mockPageOutput = new MockPageOutput();
1778
+ PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource,
1779
+ schema,
1780
+ outputSchema,
1781
+ mockPageOutput);
1782
+
1783
+ String data = getBrokenJsonString();
1784
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(),
1785
+ schema,
1786
+ data, c1Data)) {
1787
+ exception.expect(InvalidJsonException.class);
1788
+ exception.expectMessage("Unexpected End Of File position 12: null");
1789
+ pageOutput.add(page);
1790
+ }
1791
+
1792
+ pageOutput.finish();
1793
+ pageOutput.close();
1794
+
1795
+ PageReader pageReader = new PageReader(outputSchema);
1796
+
1797
+ for (Page page : mockPageOutput.pages) {
1798
+ pageReader.setPage(page);
1799
+ assertEquals("te", pageReader.getString(outputSchema.getColumn(0)));
1800
+ }
1801
+ }
1802
+ });
1803
+ }
1804
+
1805
+ @Test
1806
+ public void testParseNumbersInExponentialNotationWithNOOPCacheProvider()
1807
+ {
1808
+ final String configYaml = "" +
1809
+ "type: expand_json\n" +
1810
+ "json_column_name: _c1\n" +
1811
+ "root: $.\n" +
1812
+ "cache_provider: noop\n" +
1813
+ "expanded_columns:\n" +
1814
+ " - {name: _j0, type: double}\n" +
1815
+ " - {name: _j1, type: long}\n";
1816
+ ConfigSource config = getConfigFromYaml(configYaml);
1817
+ final Schema schema = schema("_c1", STRING);
1818
+
1819
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1820
+ {
1821
+ @Override
1822
+ public void run(TaskSource taskSource, Schema outputSchema)
1823
+ {
1824
+ MockPageOutput mockPageOutput = new MockPageOutput();
1825
+
1826
+ String doubleFloatingPoint = "-1.234e-5";
1827
+ double doubleFixedPoint = -0.00001234; // Used in the assertion below.
1828
+ String longFloatingPoint = "12345e3";
1829
+ long longFixedPoint = 12_345_000L; // Used in the assertion below.
1830
+
1831
+ String data = String.format(
1832
+ "{\"_j0\":%s, \"_j1\":%s}",
1833
+ doubleFloatingPoint,
1834
+ longFloatingPoint);
1835
+
1836
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1837
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
1838
+ pageOutput.add(page);
1839
+ }
1840
+
1841
+ pageOutput.finish();
1842
+ }
1843
+
1844
+ PageReader pageReader = new PageReader(outputSchema);
1845
+
1846
+ for (Page page : mockPageOutput.pages) {
1847
+ pageReader.setPage(page);
1848
+ assertEquals(doubleFixedPoint, pageReader.getDouble(outputSchema.getColumn(0)), 0.0);
1849
+ assertEquals(longFixedPoint, pageReader.getLong(outputSchema.getColumn(1)));
1850
+ }
1851
+ }
1852
+ });
1853
+ }
1854
+
1855
+
1018
1856
  private static Schema schema(Object... nameAndTypes)
1019
1857
  {
1020
1858
  Schema.Builder builder = Schema.builder();