embulk-filter-expand_json 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ package org.embulk.filter.expand_json;
2
+
3
+ import com.jayway.jsonpath.spi.cache.NOOPCache;
4
+
5
+ // Used only by the test TestExpandJsonFilterPlugin#testUseUserDefiledCacheProvider as a user-defined cache class.
6
+ public class MyNOOPCache
7
+ extends NOOPCache
8
+ {
9
+ }
@@ -6,6 +6,9 @@ import com.google.common.base.Throwables;
6
6
  import com.google.common.collect.ImmutableList;
7
7
  import com.google.common.collect.ImmutableMap;
8
8
  import com.jayway.jsonpath.InvalidJsonException;
9
+ import com.jayway.jsonpath.spi.cache.CacheProvider;
10
+ import com.jayway.jsonpath.spi.cache.LRUCache;
11
+ import com.jayway.jsonpath.spi.cache.NOOPCache;
9
12
  import org.embulk.EmbulkTestRuntime;
10
13
  import org.embulk.config.ConfigException;
11
14
  import org.embulk.config.ConfigLoader;
@@ -31,11 +34,18 @@ import org.junit.rules.ExpectedException;
31
34
  import org.msgpack.value.MapValue;
32
35
  import org.msgpack.value.Value;
33
36
 
37
+ import java.lang.reflect.Field;
34
38
  import java.util.List;
39
+ import java.util.Optional;
35
40
 
36
41
  import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.Control;
37
42
  import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.PluginTask;
38
- import static org.embulk.spi.type.Types.*;
43
+ import static org.embulk.spi.type.Types.BOOLEAN;
44
+ import static org.embulk.spi.type.Types.DOUBLE;
45
+ import static org.embulk.spi.type.Types.JSON;
46
+ import static org.embulk.spi.type.Types.LONG;
47
+ import static org.embulk.spi.type.Types.STRING;
48
+ import static org.embulk.spi.type.Types.TIMESTAMP;
39
49
  import static org.junit.Assert.assertEquals;
40
50
  import static org.junit.Assert.assertNull;
41
51
  import static org.junit.Assert.assertTrue;
@@ -70,6 +80,25 @@ public class TestExpandJsonFilterPlugin
70
80
  expandJsonFilterPlugin = new ExpandJsonFilterPlugin();
71
81
  }
72
82
 
83
+ @Before
84
+ public void clearCacheProvider()
85
+ {
86
+ // NOTE: CacheProvider keeps its cache in private static fields, so reset them via
87
+ // reflection before each test to keep the tests independent of one another.
88
+ try {
89
+ Class<?> klass = CacheProvider.class;
90
+ Field cache = klass.getDeclaredField("cache");
91
+ cache.setAccessible(true);
92
+ cache.set(null, null);
93
+ Field cachingEnabled = klass.getDeclaredField("cachingEnabled");
94
+ cachingEnabled.setAccessible(true);
95
+ cachingEnabled.setBoolean(null, false);
96
+ }
97
+ catch (IllegalAccessException | NoSuchFieldException e) {
98
+ throw new RuntimeException(e);
99
+ }
100
+ }
101
+
73
102
  private ConfigSource getConfigFromYaml(String yaml)
74
103
  {
75
104
  ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
@@ -229,6 +258,24 @@ public class TestExpandJsonFilterPlugin
229
258
  });
230
259
  }
231
260
 
261
+ @Test
262
+ public void testThrowExceptionUnsupportedCacheProvider()
263
+ {
264
+ String configYaml = "" +
265
+ "type: expand_json\n" +
266
+ "json_column_name: _c0\n" +
267
+ "cache_provider: unsupported_cache_provider\n" +
268
+ "expanded_columns:\n" +
269
+ " - {name: _e1, type: string}";
270
+ ConfigSource config = getConfigFromYaml(configYaml);
271
+
272
+ exception.expect(ConfigException.class);
273
+ exception.expectMessage("Cache Provider 'unsupported_cache_provider' is not supported: unsupported_cache_provider.");
274
+ expandJsonFilterPlugin.transaction(config, schema, (taskSource, schema) -> {
275
+ // do nothing
276
+ });
277
+ }
278
+
232
279
  @Test
233
280
  public void testDefaultValue()
234
281
  {
@@ -251,6 +298,42 @@ public class TestExpandJsonFilterPlugin
251
298
  assertEquals("%Y-%m-%d %H:%M:%S.%N %z", task.getDefaultTimestampFormat());
252
299
  assertEquals(false, task.getStopOnInvalidRecord());
253
300
  assertEquals(false, task.getKeepExpandingJsonColumn());
301
+ assertEquals(Optional.empty(), task.getCacheProviderName());
302
+ expandJsonFilterPlugin.transaction(config, schema, (taskSource, schema) -> {
303
+ assertEquals(LRUCache.class, CacheProvider.getCache().getClass());
304
+ });
305
+ }
306
+
307
+ @Test
308
+ public void testUseNOOPCacheProvider()
309
+ {
310
+ String configYaml = "" +
311
+ "type: expand_json\n" +
312
+ "json_column_name: _c0\n" +
313
+ "cache_provider: noop\n" +
314
+ "expanded_columns:\n" +
315
+ " - {name: _e0, type: string}";
316
+ ConfigSource config = getConfigFromYaml(configYaml);
317
+
318
+ expandJsonFilterPlugin.transaction(config, schema, (taskSource, schema) -> {
319
+ assertEquals(NOOPCache.class, CacheProvider.getCache().getClass());
320
+ });
321
+ }
322
+
323
+ @Test
324
+ public void testUseUserDefiledCacheProvider()
325
+ {
326
+ String configYaml = "" +
327
+ "type: expand_json\n" +
328
+ "json_column_name: _c0\n" +
329
+ "cache_provider: " + MyNOOPCache.class.getName() + "\n" +
330
+ "expanded_columns:\n" +
331
+ " - {name: _e0, type: string}";
332
+ ConfigSource config = getConfigFromYaml(configYaml);
333
+
334
+ expandJsonFilterPlugin.transaction(config, schema, (taskSource, schema) -> {
335
+ assertEquals(MyNOOPCache.class, CacheProvider.getCache().getClass());
336
+ });
254
337
  }
255
338
 
256
339
  /*
@@ -368,7 +451,7 @@ public class TestExpandJsonFilterPlugin
368
451
  fail();
369
452
  }
370
453
  catch (Throwable t) {
371
- assertTrue(t instanceof DataException);
454
+ assertEquals(DataException.class, t.getClass());
372
455
  }
373
456
  }
374
457
  }
@@ -1015,6 +1098,761 @@ public class TestExpandJsonFilterPlugin
1015
1098
  });
1016
1099
  }
1017
1100
 
1101
+ // with NOOPCacheProvider
1102
+ // NOTE: The tests below are the same as the tests above except for the 'cache_provider' setting.
1103
+
1104
+ @Test
1105
+ public void testUnchangedColumnValuesWithNOOPCacheProvider()
1106
+ {
1107
+ String configYaml = "" +
1108
+ "type: expand_json\n" +
1109
+ "json_column_name: _c6\n" +
1110
+ "root: $.\n" +
1111
+ "cache_provider: noop\n" +
1112
+ "expanded_columns:\n" +
1113
+ " - {name: _e0, type: string}\n";
1114
+ final ConfigSource config = getConfigFromYaml(configYaml);
1115
+ final Schema schema = schema("_c0", STRING, "_c1", BOOLEAN, "_c2", DOUBLE,
1116
+ "_c3", LONG, "_c4", TIMESTAMP, "_c5", JSON, "_c6", STRING);
1117
+
1118
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1119
+ {
1120
+ @Override
1121
+ public void run(TaskSource taskSource, Schema outputSchema)
1122
+ {
1123
+ MockPageOutput mockPageOutput = new MockPageOutput();
1124
+
1125
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1126
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
1127
+ "_v0", // _c0
1128
+ true, // _c1
1129
+ 0.2, // _c2
1130
+ 3L, // _c3
1131
+ Timestamp.ofEpochSecond(4), // _c4
1132
+ newMapBuilder().put(s("_e0"), s("_v5")).build(), // _c5
1133
+ "{\"_e0\":\"_v6\"}")) {
1134
+ pageOutput.add(page);
1135
+ }
1136
+
1137
+ pageOutput.finish();
1138
+ }
1139
+
1140
+ List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
1141
+ assertEquals(1, records.size());
1142
+
1143
+ Object[] record = records.get(0);
1144
+ assertEquals("_v0", record[0]);
1145
+ assertEquals(true, record[1]);
1146
+ assertEquals(0.2, (double) record[2], 0.0001);
1147
+ assertEquals(3L, record[3]);
1148
+ assertEquals(Timestamp.ofEpochSecond(4), record[4]);
1149
+ assertEquals(newMapBuilder().put(s("_e0"), s("_v5")).build(), record[5]);
1150
+ }
1151
+ });
1152
+ }
1153
+
1154
+ @Test
1155
+ public void testStopOnInvalidRecordOptionWithNOOPCacheProvider()
1156
+ {
1157
+ String configYaml = "" +
1158
+ "type: expand_json\n" +
1159
+ "json_column_name: _c0\n" +
1160
+ "root: $.\n" +
1161
+ "cache_provider: noop\n" +
1162
+ "expanded_columns:\n" +
1163
+ " - {name: _e0, type: json}\n";
1164
+ final ConfigSource conf = getConfigFromYaml(configYaml);
1165
+ final Schema schema = schema("_c0", STRING);
1166
+
1167
+ { // stop_on_invalid_record: false
1168
+ ConfigSource config = conf.deepCopy();
1169
+
1170
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1171
+ {
1172
+ @Override
1173
+ public void run(TaskSource taskSource, Schema outputSchema)
1174
+ {
1175
+ MockPageOutput mockPageOutput = new MockPageOutput();
1176
+
1177
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1178
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
1179
+ "{\"_e0\":\"\"}", "{\"_e0\":{}}")) {
1180
+ pageOutput.add(page);
1181
+ }
1182
+
1183
+ pageOutput.finish();
1184
+ }
1185
+
1186
+ List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
1187
+ assertEquals(1, records.size());
1188
+ assertEquals(0, ((MapValue) records.get(0)[0]).size()); // {}
1189
+ }
1190
+ });
1191
+ }
1192
+
1193
+ // NOTE: The transaction above has already set the CacheProvider, so clear it before running the next transaction.
1194
+ clearCacheProvider();
1195
+ { // stop_on_invalid_record: true
1196
+ ConfigSource config = conf.deepCopy().set("stop_on_invalid_record", true);
1197
+ try {
1198
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1199
+ {
1200
+ @Override
1201
+ public void run(TaskSource taskSource, Schema outputSchema)
1202
+ {
1203
+ MockPageOutput mockPageOutput = new MockPageOutput();
1204
+
1205
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1206
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
1207
+ "{\"_e0\":\"\"}", "{\"_e0\":{}}")) {
1208
+ pageOutput.add(page);
1209
+ }
1210
+
1211
+ pageOutput.finish();
1212
+ }
1213
+ }
1214
+ });
1215
+ fail();
1216
+ }
1217
+ catch (Throwable t) {
1218
+ // The failure is expected here, so only the exception type is verified (no stack trace dump).
1219
+ assertEquals(DataException.class, t.getClass());
1220
+ }
1221
+ }
1222
+ }
1223
+
1224
+ @Test
1225
+ public void testExpandJsonKeyToSchemaWithNOOPCacheProvider()
1226
+ {
1227
+ String configYaml = "" +
1228
+ "type: expand_json\n" +
1229
+ "json_column_name: _c0\n" +
1230
+ "root: $.\n" +
1231
+ "cache_provider: noop\n" +
1232
+ "expanded_columns:\n" +
1233
+ " - {name: _j1, type: boolean}\n" +
1234
+ " - {name: _j2, type: long}\n" +
1235
+ " - {name: _j3, type: timestamp}\n" +
1236
+ " - {name: _j4, type: double}\n" +
1237
+ " - {name: _j5, type: string}\n" +
1238
+ " - {name: _j6, type: json}\n" +
1239
+ " - {name: _c0, type: string}\n";
1240
+
1241
+ ConfigSource config = getConfigFromYaml(configYaml);
1242
+
1243
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1244
+ {
1245
+ @Override
1246
+ public void run(TaskSource taskSource, Schema outputSchema)
1247
+ {
1248
+ assertEquals(8, outputSchema.getColumnCount());
1249
+
1250
+ Column new_j1 = outputSchema.getColumn(0);
1251
+ Column new_j2 = outputSchema.getColumn(1);
1252
+ Column new_j3 = outputSchema.getColumn(2);
1253
+ Column new_j4 = outputSchema.getColumn(3);
1254
+ Column new_j5 = outputSchema.getColumn(4);
1255
+ Column new_j6 = outputSchema.getColumn(5);
1256
+ Column new_c0 = outputSchema.getColumn(6);
1257
+ Column old_c1 = outputSchema.getColumn(7);
1258
+
1259
+ assertEquals("_j1", new_j1.getName());
1260
+ assertEquals(BOOLEAN, new_j1.getType());
1261
+ assertEquals("_j2", new_j2.getName());
1262
+ assertEquals(LONG, new_j2.getType());
1263
+ assertEquals("_j3", new_j3.getName());
1264
+ assertEquals(TIMESTAMP, new_j3.getType());
1265
+ assertEquals("_j4", new_j4.getName());
1266
+ assertEquals(DOUBLE, new_j4.getType());
1267
+ assertEquals("_j5", new_j5.getName());
1268
+ assertEquals(STRING, new_j5.getType());
1269
+ assertEquals("_j6", new_j6.getName());
1270
+ assertEquals(JSON, new_j6.getType());
1271
+ assertEquals("_c0", new_c0.getName());
1272
+ assertEquals(STRING, new_c0.getType());
1273
+ assertEquals("_c1", old_c1.getName());
1274
+ assertEquals(STRING, old_c1.getType());
1275
+ }
1276
+ });
1277
+ }
1278
+
1279
+ @Test
1280
+ public void testColumnBasedTimezoneWithNOOPCacheProvider()
1281
+ {
1282
+ String configYaml = "" +
1283
+ "type: expand_json\n" +
1284
+ "json_column_name: _c0\n" +
1285
+ "root: $.\n" +
1286
+ "cache_provider: noop\n" +
1287
+ "expanded_columns:\n" +
1288
+ " - {name: _j0, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
1289
+ " - {name: _j1, type: timestamp, format: '%Y-%m-%d %H:%M:%S', timezone: 'Asia/Tokyo'}\n";
1290
+
1291
+ ConfigSource config = getConfigFromYaml(configYaml);
1292
+ final Schema schema = schema("_c0", JSON, "_c1", STRING);
1293
+
1294
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1295
+ {
1296
+ @Override
1297
+ public void run(TaskSource taskSource, Schema outputSchema)
1298
+ {
1299
+ MockPageOutput mockPageOutput = new MockPageOutput();
1300
+ Value data = newMapBuilder()
1301
+ .put(s("_j0"), s("2014-10-21 04:44:33 +0000"))
1302
+ .put(s("_j1"), s("2014-10-21 04:44:33"))
1303
+ .build();
1304
+
1305
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1306
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
1307
+ pageOutput.add(page);
1308
+ }
1309
+
1310
+ pageOutput.finish();
1311
+ }
1312
+
1313
+ PageReader pageReader = new PageReader(outputSchema);
1314
+
1315
+ for (Page page : mockPageOutput.pages) {
1316
+ pageReader.setPage(page);
1317
+ assertEquals("2014-10-21 04:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(0)).toString());
1318
+ assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(1)).toString());
1319
+ assertEquals(c1Data, pageReader.getString(outputSchema.getColumn(2)));
1320
+ }
1321
+ }
1322
+ });
1323
+ }
1324
+
1325
+ @Test
1326
+ public void testExpandJsonValuesFromJsonWithNOOPCacheProvider()
1327
+ {
1328
+ String configYaml = "" +
1329
+ "type: expand_json\n" +
1330
+ "json_column_name: _c0\n" +
1331
+ "root: $.\n" +
1332
+ "default_timezone: Asia/Tokyo\n" +
1333
+ "cache_provider: noop\n" +
1334
+ "expanded_columns:\n" +
1335
+ " - {name: _j0, type: boolean}\n" +
1336
+ " - {name: _j1, type: long}\n" +
1337
+ " - {name: _j2, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
1338
+ " - {name: _j3, type: double}\n" +
1339
+ " - {name: _j4, type: string}\n" +
1340
+ " - {name: _j5, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
1341
+ " - {name: _j6, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}\n" +
1342
+ // JsonPath: https://github.com/jayway/JsonPath
1343
+ " - {name: '_j7.store.book[*].author', type: string}\n" +
1344
+ " - {name: '_j7..book[?(@.price <= $[''_j7''][''expensive''])].author', type: string}\n" +
1345
+ " - {name: '_j7..book[?(@.isbn)]', type: string}\n" +
1346
+ " - {name: '_j7..book[?(@.author =~ /.*REES/i)].title', type: string}\n" +
1347
+ " - {name: '_j7.store.book[2].author', type: string}\n" +
1348
+ " - {name: _c0, type: string}\n";
1349
+
1350
+ ConfigSource config = getConfigFromYaml(configYaml);
1351
+ final Schema schema = schema("_c0", JSON, "_c1", STRING);
1352
+
1353
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1354
+ {
1355
+ @Override
1356
+ public void run(TaskSource taskSource, Schema outputSchema)
1357
+ {
1358
+ MockPageOutput mockPageOutput = new MockPageOutput();
1359
+ Value data = newMapBuilder()
1360
+ .put(s("_j0"), b(true))
1361
+ .put(s("_j1"), i(2))
1362
+ .put(s("_j2"), s("2014-10-21 04:44:33 +0900"))
1363
+ .put(s("_j3"), f(4.4))
1364
+ .put(s("_j4"), s("v5"))
1365
+ .put(s("_j5"), s("2014-10-21 04:44:33 +0000"))
1366
+ .put(s("_j6"), s("2014-10-21 04:44:33"))
1367
+ .put(s("_j7"), newMapBuilder()
1368
+ .put(s("store"), newMapBuilder()
1369
+ .put(s("book"), newArray(
1370
+ newMap(s("author"), s("Nigel Rees"), s("title"), s("Sayings of the Century"), s("price"), f(8.95)),
1371
+ newMap(s("author"), s("Evelyn Waugh"), s("title"), s("Sword of Honour"), s("price"), f(12.99)),
1372
+ newMap(s("author"), s("Herman Melville"), s("title"), s("Moby Dick"), s("isbn"), s("0-553-21311-3"), s("price"), f(8.99)),
1373
+ newMap(s("author"), s("J. R. R. Tolkien"), s("title"), s("The Lord of the Rings"), s("isbn"), s("0-395-19395-8"), s("price"), f(22.99))
1374
+ ))
1375
+ .put(s("bicycle"), newMap(s("color"), s("red"), s("price"), f(19.95)))
1376
+ .build())
1377
+ .put(s("expensive"), i(10))
1378
+ .build())
1379
+ .put(s("_c0"), s("v12"))
1380
+ .build();
1381
+
1382
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1383
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
1384
+ pageOutput.add(page);
1385
+ }
1386
+
1387
+ pageOutput.finish();
1388
+ }
1389
+
1390
+ PageReader pageReader = new PageReader(outputSchema);
1391
+
1392
+ for (Page page : mockPageOutput.pages) {
1393
+ pageReader.setPage(page);
1394
+ assertEquals(true, pageReader.getBoolean(outputSchema.getColumn(0)));
1395
+ assertEquals(2, pageReader.getLong(outputSchema.getColumn(1)));
1396
+ assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(2)).toString());
1397
+ assertEquals(String.valueOf(4.4), String.valueOf(pageReader.getDouble(outputSchema.getColumn(3))));
1398
+ assertEquals("v5", pageReader.getString(outputSchema.getColumn(4)));
1399
+ assertEquals("2014-10-21 04:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(5)).toString());
1400
+ assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(6)).toString());
1401
+ assertEquals("[\"Nigel Rees\",\"Evelyn Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]",
1402
+ pageReader.getString(outputSchema.getColumn(7)));
1403
+ assertEquals("[\"Nigel Rees\",\"Herman Melville\"]", pageReader.getString(outputSchema.getColumn(8)));
1404
+ assertEquals("" +
1405
+ "[" +
1406
+ "{\"author\":\"Herman Melville\",\"title\":\"Moby Dick\",\"isbn\":\"0-553-21311-3\",\"price\":8.99}," +
1407
+ "{\"author\":\"J. R. R. Tolkien\",\"title\":\"The Lord of the Rings\",\"isbn\":\"0-395-19395-8\",\"price\":22.99}" +
1408
+ "]",
1409
+ pageReader.getString(outputSchema.getColumn(9)));
1410
+ assertEquals("[\"Sayings of the Century\"]", pageReader.getString(outputSchema.getColumn(10)));
1411
+ assertEquals("Herman Melville", pageReader.getString(outputSchema.getColumn(11)));
1412
+ assertEquals("v12", pageReader.getString(outputSchema.getColumn(12)));
1413
+ assertEquals(c1Data, pageReader.getString(outputSchema.getColumn(13)));
1414
+ }
1415
+ }
1416
+ });
1417
+ }
1418
+
1419
+ @Test(expected = DataException.class)
1420
+ public void testSetExpandedJsonColumnsSetInvalidDoubleValueWithNOOPCacheProvider()
1421
+ {
1422
+ setExpandedJsonColumnsWithInvalidValueWithNOOPCacheProvider("double", s("abcde"));
1423
+ }
1424
+
1425
+ @Test(expected = DataException.class)
1426
+ public void testSetExpandedJsonColumnsSetInvalidLongValueWithNOOPCacheProvider()
1427
+ {
1428
+ setExpandedJsonColumnsWithInvalidValueWithNOOPCacheProvider("long", s("abcde"));
1429
+ }
1430
+
1431
+ @Test(expected = DataException.class)
1432
+ public void testSetExpandedJsonColumnsSetInvalidTimestampValueWithNOOPCacheProvider()
1433
+ {
1434
+ setExpandedJsonColumnsWithInvalidValueWithNOOPCacheProvider("timestamp", s("abcde"));
1435
+ }
1436
+
1437
+ @Test(expected = DataException.class)
1438
+ public void testSetExpandedJsonColumnsSetInvalidJsonValueWithNOOPCacheProvider()
1439
+ {
1440
+ setExpandedJsonColumnsWithInvalidValueWithNOOPCacheProvider("json", s("abcde"));
1441
+ }
1442
+
1443
+ public void setExpandedJsonColumnsWithInvalidValueWithNOOPCacheProvider(String validType, final Value invalidValue)
1444
+ {
1445
+ String configYaml = "" +
1446
+ "type: expand_json\n" +
1447
+ "stop_on_invalid_record: 1\n" +
1448
+ "json_column_name: _c0\n" +
1449
+ "root: $.\n" +
1450
+ "default_timezone: Asia/Tokyo\n" +
1451
+ "cache_provider: noop\n" +
1452
+ "expanded_columns:\n" +
1453
+ " - {name: _j0, type: " + validType + "}\n";
1454
+
1455
+ ConfigSource config = getConfigFromYaml(configYaml);
1456
+ final Schema schema = schema("_c0", JSON, "_c1", STRING);
1457
+
1458
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1459
+ {
1460
+ @Override
1461
+ public void run(TaskSource taskSource, Schema outputSchema)
1462
+ {
1463
+ MockPageOutput mockPageOutput = new MockPageOutput();
1464
+ Value data = newMapBuilder()
1465
+ .put(s("_j0"), invalidValue)
1466
+ .build();
1467
+
1468
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1469
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
1470
+ pageOutput.add(page);
1471
+ }
1472
+
1473
+ pageOutput.finish();
1474
+ }
1475
+ }
1476
+ });
1477
+ }
1478
+
1479
+ @Test
1480
+ public void testExpandedJsonValuesWithKeepJsonColumnsWithNOOPCacheProvider()
1481
+ {
1482
+ final String configYaml = "" +
1483
+ "type: expand_json\n" +
1484
+ "json_column_name: _c1\n" +
1485
+ "root: $.\n" +
1486
+ "cache_provider: noop\n" +
1487
+ "expanded_columns:\n" +
1488
+ " - {name: _e0, type: string}\n" +
1489
+ "keep_expanding_json_column: true\n";
1490
+
1491
+ ConfigSource config = getConfigFromYaml(configYaml);
1492
+ final Schema schema = schema("_c0", STRING, "_c1", STRING);
1493
+
1494
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1495
+ {
1496
+ @Override
1497
+ public void run(TaskSource taskSource, Schema outputSchema)
1498
+ {
1499
+ MockPageOutput mockPageOutput = new MockPageOutput();
1500
+
1501
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1502
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
1503
+ "_v0", "{\"_e0\":\"_ev0\"}")) {
1504
+ pageOutput.add(page);
1505
+ }
1506
+
1507
+ pageOutput.finish();
1508
+ }
1509
+
1510
+ assertEquals(3, outputSchema.getColumnCount());
1511
+ Column column;
1512
+ { // 1st column
1513
+ column = outputSchema.getColumn(0);
1514
+ assertTrue(column.getName().equals("_c0") && column.getType().equals(STRING));
1515
+ }
1516
+ { // 2nd column
1517
+ column = outputSchema.getColumn(1);
1518
+ assertTrue(column.getName().equals("_c1") && column.getType().equals(STRING));
1519
+ }
1520
+ { // 3rd column
1521
+ column = outputSchema.getColumn(2);
1522
+ assertTrue(column.getName().equals("_e0") && column.getType().equals(STRING));
1523
+ }
1524
+
1525
+ for (Object[] record : Pages.toObjects(outputSchema, mockPageOutput.pages)) {
1526
+ assertEquals("_v0", record[0]);
1527
+ assertEquals("{\"_e0\":\"_ev0\"}", record[1]);
1528
+ assertEquals("_ev0", record[2]);
1529
+ }
1530
+ }
1531
+ });
1532
+ }
1533
+
1534
+ @Test
1535
+ public void testExpandSpecialJsonValuesFromStringWithNOOPCacheProvider()
1536
+ {
1537
+ final String configYaml = "" +
1538
+ "type: expand_json\n" +
1539
+ "json_column_name: _c1\n" +
1540
+ "root: $.\n" +
1541
+ "cache_provider: noop\n" +
1542
+ "expanded_columns:\n" +
1543
+ " - {name: _e0, type: string}\n" +
1544
+ " - {name: _e1, type: string}\n"; // the value will be null
1545
+
1546
+ ConfigSource config = getConfigFromYaml(configYaml);
1547
+ final Schema schema = schema("_c0", STRING, "_c1", STRING);
1548
+
1549
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1550
+ {
1551
+ @Override
1552
+ public void run(TaskSource taskSource, Schema outputSchema)
1553
+ {
1554
+ MockPageOutput mockPageOutput = new MockPageOutput();
1555
+
1556
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1557
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
1558
+ "_v0", "")) {
1559
+ pageOutput.add(page);
1560
+ }
1561
+
1562
+ pageOutput.finish();
1563
+ }
1564
+
1565
+ for (Object[] record : Pages.toObjects(outputSchema, mockPageOutput.pages)) {
1566
+ assertEquals("_v0", record[0]);
1567
+ assertNull(record[1]);
1568
+ assertNull(record[2]);
1569
+ }
1570
+ }
1571
+ });
1572
+ }
1573
+
1574
+ @Test
1575
+ public void testExpandJsonValuesFromStringWithNOOPCacheProvider()
1576
+ {
1577
+ String configYaml = "" +
1578
+ "type: expand_json\n" +
1579
+ "json_column_name: _c0\n" +
1580
+ "root: $.\n" +
1581
+ "default_timezone: Asia/Tokyo\n" +
1582
+ "cache_provider: noop\n" +
1583
+ "expanded_columns:\n" +
1584
+ " - {name: _j0, type: boolean}\n" +
1585
+ " - {name: _j1, type: long}\n" +
1586
+ " - {name: _j2, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
1587
+ " - {name: _j3, type: double}\n" +
1588
+ " - {name: _j4, type: string}\n" +
1589
+ " - {name: _j5, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
1590
+ " - {name: _j6, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}\n" +
1591
+ // JsonPath: https://github.com/jayway/JsonPath
1592
+ " - {name: '_j7.store.book[*].author', type: string}\n" +
1593
+ " - {name: '_j7..book[?(@.price <= $[''_j7''][''expensive''])].author', type: string}\n" +
1594
+ " - {name: '_j7..book[?(@.isbn)]', type: string}\n" +
1595
+ " - {name: '_j7..book[?(@.author =~ /.*REES/i)].title', type: string}\n" +
1596
+ " - {name: '_j7.store.book[2].author', type: string}\n" +
1597
+ " - {name: _c0, type: string}\n";
1598
+
1599
+ ConfigSource config = getConfigFromYaml(configYaml);
1600
+
1601
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1602
+ {
1603
+ @Override
1604
+ public void run(TaskSource taskSource, Schema outputSchema)
1605
+ {
1606
+ MockPageOutput mockPageOutput = new MockPageOutput();
1607
+ PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource,
1608
+ schema,
1609
+ outputSchema,
1610
+ mockPageOutput);
1611
+
1612
+ ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
1613
+ builder.put("_j0", true);
1614
+ builder.put("_j1", 2);
1615
+ builder.put("_j2", "2014-10-21 04:44:33 +0900");
1616
+ builder.put("_j3", 4.4);
1617
+ builder.put("_j4", "v5");
1618
+ builder.put("_j5", "2014-10-21 04:44:33 +0000");
1619
+ builder.put("_j6", "2014-10-21 04:44:33");
1620
+ builder.put("_j7",
1621
+ ImmutableMap.of("store",
1622
+ ImmutableMap.of("book",
1623
+ ImmutableList.of(ImmutableMap.of("author",
1624
+ "Nigel Rees",
1625
+ "title",
1626
+ "Sayings of the Century",
1627
+ "price",
1628
+ 8.95),
1629
+ ImmutableMap.of("author",
1630
+ "Evelyn Waugh",
1631
+ "title",
1632
+ "Sword of Honour",
1633
+ "price",
1634
+ 12.99),
1635
+ ImmutableMap.of("author",
1636
+ "Herman Melville",
1637
+ "title",
1638
+ "Moby Dick",
1639
+ "isbn",
1640
+ "0-553-21311-3",
1641
+ "price",
1642
+ 8.99),
1643
+ ImmutableMap.of("author",
1644
+ "J. R. R. Tolkien",
1645
+ "title",
1646
+ "The Lord of the Rings",
1647
+ "isbn",
1648
+ "0-395-19395-8",
1649
+ "price",
1650
+ 22.99)
1651
+ ),
1652
+ "bicycle",
1653
+ ImmutableMap.of("color",
1654
+ "red",
1655
+ "price",
1656
+ 19.95
1657
+ )
1658
+ ),
1659
+ "expensive",
1660
+ 10
1661
+ )
1662
+ /*
1663
+ {
1664
+ "store": {
1665
+ "book": [
1666
+ {
1667
+ "author": "Nigel Rees",
1668
+ "title": "Sayings of the Century",
1669
+ "price": 8.95
1670
+ },
1671
+ {
1672
+ "author": "Evelyn Waugh",
1673
+ "title": "Sword of Honour",
1674
+ "price": 12.99
1675
+ },
1676
+ {
1677
+ "author": "Herman Melville",
1678
+ "title": "Moby Dick",
1679
+ "isbn": "0-553-21311-3",
1680
+ "price": 8.99
1681
+ },
1682
+ {
1683
+ "author": "J. R. R. Tolkien",
1684
+ "title": "The Lord of the Rings",
1685
+ "isbn": "0-395-19395-8",
1686
+ "price": 22.99
1687
+ }
1688
+ ],
1689
+ "bicycle": {
1690
+ "color": "red",
1691
+ "price": 19.95
1692
+ }
1693
+ },
1694
+ "expensive": 10
1695
+ }
1696
+ */
1697
+ );
1698
+ builder.put("_c0", "v12");
1699
+
1700
+ String data = convertToJsonString(builder.build());
1701
+
1702
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(),
1703
+ schema,
1704
+ data, c1Data)) {
1705
+ pageOutput.add(page);
1706
+ }
1707
+
1708
+ pageOutput.finish();
1709
+ pageOutput.close();
1710
+
1711
+ PageReader pageReader = new PageReader(outputSchema);
1712
+
1713
+ for (Page page : mockPageOutput.pages) {
1714
+ pageReader.setPage(page);
1715
+ assertEquals(true, pageReader.getBoolean(outputSchema.getColumn(0)));
1716
+ assertEquals(2, pageReader.getLong(outputSchema.getColumn(1)));
1717
+ assertEquals("2014-10-20 19:44:33 UTC",
1718
+ pageReader.getTimestamp(outputSchema.getColumn(2)).toString());
1719
+ assertEquals(String.valueOf(4.4),
1720
+ String.valueOf(pageReader.getDouble(outputSchema.getColumn(3))));
1721
+ assertEquals("v5", pageReader.getString(outputSchema.getColumn(4)));
1722
+ assertEquals("2014-10-21 04:44:33 UTC",
1723
+ pageReader.getTimestamp(outputSchema.getColumn(5)).toString());
1724
+ assertEquals("2014-10-20 19:44:33 UTC",
1725
+ pageReader.getTimestamp(outputSchema.getColumn(6)).toString());
1726
+ assertEquals("[\"Nigel Rees\",\"Evelyn Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]",
1727
+ pageReader.getString(outputSchema.getColumn(7)));
1728
+ assertEquals("[\"Nigel Rees\",\"Herman Melville\"]",
1729
+ pageReader.getString(outputSchema.getColumn(8)));
1730
+ assertEquals("" +
1731
+ "[" +
1732
+ "{" +
1733
+ "\"author\":\"Herman Melville\"," +
1734
+ "\"title\":\"Moby Dick\"," +
1735
+ "\"isbn\":\"0-553-21311-3\"," +
1736
+ "\"price\":8.99" +
1737
+ "}," +
1738
+ "{" +
1739
+ "\"author\":\"J. R. R. Tolkien\"," +
1740
+ "\"title\":\"The Lord of the Rings\"," +
1741
+ "\"isbn\":\"0-395-19395-8\"," +
1742
+ "\"price\":22.99" +
1743
+ "}" +
1744
+ "]",
1745
+ pageReader.getString(outputSchema.getColumn(9)));
1746
+ assertEquals("[\"Sayings of the Century\"]",
1747
+ pageReader.getString(outputSchema.getColumn(10)));
1748
+ assertEquals("Herman Melville",
1749
+ pageReader.getString(outputSchema.getColumn(11)));
1750
+ assertEquals("v12",
1751
+ pageReader.getString(outputSchema.getColumn(12)));
1752
+ assertEquals(c1Data,
1753
+ pageReader.getString(outputSchema.getColumn(13)));
1754
+ }
1755
+ }
1756
+ });
1757
+ }
1758
+
1759
+ @Test
1760
+ public void testAbortBrokenJsonStringWithNOOPCacheProvider()
1761
+ {
1762
+ String configYaml = "" +
1763
+ "type: expand_json\n" +
1764
+ "json_column_name: _c0\n" +
1765
+ "root: $.\n" +
1766
+ "default_timezone: Asia/Tokyo\n" +
1767
+ "cache_provider: noop\n" +
1768
+ "expanded_columns:\n" +
1769
+ " - {name: _j0, type: string}\n";
1770
+ ConfigSource config = getConfigFromYaml(configYaml);
1771
+
1772
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1773
+ {
1774
+ @Override
1775
+ public void run(TaskSource taskSource, Schema outputSchema)
1776
+ {
1777
+ MockPageOutput mockPageOutput = new MockPageOutput();
1778
+ PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource,
1779
+ schema,
1780
+ outputSchema,
1781
+ mockPageOutput);
1782
+
1783
+ String data = getBrokenJsonString();
1784
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(),
1785
+ schema,
1786
+ data, c1Data)) {
1787
+ exception.expect(InvalidJsonException.class);
1788
+ exception.expectMessage("Unexpected End Of File position 12: null");
1789
+ pageOutput.add(page);
1790
+ }
1791
+
1792
+ pageOutput.finish();
1793
+ pageOutput.close();
1794
+
1795
+ PageReader pageReader = new PageReader(outputSchema);
1796
+
1797
+ for (Page page : mockPageOutput.pages) {
1798
+ pageReader.setPage(page);
1799
+ assertEquals("te", pageReader.getString(outputSchema.getColumn(0)));
1800
+ }
1801
+ }
1802
+ });
1803
+ }
1804
+
1805
+ @Test
1806
+ public void testParseNumbersInExponentialNotationWithNOOPCacheProvider()
1807
+ {
1808
+ final String configYaml = "" +
1809
+ "type: expand_json\n" +
1810
+ "json_column_name: _c1\n" +
1811
+ "root: $.\n" +
1812
+ "cache_provider: noop\n" +
1813
+ "expanded_columns:\n" +
1814
+ " - {name: _j0, type: double}\n" +
1815
+ " - {name: _j1, type: long}\n";
1816
+ ConfigSource config = getConfigFromYaml(configYaml);
1817
+ final Schema schema = schema("_c1", STRING);
1818
+
1819
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
1820
+ {
1821
+ @Override
1822
+ public void run(TaskSource taskSource, Schema outputSchema)
1823
+ {
1824
+ MockPageOutput mockPageOutput = new MockPageOutput();
1825
+
1826
+ String doubleFloatingPoint = "-1.234e-5";
1827
+ double doubleFixedPoint = -0.00001234; // Use in Asserting.
1828
+ String longFloatingPoint = "12345e3";
1829
+ long longFixedPoint = 12_345_000L; // Use in Asserting.
1830
+
1831
+ String data = String.format(
1832
+ "{\"_j0\":%s, \"_j1\":%s}",
1833
+ doubleFloatingPoint,
1834
+ longFloatingPoint);
1835
+
1836
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
1837
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
1838
+ pageOutput.add(page);
1839
+ }
1840
+
1841
+ pageOutput.finish();
1842
+ }
1843
+
1844
+ PageReader pageReader = new PageReader(outputSchema);
1845
+
1846
+ for (Page page : mockPageOutput.pages) {
1847
+ pageReader.setPage(page);
1848
+ assertEquals(doubleFixedPoint, pageReader.getDouble(outputSchema.getColumn(0)), 0.0);
1849
+ assertEquals(longFixedPoint, pageReader.getLong(outputSchema.getColumn(1)));
1850
+ }
1851
+ }
1852
+ });
1853
+ }
1854
+
1855
+
1018
1856
  private static Schema schema(Object... nameAndTypes)
1019
1857
  {
1020
1858
  Schema.Builder builder = Schema.builder();