mustrd 0.2.6.1__py3-none-any.whl → 0.2.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mustrd/mustrd.py CHANGED
@@ -23,7 +23,7 @@ SOFTWARE.
  """

  import os
- from typing import Tuple, List
+ from typing import Tuple, List, Union

  import tomli
  from rdflib.plugins.parsers.notation3 import BadSyntax
@@ -54,8 +54,11 @@ from pyshacl import validate
  import logging
  from http.client import HTTPConnection
  from .steprunner import upload_given, run_when
+ from multimethods import MultiMethod
+ import logging
+ import traceback

- log = logger_setup.setup_logger(__name__)
+ log = logging.getLogger(__name__)

  requests.packages.urllib3.disable_warnings()
  requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += ':HIGH:!DH:!aNULL'
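
Note on the hunk above: the custom `logger_setup.setup_logger(__name__)` call is replaced by the standard-library `logging.getLogger(__name__)`, so log output from this module is now governed by whatever logging configuration the host application installs. A minimal sketch of such a configuration (the level, format, and logger name here are illustrative assumptions, not part of mustrd):

    import logging

    # Configure the root logger once at application start-up; module loggers
    # created with logging.getLogger(__name__) inherit these handlers.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(name)s %(levelname)s %(message)s",
    )
    # Optionally opt a single module into more verbose output.
    logging.getLogger("mustrd.mustrd").setLevel(logging.DEBUG)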
@@ -89,7 +92,7 @@ def debug_requests_off():
  debug_requests_off()


- @dataclass
+ @dataclass(frozen=True)
  class Specification:
  spec_uri: URIRef
  triple_store: dict
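
The `@dataclass(frozen=True)` change makes `Specification` instances immutable and hashable: assigning to a field after construction raises `dataclasses.FrozenInstanceError`, and frozen dataclasses (with the default `eq=True`) gain a `__hash__`, so instances can be used in sets and as dict keys. A small stand-alone illustration with a hypothetical dataclass, not a mustrd class:

    from dataclasses import dataclass, FrozenInstanceError

    @dataclass(frozen=True)
    class Point:
        x: int
        y: int

    p = Point(1, 2)
    seen = {p}            # hashable, so usable in sets and as dict keys
    try:
        p.x = 3           # any field assignment raises FrozenInstanceError
    except FrozenInstanceError:
        pass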
@@ -97,6 +100,7 @@ class Specification:
  when: WhenSpec
  then: ThenSpec
  spec_file_name: str = "default.mustrd.ttl"
+ spec_source_file: Path = Path("default.mustrd.ttl")


  @dataclass
@@ -157,6 +161,7 @@ class TripleStoreConnectionError(SpecResult):
  class SpecSkipped(SpecResult):
  message: str
  spec_file_name: str = "default.mustrd.ttl"
+ spec_source_file: Path = Path("default.mustrd.ttl")


  @dataclass
@@ -180,21 +185,38 @@ class UpdateSparqlQuery(SparqlAction):


  # https://github.com/Semantic-partners/mustrd/issues/19
-
- def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, ont_graph: Graph, file_name: str = "*")\
+ # Validate the specs found in spec_path
+ def validate_specs(run_config: dict,
+ triple_stores: List,
+ shacl_graph: Graph,
+ ont_graph: Graph,
+ file_name: str = "*",
+ selected_test_files: List[str] = [])\
  -> Tuple[List, Graph, List]:
  spec_graph = Graph()
  subject_uris = set()
  focus_uris = set()
  invalid_specs = []
- ttl_files = list(run_config['spec_path'].glob(f'**/{file_name}.mustrd.ttl'))
+ ttl_files = []
+
+ if not selected_test_files:
+ ttl_files = list(run_config['spec_path'].glob(
+ f'**/{file_name}.mustrd.ttl'))
+ log.info(
+ f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
+ else:
+ ttl_files = selected_test_files
+
+ log.info(f"Using {ttl_files} for test source")
  ttl_files.sort()
- log.info(f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")

+ # For each spec file found in spec_path
  for file in ttl_files:
+ # file = file.resolve()
  error_messages = []

  log.info(f"Parse: {file}")
+ # Parse spec file and add error message if not conform to RDF standard
  try:
  file_graph = Graph().parse(file)
  except BadSyntax as e:
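
The reworked `validate_specs` signature above adds an optional `selected_test_files` argument: when the list is non-empty, the `**/{file_name}.mustrd.ttl` glob over `run_config['spec_path']` is skipped and only the listed files are parsed. A hedged usage sketch (the graphs and triple-store list are empty placeholders; only the `spec_path` key of `run_config` is evidenced by this diff, and `Path` objects are assumed for the file list because later code reads `file.name`, even though the annotation says `List[str]`):

    from pathlib import Path
    from rdflib import Graph

    run_config = {"spec_path": Path("test/specs")}
    valid_uris, spec_graph, invalid = validate_specs(
        run_config,
        triple_stores=[],          # placeholder
        shacl_graph=Graph(),       # placeholder shapes graph
        ont_graph=Graph(),         # placeholder ontology graph
        selected_test_files=[Path("test/specs/example.mustrd.ttl")],
    )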
@@ -204,6 +226,7 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
  error_messages += [f"Could not extract spec from {file} due to exception of type "
  f"{type(e).__name__} when parsing file"]
  continue
+
  # run shacl validation
  conforms, results_graph, results_text = validate(file_graph,
  shacl_graph=shacl_graph,
@@ -216,6 +239,8 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
  advanced=True,
  js=False,
  debug=False)
+
+ # Add error message if not conform to spec shapes
  if not conforms:
  for msg in results_graph.objects(predicate=SH.resultMessage):
  log.warning(f"{file_graph}")
@@ -223,47 +248,66 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
  error_messages += [f"{msg} File: {file.name}"]

  # collect a list of uris of the tests in focus
+ # If focus is found, only the spec in the focus will be executed
  for focus_uri in file_graph.subjects(predicate=MUST.focus, object=Literal("true", datatype=XSD.boolean)):
  if focus_uri in focus_uris:
  focus_uri = URIRef(str(focus_uri) + "_DUPLICATE")
  focus_uris.add(focus_uri)

- # make sure there are no duplicate test IRIs in the files
- for subject_uri in file_graph.subjects(RDF.type, MUST.TestSpec):
- if subject_uri in subject_uris:
- log.warning(f"Duplicate subject URI found: {file.name} {subject_uri}. File will not be parsed.")
- error_messages += [f"Duplicate subject URI found in {file.name}."]
- subject_uri = URIRef(str(subject_uri) + "_DUPLICATE")
- if len(error_messages) > 0:
- error_messages.sort()
- error_message = "\n".join(msg for msg in error_messages)
- invalid_specs += [SpecSkipped(subject_uri, triple_store["type"], error_message, file.name)
- for triple_store in triple_stores]
- else:
- subject_uris.add(subject_uri)
- this_spec_graph = Graph()
- this_spec_graph.parse(file)
- spec_uris_in_this_file = list(this_spec_graph.subjects(RDF.type, MUST.TestSpec))
- for spec in spec_uris_in_this_file:
- this_spec_graph.add([spec, MUST.specSourceFile, Literal(file)])
- this_spec_graph.add([spec, MUST.specFileName, Literal(file.name)])
- spec_graph += this_spec_graph
+ add_spec_validation(file_graph, subject_uris, file,
+ triple_stores, error_messages, invalid_specs, spec_graph)

  valid_spec_uris = list(spec_graph.subjects(RDF.type, MUST.TestSpec))

  if focus_uris:
- invalid_focus_specs = []
- for spec in invalid_specs:
- if spec.spec_uri in focus_uris:
- invalid_focus_specs += [spec]
- focus_uris.remove(spec.spec_uri)
- log.info(f"Collected {len(focus_uris)} focus test spec(s)")
+ invalid_focus_specs = get_invalid_focus_spec(focus_uris, invalid_specs)
  return focus_uris, spec_graph, invalid_focus_specs
  else:
  log.info(f"Collected {len(valid_spec_uris)} valid test spec(s)")
  return valid_spec_uris, spec_graph, invalid_specs


+ def get_invalid_focus_spec(focus_uris: set, invalid_specs: list):
+ invalid_focus_specs = []
+ for spec in invalid_specs:
+ if spec.spec_uri in focus_uris:
+ invalid_focus_specs += [spec]
+ focus_uris.remove(spec.spec_uri)
+ log.info(f"Collected {len(focus_uris)} focus test spec(s)")
+ return invalid_focus_specs
+
+
+ # Detect duplicate,
+ # If no error: associate the spec configuration and the file where this conf is stored
+ # If error, aggregate the messages and mark spec as skipped
+ def add_spec_validation(file_graph: Graph, subject_uris: set, file: Path, triple_stores: List,
+ error_messages: list, invalid_specs: list, spec_graph: Graph):
+
+ for subject_uri in file_graph.subjects(RDF.type, MUST.TestSpec):
+ # If we already collected a URI, then we tag it as duplicate and it won't be executed
+ if subject_uri in subject_uris:
+ log.warning(
+ f"Duplicate subject URI found: {file.name} {subject_uri}. File will not be parsed.")
+ error_messages += [f"Duplicate subject URI found in {file.name}."]
+ subject_uri = URIRef(str(subject_uri) + "_DUPLICATE")
+ if len(error_messages) == 0:
+ subject_uris.add(subject_uri)
+ this_spec_graph = Graph()
+ this_spec_graph.parse(file)
+ spec_uris_in_this_file = list(
+ this_spec_graph.subjects(RDF.type, MUST.TestSpec))
+ for spec in spec_uris_in_this_file:
+ this_spec_graph.add([spec, MUST.specSourceFile, Literal(file)])
+ this_spec_graph.add(
+ [spec, MUST.specFileName, Literal(file.name)])
+ spec_graph += this_spec_graph
+ else:
+ error_messages.sort()
+ error_message = "\n".join(msg for msg in error_messages)
+ invalid_specs += [SpecSkipped(subject_uri, triple_store["type"], error_message, file.name, file)
+ for triple_store in triple_stores]
+
+
  def get_specs(spec_uris: List[URIRef], spec_graph: Graph, triple_stores: List[dict],
  run_config: dict):
  specs = []
@@ -271,14 +315,16 @@ def get_specs(spec_uris: List[URIRef], spec_graph: Graph, triple_stores: List[di
  try:
  for triple_store in triple_stores:
  if "error" in triple_store:
- log.error(f"{triple_store['error']}. No specs run for this triple store.")
+ log.error(
+ f"{triple_store['error']}. No specs run for this triple store.")
  skipped_results += [SpecSkipped(spec_uri, triple_store['type'], triple_store['error'],
  get_spec_file(spec_uri, spec_graph)) for spec_uri in
  spec_uris]
  else:
  for spec_uri in spec_uris:
  try:
- specs += [get_spec(spec_uri, spec_graph, run_config, triple_store)]
+ specs += [get_spec(spec_uri, spec_graph,
+ run_config, triple_store)]
  except (ValueError, FileNotFoundError, ConnectionError) as e:
  skipped_results += [SpecSkipped(spec_uri, triple_store['type'],
  e, get_spec_file(spec_uri, spec_graph))]
@@ -319,9 +365,11 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
  mustrd_triple_store=mustrd_triple_store))

  spec_file_name = get_spec_file(spec_uri, spec_graph)
+ spec_file_path = Path(spec_graph.value(
+ subject=spec_uri, predicate=MUST.specSourceFile, default=Path("default.mustrd.ttl")))
  # https://github.com/Semantic-partners/mustrd/issues/92
  return Specification(spec_uri, mustrd_triple_store,
- components[0].value, components[1], components[2], spec_file_name)
+ components[0].value, components[1], components[2], spec_file_name, spec_file_path)

  except (ValueError, FileNotFoundError) as e:
  template = "An exception of type {0} occurred. Arguments:\n{1!r}"
@@ -333,17 +381,29 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
  raise


- def check_result(spec, result):
+ def check_result(spec: Specification, result: Union[str, Graph]):
+ log.debug(
+ f"check_result {spec.spec_uri=}, {spec.triple_store=}, {result=} {type(spec.then)}")
  if isinstance(spec.then, TableThenSpec):
+ log.debug(f"table_comparison")
  return table_comparison(result, spec)
  else:
  graph_compare = graph_comparison(spec.then.value, result)
  if isomorphic(result, spec.then.value):
- return SpecPassed(spec.spec_uri, spec.triple_store["type"])
+ log.debug(f"isomorphic {spec}")
+ log.debug(f"{spec.spec_uri}")
+ log.debug(f"{spec.triple_store}")
+ ret = SpecPassed(spec.spec_uri, spec.triple_store["type"])
+
+ return ret
  else:
+ log.debug(f"not isomorphic")
  if spec.when[0].queryType == MUST.ConstructSparql:
+ log.debug(f"ConstructSpecFailure")
+
  return ConstructSpecFailure(spec.spec_uri, spec.triple_store["type"], graph_compare)
  else:
+ log.debug(f"UpdateSpecFailure")
  return UpdateSpecFailure(spec.spec_uri, spec.triple_store["type"], graph_compare)


@@ -351,7 +411,8 @@ def run_spec(spec: Specification) -> SpecResult:
  spec_uri = spec.spec_uri
  triple_store = spec.triple_store
  # close_connection = True
- log.debug(f"run_when {spec_uri=}, {triple_store=}, {spec.given=}, {spec.when=}, {spec.then=}")
+ log.debug(
+ f"run_when {spec_uri=}, {triple_store=}, {spec.given=}, {spec.when=}, {spec.then=}")
  if spec.given:
  given_as_turtle = spec.given.serialize(format="turtle")
  log.debug(f"{given_as_turtle}")
@@ -361,12 +422,18 @@ def run_spec(spec: Specification) -> SpecResult:
  return SpecSkipped(spec_uri, triple_store['type'], "Unable to run Inherited State tests on Rdflib")
  try:
  for when in spec.when:
- log.info(f"Running {when.queryType} spec {spec_uri} on {triple_store['type']}")
+ log.info(
+ f"Running {when.queryType} spec {spec_uri} on {triple_store['type']}")
  try:
  result = run_when(spec_uri, triple_store, when)
+ log.info(
+ f"run {when.queryType} spec {spec_uri} on {triple_store['type']} {result=}")
  except ParseException as e:
+ log.error(
+ f"parseException {e}")
  return SparqlParseFailure(spec_uri, triple_store["type"], e)
  except NotImplementedError as ex:
+ log.error(f"NotImplementedError {ex}")
  return SpecSkipped(spec_uri, triple_store["type"], ex.args[0])
  return check_result(spec, result)
  except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout, OSError) as e:
@@ -378,7 +445,13 @@ def run_spec(spec: Specification) -> SpecResult:
  except (TypeError, RequestException) as e:
  log.error(f"{type(e)} {e}")
  return SparqlExecutionError(spec_uri, triple_store["type"], e)
-
+ except Exception as e:
+ if e:
+ log.error(f"Unknown error {e}\n{traceback.format_exc()}")
+ raise
+ else:
+ log.error(f"Unknown error")
+ return RuntimeError(spec_uri, triple_store["type"], e)
  # https://github.com/Semantic-partners/mustrd/issues/78
  # finally:
  # if type(mustrd_triple_store) == MustrdAnzo and close_connection:
@@ -394,17 +467,21 @@ def get_triple_store_graph(triple_store_graph_path: Path, secrets: str):
  return Graph().parse(triple_store_graph_path).parse(secret_path)


+ # Parse and validate triple store configuration
  def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
  triple_stores = []
- shacl_graph = Graph().parse(Path(os.path.join(get_mustrd_root(), "model/triplestoreshapes.ttl")))
- ont_graph = Graph().parse(Path(os.path.join(get_mustrd_root(), "model/triplestoreOntology.ttl")))
+ shacl_graph = Graph().parse(
+ Path(os.path.join(get_mustrd_root(), "model/triplestoreshapes.ttl")))
+ ont_graph = Graph().parse(
+ Path(os.path.join(get_mustrd_root(), "model/triplestoreOntology.ttl")))
+ # SHACL validation of triple store configuration
  conforms, results_graph, results_text = validate(
- data_graph=triple_store_graph,
- shacl_graph=shacl_graph,
- ont_graph=ont_graph,
- advanced=True,
- inference='none'
- )
+ data_graph=triple_store_graph,
+ shacl_graph=shacl_graph,
+ ont_graph=ont_graph,
+ advanced=True,
+ inference='none'
+ )
  if not conforms:
  raise ValueError(f"Triple store configuration not conform to the shapes. SHACL report: {results_text}",
  results_graph)
@@ -414,46 +491,13 @@ def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
  triple_store["uri"] = triple_store_config
  # Anzo graph via anzo
  if triple_store_type == TRIPLESTORE.Anzo:
- triple_store["url"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.url)
- triple_store["port"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.port)
- try:
- triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
- predicate=TRIPLESTORE.username))
- triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
- predicate=TRIPLESTORE.password))
- except (FileNotFoundError, ValueError) as e:
- triple_store["error"] = e
- triple_store["gqe_uri"] = triple_store_graph.value(subject=triple_store_config,
- predicate=TRIPLESTORE.gqeURI)
- triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
- predicate=TRIPLESTORE.inputGraph)
- triple_store["output_graph"] = triple_store_graph.value(subject=triple_store_config,
- predicate=TRIPLESTORE.outputGraph)
- try:
- check_triple_store_params(triple_store, ["url", "port", "username", "password", "input_graph"])
- except ValueError as e:
- triple_store["error"] = e
+ get_anzo_configuration(
+ triple_store, triple_store_graph, triple_store_config)
  # GraphDB
  elif triple_store_type == TRIPLESTORE.GraphDb:
- triple_store["url"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.url)
- triple_store["port"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.port)
- try:
- triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
- predicate=TRIPLESTORE.username))
- triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
- predicate=TRIPLESTORE.password))
- except (FileNotFoundError, ValueError) as e:
- log.error(f"Credential retrieval failed {e}")
- triple_store["error"] = e
- triple_store["repository"] = triple_store_graph.value(subject=triple_store_config,
- predicate=TRIPLESTORE.repository)
- triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
- predicate=TRIPLESTORE.inputGraph)
+ get_graphDB_configuration(
+ triple_store, triple_store_graph, triple_store_config)

- try:
- check_triple_store_params(triple_store, ["url", "port", "repository"])
- except ValueError as e:
- triple_store["error"] = e
  elif triple_store_type != TRIPLESTORE.RdfLib:
  triple_store["error"] = f"Triple store not implemented: {triple_store_type}"

@@ -461,15 +505,65 @@ def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
  return triple_stores


+ def get_anzo_configuration(triple_store: dict, triple_store_graph: Graph, triple_store_config: URIRef):
+ triple_store["url"] = triple_store_graph.value(
+ subject=triple_store_config, predicate=TRIPLESTORE.url)
+ triple_store["port"] = triple_store_graph.value(
+ subject=triple_store_config, predicate=TRIPLESTORE.port)
+ try:
+ triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
+ predicate=TRIPLESTORE.username))
+ triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
+ predicate=TRIPLESTORE.password))
+ except (FileNotFoundError, ValueError) as e:
+ triple_store["error"] = e
+ triple_store["gqe_uri"] = triple_store_graph.value(subject=triple_store_config,
+ predicate=TRIPLESTORE.gqeURI)
+ triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
+ predicate=TRIPLESTORE.inputGraph)
+ triple_store["output_graph"] = triple_store_graph.value(subject=triple_store_config,
+ predicate=TRIPLESTORE.outputGraph)
+ try:
+ check_triple_store_params(
+ triple_store, ["url", "port", "username", "password", "input_graph"])
+ except ValueError as e:
+ triple_store["error"] = e
+
+
+ def get_graphDB_configuration(triple_store: dict, triple_store_graph: Graph, triple_store_config: URIRef):
+ triple_store["url"] = triple_store_graph.value(
+ subject=triple_store_config, predicate=TRIPLESTORE.url)
+ triple_store["port"] = triple_store_graph.value(
+ subject=triple_store_config, predicate=TRIPLESTORE.port)
+ try:
+ triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
+ predicate=TRIPLESTORE.username))
+ triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
+ predicate=TRIPLESTORE.password))
+ except (FileNotFoundError, ValueError) as e:
+ log.error(f"Credential retrieval failed {e}")
+ triple_store["error"] = e
+ triple_store["repository"] = triple_store_graph.value(subject=triple_store_config,
+ predicate=TRIPLESTORE.repository)
+ triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
+ predicate=TRIPLESTORE.inputGraph)
+ try:
+ check_triple_store_params(triple_store, ["url", "repository"])
+ except ValueError as e:
+ triple_store["error"] = e
+
+
  def check_triple_store_params(triple_store: dict, required_params: List[str]):
- missing_params = [param for param in required_params if triple_store.get(param) is None]
+ missing_params = [
+ param for param in required_params if triple_store.get(param) is None]
  if missing_params:
  raise ValueError(f"Cannot establish connection to {triple_store['type']}. "
  f"Missing required parameter(s): {', '.join(missing_params)}.")


  def get_credential_from_file(triple_store_name: URIRef, credential: str, config_path: Literal) -> str:
- log.info(f"get_credential_from_file {triple_store_name}, {credential}, {config_path}")
+ log.info(
+ f"get_credential_from_file {triple_store_name}, {credential}, {config_path}")
  if not config_path:
  raise ValueError(f"Cannot establish connection defined in {triple_store_name}. "
  f"Missing required parameter: {credential}.")
@@ -508,7 +602,8 @@ def json_results_to_panda_dataframe(result: str) -> pandas.DataFrame:
  else:
  values.append(str(XSD.anyURI))

- frames = pandas.concat(objs=[frames, pandas.DataFrame([values], columns=columns)], ignore_index=True)
+ frames = pandas.concat(objs=[frames, pandas.DataFrame(
+ [values], columns=columns)], ignore_index=True)
  frames.fillna('', inplace=True)

  if frames.size == 0:
@@ -516,94 +611,124 @@ def json_results_to_panda_dataframe(result: str) -> pandas.DataFrame:
  return frames


- # https://github.com/Semantic-partners/mustrd/issues/110
- # https://github.com/Semantic-partners/mustrd/issues/52
  def table_comparison(result: str, spec: Specification) -> SpecResult:
  warning = None
  order_list = ["order by ?", "order by desc", "order by asc"]
- ordered_result = any(pattern in spec.when[0].value.lower() for pattern in order_list)
- then = spec.then.value
- try:
- if is_json(result):
- df = json_results_to_panda_dataframe(result)
- columns = list(df.columns)
- else:
- raise ParseException
- sorted_columns = sorted(columns)
- sorted_then_cols = sorted(list(then))
- if not df.empty:
+ ordered_result = any(
+ pattern in spec.when[0].value.lower() for pattern in order_list)
+
+ # If sparql query doesn't contain order by clause, but order is define in then spec:
+ # Then ignore order in then spec and print a warning
+ if not ordered_result and spec.then.ordered:
+ warning = f"sh:order in {spec.spec_uri} is ignored, no ORDER BY in query"
+ log.warning(warning)
+
+ # If sparql query contains an order by clause and then spec is not order:
+ # Spec is inconsistent
+ if ordered_result and not spec.then.ordered:
+ message = "Actual result is ordered, must:then must contain sh:order on every row."
+ return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, message)
+
+ # Convert results to dataframe
+ if is_json(result):
+ df = json_results_to_panda_dataframe(result)
+ else:
+ return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, "Sparql result is not in JSON")

- if not ordered_result:
- df.sort_values(by=columns[::2], inplace=True)
- df.reset_index(inplace=True, drop=True)
- if spec.then.ordered:
- warning = f"sh:order in {spec.spec_uri} is ignored, no ORDER BY in query"
- log.warning(warning)
-
- # Scenario 1: expected no result but got a result
- if then.empty:
- message = f"""Expected 0 row(s) and 0 column(s),
- got {df.shape[0]} row(s) and {round(df.shape[1] / 2)} column(s)"""
- empty_then = create_empty_dataframe_with_columns(df)
- df_diff = empty_then.compare(df, result_names=("expected", "actual"))
+ # Compare result with expected
+ df_diff, message = compare_table_results(df, spec)

- else:
- # Scenario 2: expected a result and got a result
- # pandas.set_option('display.max_columns', None)
- message = f"Expected {then.shape[0]} row(s) and {round(then.shape[1] / 2)} column(s), " \
- f"got {df.shape[0]} row(s) and {round(df.shape[1] / 2)} column(s)"
- if ordered_result is True and not spec.then.ordered:
- message += ". Actual result is ordered, must:then must contain sh:order on every row."
- return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, message)
- else:
- if len(columns) == len(then.columns):
- if sorted_columns == sorted_then_cols:
- then = then[columns]
- if not ordered_result:
- then.sort_values(by=columns[::2], inplace=True)
- then.reset_index(drop=True, inplace=True)
- if df.shape == then.shape and (df.columns == then.columns).all():
- df_diff = then.compare(df, result_names=("expected", "actual"))
- else:
- df_diff = construct_df_diff(df, then)
-
- else:
- then = then[sorted_then_cols]
- df = df[sorted_columns]
- df_diff = construct_df_diff(df, then)
- else:
-
- then = then[sorted_then_cols]
- df = df[sorted_columns]
- df_diff = construct_df_diff(df, then)
+ if df_diff.empty:
+ if warning:
+ return SpecPassedWithWarning(spec.spec_uri, spec.triple_store["type"], warning)
  else:
+ return SpecPassed(spec.spec_uri, spec.triple_store["type"])
+ else:
+ log.error("\n" + df_diff.to_markdown())
+ log.error(message)
+ return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], df_diff, message)

- if then.empty:
- # Scenario 3: expected no result, got no result
- message = "Expected 0 row(s) and 0 column(s), got 0 row(s) and 0 column(s)"
- df = pandas.DataFrame()
- else:
- # Scenario 4: expected a result, but got an empty result
- message = f"""Expected {then.shape[0]} row(s)
- and {round(then.shape[1] / 2)} column(s), got 0 row(s) and 0 column(s)"""
- then = then[sorted_then_cols]
- df = create_empty_dataframe_with_columns(then)
- df_diff = then.compare(df, result_names=("expected", "actual"))
-
- if df_diff.empty:
- if warning:
- return SpecPassedWithWarning(spec.spec_uri, spec.triple_store["type"], warning)
+
+ def compare_table_results_dispatch(resultDf: DataFrame, spec: Specification):
+ return not resultDf.empty, not spec.then.value.empty
+
+
+ compare_table_results = MultiMethod(
+ "compare_table_results", compare_table_results_dispatch)
+
+
+ # Scenario 1: expected a result and got a result
+ @compare_table_results.method((True, True))
+ def _compare_results(resultDf: DataFrame, spec: Specification):
+ columns = list(resultDf.columns)
+ sorted_columns = sorted(columns)
+ then = spec.then.value
+ sorted_then_cols = sorted(list(then))
+ order_list = ["order by ?", "order by desc", "order by asc"]
+ ordered_result = any(
+ pattern in spec.when[0].value.lower() for pattern in order_list)
+
+ if not ordered_result:
+ resultDf.sort_values(by=list(resultDf.columns)[::2], inplace=True)
+ resultDf.reset_index(inplace=True, drop=True)
+
+ if len(columns) == len(then.columns):
+ if sorted_columns == sorted_then_cols:
+ then = then[columns]
+ if not ordered_result:
+ then.sort_values(by=columns[::2], inplace=True)
+ then.reset_index(drop=True, inplace=True)
+ if resultDf.shape == then.shape and (resultDf.columns == then.columns).all():
+ df_diff = then.compare(
+ resultDf, result_names=("expected", "actual"))
  else:
- return SpecPassed(spec.spec_uri, spec.triple_store["type"])
+ df_diff = construct_df_diff(resultDf, then)
  else:
- log.error("\n" + df_diff.to_markdown())
- log.error(message)
- return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], df_diff, message)
+ then = then[sorted_then_cols]
+ resultDf = resultDf[sorted_columns]
+ df_diff = construct_df_diff(resultDf, then)
+ else:
+ then = then[sorted_then_cols]
+ resultDf = resultDf[sorted_columns]
+ df_diff = construct_df_diff(resultDf, then)
+
+ message = build_summary_message(then.shape[0], round(
+ then.shape[1] / 2), resultDf.shape[0], round(resultDf.shape[1] / 2))
+ return df_diff, message
+
+
+ # Scenario 2: expected no result but got a result
+ @compare_table_results.method((True, False))
+ def _unexpected_results(resultDf: DataFrame, spec: Specification):
+ empty_then = create_empty_dataframe_with_columns(resultDf)
+ df_diff = empty_then.compare(resultDf, result_names=("expected", "actual"))
+
+ return df_diff, build_summary_message(0, 0, resultDf.shape[0], round(resultDf.shape[1] / 2))
+
+
+ # Scenario 3: expected a result, but got an empty result
+ @compare_table_results.method((False, True))
+ def _missing_results(resultDf: DataFrame, spec: Specification):
+ then = spec.then.value
+ then = then[sorted(list(then))]
+ df = create_empty_dataframe_with_columns(then)
+ df_diff = then.compare(df, result_names=("expected", "actual"))
+
+ return df_diff, build_summary_message(then.shape[0], round(then.shape[1] / 2), 0, 0)
+
+
+ # Scenario 4: expected no result, got no result
+ @compare_table_results.method((False, False))
+ def _no_results(resultDf: DataFrame, spec: Specification):
+ df = pandas.DataFrame()
+ df_diff = spec.then.value.compare(df, result_names=("expected", "actual"))

- except ParseException as e:
- return SparqlParseFailure(spec.spec_uri, spec.triple_store["type"], e)
- except NotImplementedError as ex:
- return SpecSkipped(spec.spec_uri, spec.triple_store["type"], ex)
+ return df_diff, build_summary_message(0, 0, 0, 0)
+
+
+ def build_summary_message(expected_rows, expected_columns, got_rows, got_columns):
+ return f"Expected {expected_rows} row(s) and {expected_columns} column(s), " \
+ f"got {got_rows} row(s) and {got_columns} column(s)"


  def graph_comparison(expected_graph: Graph, actual_graph: Graph) -> GraphComparison:
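
The table-comparison rewrite above replaces one large `table_comparison` function with a `MultiMethod` from the `multimethods` package: `compare_table_results_dispatch` maps the arguments to a key, here the pair `(actual has rows, expected has rows)`, and one handler per key is registered with `@compare_table_results.method(...)`. A minimal sketch of the same dispatch pattern on a toy example (the names below are illustrative, not from mustrd):

    from multimethods import MultiMethod

    def sign_dispatch(x: int) -> bool:
        # Dispatch key: True for non-negative numbers, False otherwise.
        return x >= 0

    describe = MultiMethod("describe", sign_dispatch)

    @describe.method(True)
    def _non_negative(x: int) -> str:
        return f"{x} is non-negative"

    @describe.method(False)
    def _negative(x: int) -> str:
        return f"{x} is negative"

    describe(3)    # -> "3 is non-negative"
    describe(-2)   # -> "-2 is negative"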
@@ -637,6 +762,35 @@ def get_then_update(spec_uri: URIRef, spec_graph: Graph) -> Graph:
  return expected_results


+ def write_result_diff_to_log(res, info):
+ if isinstance(res, UpdateSpecFailure) or isinstance(res, ConstructSpecFailure):
+ info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+ info(f"{Fore.BLUE} In Expected Not In Actual:")
+ info(
+ res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
+ info(f"{Fore.RED} in_actual_not_in_expected")
+ info(
+ res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
+ info(f"{Fore.GREEN} in_both")
+ info(res.graph_comparison.in_both.serialize(format="ttl"))
+
+ if isinstance(res, SelectSpecFailure):
+ info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+ info(res.message)
+ info(res.table_comparison.to_markdown())
+ if isinstance(res, SpecPassedWithWarning):
+ info(
+ f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
+ info(res.warning)
+ if isinstance(res, TripleStoreConnectionError) or isinstance(res, SparqlExecutionError) or \
+ isinstance(res, SparqlParseFailure):
+ info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+ info(res.exception)
+ if isinstance(res, SpecSkipped):
+ info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
+ info(res.message)
+
+
  def calculate_row_difference(df1: pandas.DataFrame,
  df2: pandas.DataFrame) -> pandas.DataFrame:
  df_all = df1.merge(df2.drop_duplicates(), how='left', indicator=True)
@@ -657,12 +811,16 @@ def construct_df_diff(df: pandas.DataFrame,
  modified_then = then

  if actual_columns.size > 0:
- modified_then = modified_then.reindex(modified_then.columns.to_list() + actual_columns.to_list(), axis=1)
- modified_then[actual_columns.to_list()] = modified_then[actual_columns.to_list()].fillna('')
+ modified_then = modified_then.reindex(
+ modified_then.columns.to_list() + actual_columns.to_list(), axis=1)
+ modified_then[actual_columns.to_list(
+ )] = modified_then[actual_columns.to_list()].fillna('')

  if expected_columns.size > 0:
- modified_df = modified_df.reindex(modified_df.columns.to_list() + expected_columns.to_list(), axis=1)
- modified_df[expected_columns.to_list()] = modified_df[expected_columns.to_list()].fillna('')
+ modified_df = modified_df.reindex(
+ modified_df.columns.to_list() + expected_columns.to_list(), axis=1)
+ modified_df[expected_columns.to_list(
+ )] = modified_df[expected_columns.to_list()].fillna('')

  modified_df = modified_df.reindex(modified_then.columns, axis=1)

@@ -686,13 +844,17 @@ def generate_row_diff(actual_rows: pandas.DataFrame, expected_rows: pandas.DataF

  if actual_rows.shape[0] > 0:
  empty_actual_copy = create_empty_dataframe_with_columns(actual_rows)
- df_diff_actual_rows = empty_actual_copy.compare(actual_rows, result_names=("expected", "actual"))
+ df_diff_actual_rows = empty_actual_copy.compare(
+ actual_rows, result_names=("expected", "actual"))

  if expected_rows.shape[0] > 0:
- empty_expected_copy = create_empty_dataframe_with_columns(expected_rows)
- df_diff_expected_rows = expected_rows.compare(empty_expected_copy, result_names=("expected", "actual"))
+ empty_expected_copy = create_empty_dataframe_with_columns(
+ expected_rows)
+ df_diff_expected_rows = expected_rows.compare(
+ empty_expected_copy, result_names=("expected", "actual"))

- df_diff_rows = pandas.concat([df_diff_actual_rows, df_diff_expected_rows], ignore_index=True)
+ df_diff_rows = pandas.concat(
+ [df_diff_actual_rows, df_diff_expected_rows], ignore_index=True)
  return df_diff_rows


@@ -707,15 +869,18 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
  # Init dictionaries
  status_dict = defaultdict(lambda: defaultdict(int))
  status_counts = defaultdict(lambda: defaultdict(int))
- colours = {SpecPassed: Fore.GREEN, SpecPassedWithWarning: Fore.YELLOW, SpecSkipped: Fore.YELLOW}
+ colours = {SpecPassed: Fore.GREEN,
+ SpecPassedWithWarning: Fore.YELLOW, SpecSkipped: Fore.YELLOW}
  # Populate dictionaries from results
  for result in results:
  status_counts[result.triple_store][type(result)] += 1
  status_dict[result.spec_uri][result.triple_store] = type(result)

  # Get the list of statuses and list of unique triple stores
- statuses = list(status for inner_dict in status_dict.values() for status in inner_dict.values())
- triple_stores = list(set(status for inner_dict in status_dict.values() for status in inner_dict.keys()))
+ statuses = list(status for inner_dict in status_dict.values()
+ for status in inner_dict.values())
+ triple_stores = list(set(status for inner_dict in status_dict.values()
+ for status in inner_dict.keys()))

  # Convert dictionaries to list for tabulate
  table_rows = [[spec_uri] + [
@@ -728,8 +893,10 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
  for triple_store in triple_stores] for status in set(statuses)]

  # Display tables with tabulate
- log.info(tabulate(table_rows, headers=['Spec Uris / triple stores'] + triple_stores, tablefmt="pretty"))
- log.info(tabulate(status_rows, headers=['Status / triple stores'] + triple_stores, tablefmt="pretty"))
+ log.info(tabulate(table_rows, headers=[
+ 'Spec Uris / triple stores'] + triple_stores, tablefmt="pretty"))
+ log.info(tabulate(status_rows, headers=[
+ 'Status / triple stores'] + triple_stores, tablefmt="pretty"))

  pass_count = statuses.count(SpecPassed)
  warning_count = statuses.count(SpecPassedWithWarning)
@@ -746,33 +913,40 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:

  logger_setup.flush()
  log.info(f"{overview_colour}===== {fail_count} failures, {skipped_count} skipped, {Fore.GREEN}{pass_count} passed, "
- f"{overview_colour}{warning_count} passed with warnings =====")
+ f"{overview_colour}{warning_count} passed with warnings =====")

  if verbose and (fail_count or warning_count or skipped_count):
- for res in results:
- if isinstance(res, UpdateSpecFailure):
- log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
- log.info(f"{Fore.BLUE} In Expected Not In Actual:")
- log.info(res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
- log.info()
- log.info(f"{Fore.RED} in_actual_not_in_expected")
- log.info(res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
- log.info(f"{Fore.GREEN} in_both")
- log.info(res.graph_comparison.in_both.serialize(format="ttl"))
-
- if isinstance(res, SelectSpecFailure):
- log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
- log.info(res.message)
- log.info(res.table_comparison.to_markdown())
- if isinstance(res, ConstructSpecFailure) or isinstance(res, UpdateSpecFailure):
- log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
- if isinstance(res, SpecPassedWithWarning):
- log.info(f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
- log.info(res.warning)
- if isinstance(res, TripleStoreConnectionError) or type(res, SparqlExecutionError) or \
- isinstance(res, SparqlParseFailure):
- log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
- log.info(res.exception)
- if isinstance(res, SpecSkipped):
- log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
- log.info(res.message)
+ display_verbose(results)
+
+
+ def display_verbose(results: List[SpecResult]):
+ for res in results:
+ if isinstance(res, UpdateSpecFailure):
+ log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+ log.info(f"{Fore.BLUE} In Expected Not In Actual:")
+ log.info(
+ res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
+ log.info()
+ log.info(f"{Fore.RED} in_actual_not_in_expected")
+ log.info(
+ res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
+ log.info(f"{Fore.GREEN} in_both")
+ log.info(res.graph_comparison.in_both.serialize(format="ttl"))
+
+ if isinstance(res, SelectSpecFailure):
+ log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+ log.info(res.message)
+ log.info(res.table_comparison.to_markdown())
+ if isinstance(res, ConstructSpecFailure) or isinstance(res, UpdateSpecFailure):
+ log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+ if isinstance(res, SpecPassedWithWarning):
+ log.info(
+ f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
+ log.info(res.warning)
+ if isinstance(res, TripleStoreConnectionError) or type(res, SparqlExecutionError) or \
+ isinstance(res, SparqlParseFailure):
+ log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+ log.info(res.exception)
+ if isinstance(res, SpecSkipped):
+ log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
+ log.info(res.message)