mustrd 0.2.6.1__py3-none-any.whl → 0.2.7a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mustrd/mustrd.py CHANGED
@@ -23,7 +23,7 @@ SOFTWARE.
 """
 
 import os
-from typing import Tuple, List
+from typing import Tuple, List, Union
 
 import tomli
 from rdflib.plugins.parsers.notation3 import BadSyntax
@@ -54,6 +54,7 @@ from pyshacl import validate
 import logging
 from http.client import HTTPConnection
 from .steprunner import upload_given, run_when
+from multimethods import MultiMethod
 
 log = logger_setup.setup_logger(__name__)
 
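The newly imported multimethods.MultiMethod drives the compare_table_results refactor later in this diff: a multimethod routes each call to whichever implementation was registered for the value returned by a dispatch function. A minimal, hypothetical sketch of that pattern, assuming the MultiMethod(name, dispatch_fn) / .method(dispatch_value) API used further down; describe and its handlers are illustrative and not part of mustrd:

from multimethods import MultiMethod

# Dispatch on a computed value rather than on argument types.
def shape_dispatch(shape: dict) -> str:
    return shape["kind"]

describe = MultiMethod("describe", shape_dispatch)

@describe.method("circle")
def _describe_circle(shape: dict) -> str:
    return f"circle of radius {shape['radius']}"

@describe.method("square")
def _describe_square(shape: dict) -> str:
    return f"square with side {shape['side']}"

print(describe({"kind": "circle", "radius": 2}))  # -> "circle of radius 2"

The dispatch value can be any hashable object, which is why compare_table_results below can key its four scenarios on a (bool, bool) tuple.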
@@ -89,7 +90,7 @@ def debug_requests_off():
 debug_requests_off()
 
 
-@dataclass
+@dataclass(frozen=True)
 class Specification:
     spec_uri: URIRef
     triple_store: dict
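Switching Specification to @dataclass(frozen=True) makes instances read-only after construction (field assignment raises dataclasses.FrozenInstanceError) and, combined with the generated __eq__, gives them a field-based __hash__. A standard-library-only sketch of the behaviour; Point is an illustrative stand-in, not a mustrd class:

from dataclasses import dataclass, FrozenInstanceError

@dataclass(frozen=True)
class Point:
    x: int
    y: int

p = Point(1, 2)
try:
    p.x = 5                              # any assignment raises FrozenInstanceError
except FrozenInstanceError:
    pass
print(hash(p) == hash(Point(1, 2)))      # True: frozen dataclasses are hashable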
@@ -97,6 +98,7 @@ class Specification:
     when: WhenSpec
     then: ThenSpec
     spec_file_name: str = "default.mustrd.ttl"
+    spec_source_file: Path = Path("default.mustrd.ttl")
 
 
 @dataclass
@@ -157,6 +159,7 @@ class TripleStoreConnectionError(SpecResult):
 class SpecSkipped(SpecResult):
     message: str
     spec_file_name: str = "default.mustrd.ttl"
+    spec_source_file: Path = Path("default.mustrd.ttl")
 
 
 @dataclass
@@ -180,21 +183,25 @@ class UpdateSparqlQuery(SparqlAction):
 
 
 # https://github.com/Semantic-partners/mustrd/issues/19
-
+# Validate the specs found in spec_path
 def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, ont_graph: Graph, file_name: str = "*")\
         -> Tuple[List, Graph, List]:
     spec_graph = Graph()
     subject_uris = set()
     focus_uris = set()
     invalid_specs = []
-    ttl_files = list(run_config['spec_path'].glob(f'**/{file_name}.mustrd.ttl'))
+    ttl_files = list(run_config['spec_path'].glob(
+        f'**/{file_name}.mustrd.ttl'))
     ttl_files.sort()
-    log.info(f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
+    log.info(
+        f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
 
+    # For each spec file found in spec_path
     for file in ttl_files:
         error_messages = []
 
         log.info(f"Parse: {file}")
+        # Parse spec file and add error message if not conform to RDF standard
         try:
             file_graph = Graph().parse(file)
         except BadSyntax as e:
@@ -204,6 +211,7 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
             error_messages += [f"Could not extract spec from {file} due to exception of type "
                                f"{type(e).__name__} when parsing file"]
             continue
+
         # run shacl validation
         conforms, results_graph, results_text = validate(file_graph,
                                                          shacl_graph=shacl_graph,
@@ -216,6 +224,8 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
                                                          advanced=True,
                                                          js=False,
                                                          debug=False)
+
+        # Add error message if not conform to spec shapes
         if not conforms:
             for msg in results_graph.objects(predicate=SH.resultMessage):
                 log.warning(f"{file_graph}")
@@ -223,47 +233,66 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
                 error_messages += [f"{msg} File: {file.name}"]
 
         # collect a list of uris of the tests in focus
+        # If focus is found, only the spec in the focus will be executed
         for focus_uri in file_graph.subjects(predicate=MUST.focus, object=Literal("true", datatype=XSD.boolean)):
             if focus_uri in focus_uris:
                 focus_uri = URIRef(str(focus_uri) + "_DUPLICATE")
             focus_uris.add(focus_uri)
 
-        # make sure there are no duplicate test IRIs in the files
-        for subject_uri in file_graph.subjects(RDF.type, MUST.TestSpec):
-            if subject_uri in subject_uris:
-                log.warning(f"Duplicate subject URI found: {file.name} {subject_uri}. File will not be parsed.")
-                error_messages += [f"Duplicate subject URI found in {file.name}."]
-                subject_uri = URIRef(str(subject_uri) + "_DUPLICATE")
-            if len(error_messages) > 0:
-                error_messages.sort()
-                error_message = "\n".join(msg for msg in error_messages)
-                invalid_specs += [SpecSkipped(subject_uri, triple_store["type"], error_message, file.name)
-                                  for triple_store in triple_stores]
-            else:
-                subject_uris.add(subject_uri)
-                this_spec_graph = Graph()
-                this_spec_graph.parse(file)
-                spec_uris_in_this_file = list(this_spec_graph.subjects(RDF.type, MUST.TestSpec))
-                for spec in spec_uris_in_this_file:
-                    this_spec_graph.add([spec, MUST.specSourceFile, Literal(file)])
-                    this_spec_graph.add([spec, MUST.specFileName, Literal(file.name)])
-                spec_graph += this_spec_graph
+        add_spec_validation(file_graph, subject_uris, file,
+                            triple_stores, error_messages, invalid_specs, spec_graph)
 
     valid_spec_uris = list(spec_graph.subjects(RDF.type, MUST.TestSpec))
 
     if focus_uris:
-        invalid_focus_specs = []
-        for spec in invalid_specs:
-            if spec.spec_uri in focus_uris:
-                invalid_focus_specs += [spec]
-                focus_uris.remove(spec.spec_uri)
-        log.info(f"Collected {len(focus_uris)} focus test spec(s)")
+        invalid_focus_specs = get_invalid_focus_spec(focus_uris, invalid_specs)
         return focus_uris, spec_graph, invalid_focus_specs
     else:
         log.info(f"Collected {len(valid_spec_uris)} valid test spec(s)")
         return valid_spec_uris, spec_graph, invalid_specs
 
 
+def get_invalid_focus_spec(focus_uris: set, invalid_specs: list):
+    invalid_focus_specs = []
+    for spec in invalid_specs:
+        if spec.spec_uri in focus_uris:
+            invalid_focus_specs += [spec]
+            focus_uris.remove(spec.spec_uri)
+    log.info(f"Collected {len(focus_uris)} focus test spec(s)")
+    return invalid_focus_specs
+
+
+# Detect duplicate,
+# If no error: associate the spec configuration and the file where this conf is stored
+# If error, aggregate the messages and mark spec as skipped
+def add_spec_validation(file_graph: Graph, subject_uris: set, file: Path, triple_stores: List,
+                        error_messages: list, invalid_specs: list, spec_graph: Graph):
+
+    for subject_uri in file_graph.subjects(RDF.type, MUST.TestSpec):
+        # If we already collected a URI, then we tag it as duplicate and it won't be executed
+        if subject_uri in subject_uris:
+            log.warning(
+                f"Duplicate subject URI found: {file.name} {subject_uri}. File will not be parsed.")
+            error_messages += [f"Duplicate subject URI found in {file.name}."]
+            subject_uri = URIRef(str(subject_uri) + "_DUPLICATE")
+        if len(error_messages) == 0:
+            subject_uris.add(subject_uri)
+            this_spec_graph = Graph()
+            this_spec_graph.parse(file)
+            spec_uris_in_this_file = list(
+                this_spec_graph.subjects(RDF.type, MUST.TestSpec))
+            for spec in spec_uris_in_this_file:
+                this_spec_graph.add([spec, MUST.specSourceFile, Literal(file)])
+                this_spec_graph.add(
+                    [spec, MUST.specFileName, Literal(file.name)])
+            spec_graph += this_spec_graph
+        else:
+            error_messages.sort()
+            error_message = "\n".join(msg for msg in error_messages)
+            invalid_specs += [SpecSkipped(subject_uri, triple_store["type"], error_message, file.name, file)
+                              for triple_store in triple_stores]
+
+
 def get_specs(spec_uris: List[URIRef], spec_graph: Graph, triple_stores: List[dict],
               run_config: dict):
     specs = []
@@ -271,14 +300,16 @@ def get_specs(spec_uris: List[URIRef], spec_graph: Graph, triple_stores: List[di
     try:
         for triple_store in triple_stores:
             if "error" in triple_store:
-                log.error(f"{triple_store['error']}. No specs run for this triple store.")
+                log.error(
+                    f"{triple_store['error']}. No specs run for this triple store.")
                 skipped_results += [SpecSkipped(spec_uri, triple_store['type'], triple_store['error'],
                                                 get_spec_file(spec_uri, spec_graph)) for spec_uri in
                                     spec_uris]
             else:
                 for spec_uri in spec_uris:
                     try:
-                        specs += [get_spec(spec_uri, spec_graph, run_config, triple_store)]
+                        specs += [get_spec(spec_uri, spec_graph,
+                                           run_config, triple_store)]
                     except (ValueError, FileNotFoundError, ConnectionError) as e:
                         skipped_results += [SpecSkipped(spec_uri, triple_store['type'],
                                                         e, get_spec_file(spec_uri, spec_graph))]
@@ -319,9 +350,11 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
                                           mustrd_triple_store=mustrd_triple_store))
 
         spec_file_name = get_spec_file(spec_uri, spec_graph)
+        spec_file_path = Path(spec_graph.value(
+            subject=spec_uri, predicate=MUST.specSourceFile, default=Path("default.mustrd.ttl")))
         # https://github.com/Semantic-partners/mustrd/issues/92
         return Specification(spec_uri, mustrd_triple_store,
-                             components[0].value, components[1], components[2], spec_file_name)
+                             components[0].value, components[1], components[2], spec_file_name, spec_file_path)
 
     except (ValueError, FileNotFoundError) as e:
         template = "An exception of type {0} occurred. Arguments:\n{1!r}"
@@ -333,7 +366,7 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
         raise
 
 
-def check_result(spec, result):
+def check_result(spec: Specification, result: Union[str, Graph]):
     if isinstance(spec.then, TableThenSpec):
         return table_comparison(result, spec)
     else:
@@ -351,7 +384,8 @@ def run_spec(spec: Specification) -> SpecResult:
     spec_uri = spec.spec_uri
     triple_store = spec.triple_store
     # close_connection = True
-    log.debug(f"run_when {spec_uri=}, {triple_store=}, {spec.given=}, {spec.when=}, {spec.then=}")
+    log.debug(
+        f"run_when {spec_uri=}, {triple_store=}, {spec.given=}, {spec.when=}, {spec.then=}")
     if spec.given:
         given_as_turtle = spec.given.serialize(format="turtle")
         log.debug(f"{given_as_turtle}")
@@ -361,7 +395,8 @@ def run_spec(spec: Specification) -> SpecResult:
             return SpecSkipped(spec_uri, triple_store['type'], "Unable to run Inherited State tests on Rdflib")
     try:
        for when in spec.when:
-            log.info(f"Running {when.queryType} spec {spec_uri} on {triple_store['type']}")
+            log.info(
+                f"Running {when.queryType} spec {spec_uri} on {triple_store['type']}")
            try:
                result = run_when(spec_uri, triple_store, when)
            except ParseException as e:
@@ -394,17 +429,21 @@ def get_triple_store_graph(triple_store_graph_path: Path, secrets: str):
         return Graph().parse(triple_store_graph_path).parse(secret_path)
 
 
+# Parse and validate triple store configuration
 def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
     triple_stores = []
-    shacl_graph = Graph().parse(Path(os.path.join(get_mustrd_root(), "model/triplestoreshapes.ttl")))
-    ont_graph = Graph().parse(Path(os.path.join(get_mustrd_root(), "model/triplestoreOntology.ttl")))
+    shacl_graph = Graph().parse(
+        Path(os.path.join(get_mustrd_root(), "model/triplestoreshapes.ttl")))
+    ont_graph = Graph().parse(
+        Path(os.path.join(get_mustrd_root(), "model/triplestoreOntology.ttl")))
+    # SHACL validation of triple store configuration
     conforms, results_graph, results_text = validate(
-        data_graph=triple_store_graph,
-        shacl_graph=shacl_graph,
-        ont_graph=ont_graph,
-        advanced=True,
-        inference='none'
-    )
+            data_graph=triple_store_graph,
+            shacl_graph=shacl_graph,
+            ont_graph=ont_graph,
+            advanced=True,
+            inference='none'
+        )
     if not conforms:
         raise ValueError(f"Triple store configuration not conform to the shapes. SHACL report: {results_text}",
                          results_graph)
@@ -414,46 +453,13 @@ def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
         triple_store["uri"] = triple_store_config
         # Anzo graph via anzo
         if triple_store_type == TRIPLESTORE.Anzo:
-            triple_store["url"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.url)
-            triple_store["port"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.port)
-            try:
-                triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
-                                                                        predicate=TRIPLESTORE.username))
-                triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
-                                                                        predicate=TRIPLESTORE.password))
-            except (FileNotFoundError, ValueError) as e:
-                triple_store["error"] = e
-            triple_store["gqe_uri"] = triple_store_graph.value(subject=triple_store_config,
-                                                               predicate=TRIPLESTORE.gqeURI)
-            triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
-                                                                   predicate=TRIPLESTORE.inputGraph)
-            triple_store["output_graph"] = triple_store_graph.value(subject=triple_store_config,
-                                                                    predicate=TRIPLESTORE.outputGraph)
-            try:
-                check_triple_store_params(triple_store, ["url", "port", "username", "password", "input_graph"])
-            except ValueError as e:
-                triple_store["error"] = e
+            get_anzo_configuration(
+                triple_store, triple_store_graph, triple_store_config)
         # GraphDB
         elif triple_store_type == TRIPLESTORE.GraphDb:
-            triple_store["url"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.url)
-            triple_store["port"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.port)
-            try:
-                triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
-                                                                        predicate=TRIPLESTORE.username))
-                triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
-                                                                        predicate=TRIPLESTORE.password))
-            except (FileNotFoundError, ValueError) as e:
-                log.error(f"Credential retrieval failed {e}")
-                triple_store["error"] = e
-            triple_store["repository"] = triple_store_graph.value(subject=triple_store_config,
-                                                                  predicate=TRIPLESTORE.repository)
-            triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
-                                                                   predicate=TRIPLESTORE.inputGraph)
+            get_graphDB_configuration(
+                triple_store, triple_store_graph, triple_store_config)
 
-            try:
-                check_triple_store_params(triple_store, ["url", "port", "repository"])
-            except ValueError as e:
-                triple_store["error"] = e
         elif triple_store_type != TRIPLESTORE.RdfLib:
             triple_store["error"] = f"Triple store not implemented: {triple_store_type}"
 
@@ -461,15 +467,65 @@ def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
     return triple_stores
 
 
+def get_anzo_configuration(triple_store: dict, triple_store_graph: Graph, triple_store_config: URIRef):
+    triple_store["url"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.url)
+    triple_store["port"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.port)
+    try:
+        triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
+                                                                predicate=TRIPLESTORE.username))
+        triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
+                                                                predicate=TRIPLESTORE.password))
+    except (FileNotFoundError, ValueError) as e:
+        triple_store["error"] = e
+    triple_store["gqe_uri"] = triple_store_graph.value(subject=triple_store_config,
+                                                       predicate=TRIPLESTORE.gqeURI)
+    triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
+                                                           predicate=TRIPLESTORE.inputGraph)
+    triple_store["output_graph"] = triple_store_graph.value(subject=triple_store_config,
+                                                            predicate=TRIPLESTORE.outputGraph)
+    try:
+        check_triple_store_params(
+            triple_store, ["url", "port", "username", "password", "input_graph"])
+    except ValueError as e:
+        triple_store["error"] = e
+
+
+def get_graphDB_configuration(triple_store: dict, triple_store_graph: Graph, triple_store_config: URIRef):
+    triple_store["url"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.url)
+    triple_store["port"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.port)
+    try:
+        triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
+                                                                predicate=TRIPLESTORE.username))
+        triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
+                                                                predicate=TRIPLESTORE.password))
+    except (FileNotFoundError, ValueError) as e:
+        log.error(f"Credential retrieval failed {e}")
+        triple_store["error"] = e
+    triple_store["repository"] = triple_store_graph.value(subject=triple_store_config,
+                                                          predicate=TRIPLESTORE.repository)
+    triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
+                                                           predicate=TRIPLESTORE.inputGraph)
+    try:
+        check_triple_store_params(triple_store, ["url", "repository"])
+    except ValueError as e:
+        triple_store["error"] = e
+
+
 def check_triple_store_params(triple_store: dict, required_params: List[str]):
-    missing_params = [param for param in required_params if triple_store.get(param) is None]
+    missing_params = [
+        param for param in required_params if triple_store.get(param) is None]
     if missing_params:
         raise ValueError(f"Cannot establish connection to {triple_store['type']}. "
                          f"Missing required parameter(s): {', '.join(missing_params)}.")
 
 
 def get_credential_from_file(triple_store_name: URIRef, credential: str, config_path: Literal) -> str:
-    log.info(f"get_credential_from_file {triple_store_name}, {credential}, {config_path}")
+    log.info(
+        f"get_credential_from_file {triple_store_name}, {credential}, {config_path}")
     if not config_path:
         raise ValueError(f"Cannot establish connection defined in {triple_store_name}. "
                          f"Missing required parameter: {credential}.")
@@ -508,7 +564,8 @@ def json_results_to_panda_dataframe(result: str) -> pandas.DataFrame:
             else:
                 values.append(str(XSD.anyURI))
 
-            frames = pandas.concat(objs=[frames, pandas.DataFrame([values], columns=columns)], ignore_index=True)
+            frames = pandas.concat(objs=[frames, pandas.DataFrame(
+                [values], columns=columns)], ignore_index=True)
             frames.fillna('', inplace=True)
 
         if frames.size == 0:
@@ -516,94 +573,124 @@ def json_results_to_panda_dataframe(result: str) -> pandas.DataFrame:
     return frames
 
 
-# https://github.com/Semantic-partners/mustrd/issues/110
-# https://github.com/Semantic-partners/mustrd/issues/52
 def table_comparison(result: str, spec: Specification) -> SpecResult:
     warning = None
     order_list = ["order by ?", "order by desc", "order by asc"]
-    ordered_result = any(pattern in spec.when[0].value.lower() for pattern in order_list)
-    then = spec.then.value
-    try:
-        if is_json(result):
-            df = json_results_to_panda_dataframe(result)
-            columns = list(df.columns)
-        else:
-            raise ParseException
-        sorted_columns = sorted(columns)
-        sorted_then_cols = sorted(list(then))
-        if not df.empty:
+    ordered_result = any(
+        pattern in spec.when[0].value.lower() for pattern in order_list)
+
+    # If sparql query doesn't contain order by clause, but order is define in then spec:
+    # Then ignore order in then spec and print a warning
+    if not ordered_result and spec.then.ordered:
+        warning = f"sh:order in {spec.spec_uri} is ignored, no ORDER BY in query"
+        log.warning(warning)
+
+    # If sparql query contains an order by clause and then spec is not order:
+    # Spec is inconsistent
+    if ordered_result and not spec.then.ordered:
+        message = "Actual result is ordered, must:then must contain sh:order on every row."
+        return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, message)
+
+    # Convert results to dataframe
+    if is_json(result):
+        df = json_results_to_panda_dataframe(result)
+    else:
+        return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, "Sparql result is not in JSON")
 
-            if not ordered_result:
-                df.sort_values(by=columns[::2], inplace=True)
-                df.reset_index(inplace=True, drop=True)
-                if spec.then.ordered:
-                    warning = f"sh:order in {spec.spec_uri} is ignored, no ORDER BY in query"
-                    log.warning(warning)
-
-            # Scenario 1: expected no result but got a result
-            if then.empty:
-                message = f"""Expected 0 row(s) and 0 column(s),
-                got {df.shape[0]} row(s) and {round(df.shape[1] / 2)} column(s)"""
-                empty_then = create_empty_dataframe_with_columns(df)
-                df_diff = empty_then.compare(df, result_names=("expected", "actual"))
+    # Compare result with expected
+    df_diff, message = compare_table_results(df, spec)
 
-            else:
-                # Scenario 2: expected a result and got a result
-                # pandas.set_option('display.max_columns', None)
-                message = f"Expected {then.shape[0]} row(s) and {round(then.shape[1] / 2)} column(s), " \
-                          f"got {df.shape[0]} row(s) and {round(df.shape[1] / 2)} column(s)"
-                if ordered_result is True and not spec.then.ordered:
-                    message += ". Actual result is ordered, must:then must contain sh:order on every row."
-                    return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, message)
-                else:
-                    if len(columns) == len(then.columns):
-                        if sorted_columns == sorted_then_cols:
-                            then = then[columns]
-                            if not ordered_result:
-                                then.sort_values(by=columns[::2], inplace=True)
-                                then.reset_index(drop=True, inplace=True)
-                            if df.shape == then.shape and (df.columns == then.columns).all():
-                                df_diff = then.compare(df, result_names=("expected", "actual"))
-                            else:
-                                df_diff = construct_df_diff(df, then)
-
-                        else:
-                            then = then[sorted_then_cols]
-                            df = df[sorted_columns]
-                            df_diff = construct_df_diff(df, then)
-                    else:
-
-                        then = then[sorted_then_cols]
-                        df = df[sorted_columns]
-                        df_diff = construct_df_diff(df, then)
+    if df_diff.empty:
+        if warning:
+            return SpecPassedWithWarning(spec.spec_uri, spec.triple_store["type"], warning)
         else:
+            return SpecPassed(spec.spec_uri, spec.triple_store["type"])
+    else:
+        log.error("\n" + df_diff.to_markdown())
+        log.error(message)
+        return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], df_diff, message)
 
-            if then.empty:
-                # Scenario 3: expected no result, got no result
-                message = "Expected 0 row(s) and 0 column(s), got 0 row(s) and 0 column(s)"
-                df = pandas.DataFrame()
-            else:
-                # Scenario 4: expected a result, but got an empty result
-                message = f"""Expected {then.shape[0]} row(s)
-                and {round(then.shape[1] / 2)} column(s), got 0 row(s) and 0 column(s)"""
-                then = then[sorted_then_cols]
-                df = create_empty_dataframe_with_columns(then)
-            df_diff = then.compare(df, result_names=("expected", "actual"))
-
-        if df_diff.empty:
-            if warning:
-                return SpecPassedWithWarning(spec.spec_uri, spec.triple_store["type"], warning)
+
+def compare_table_results_dispatch(resultDf: DataFrame, spec: Specification):
+    return not resultDf.empty, not spec.then.value.empty
+
+
+compare_table_results = MultiMethod(
+    "compare_table_results", compare_table_results_dispatch)
+
+
+# Scenario 1: expected a result and got a result
+@compare_table_results.method((True, True))
+def _compare_results(resultDf: DataFrame, spec: Specification):
+    columns = list(resultDf.columns)
+    sorted_columns = sorted(columns)
+    then = spec.then.value
+    sorted_then_cols = sorted(list(then))
+    order_list = ["order by ?", "order by desc", "order by asc"]
+    ordered_result = any(
+        pattern in spec.when[0].value.lower() for pattern in order_list)
+
+    if not ordered_result:
+        resultDf.sort_values(by=list(resultDf.columns)[::2], inplace=True)
+        resultDf.reset_index(inplace=True, drop=True)
+
+    if len(columns) == len(then.columns):
+        if sorted_columns == sorted_then_cols:
+            then = then[columns]
+            if not ordered_result:
+                then.sort_values(by=columns[::2], inplace=True)
+                then.reset_index(drop=True, inplace=True)
+            if resultDf.shape == then.shape and (resultDf.columns == then.columns).all():
+                df_diff = then.compare(
+                    resultDf, result_names=("expected", "actual"))
             else:
-                return SpecPassed(spec.spec_uri, spec.triple_store["type"])
+                df_diff = construct_df_diff(resultDf, then)
         else:
-            log.error("\n" + df_diff.to_markdown())
-            log.error(message)
-            return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], df_diff, message)
+            then = then[sorted_then_cols]
+            resultDf = resultDf[sorted_columns]
+            df_diff = construct_df_diff(resultDf, then)
+    else:
+        then = then[sorted_then_cols]
+        resultDf = resultDf[sorted_columns]
+        df_diff = construct_df_diff(resultDf, then)
 
-    except ParseException as e:
-        return SparqlParseFailure(spec.spec_uri, spec.triple_store["type"], e)
-    except NotImplementedError as ex:
-        return SpecSkipped(spec.spec_uri, spec.triple_store["type"], ex)
+    message = build_summary_message(then.shape[0], round(
+        then.shape[1] / 2), resultDf.shape[0], round(resultDf.shape[1] / 2))
+    return df_diff, message
+
+
+# Scenario 2: expected no result but got a result
+@compare_table_results.method((True, False))
+def _unexpected_results(resultDf: DataFrame, spec: Specification):
+    empty_then = create_empty_dataframe_with_columns(resultDf)
+    df_diff = empty_then.compare(resultDf, result_names=("expected", "actual"))
+
+    return df_diff, build_summary_message(0, 0, resultDf.shape[0], round(resultDf.shape[1] / 2))
+
+
+# Scenario 3: expected a result, but got an empty result
+@compare_table_results.method((False, True))
+def _missing_results(resultDf: DataFrame, spec: Specification):
+    then = spec.then.value
+    then = then[sorted(list(then))]
+    df = create_empty_dataframe_with_columns(then)
+    df_diff = then.compare(df, result_names=("expected", "actual"))
+
+    return df_diff, build_summary_message(then.shape[0], round(then.shape[1] / 2), 0, 0)
+
+
+# Scenario 4: expected no result, got no result
+@compare_table_results.method((False, False))
+def _no_results(resultDf: DataFrame, spec: Specification):
+    df = pandas.DataFrame()
+    df_diff = spec.then.value.compare(df, result_names=("expected", "actual"))
+
+    return df_diff, build_summary_message(0, 0, 0, 0)
+
+
+def build_summary_message(expected_rows, expected_columns, got_rows, got_columns):
+    return f"Expected {expected_rows} row(s) and {expected_columns} column(s), " \
+        f"got {got_rows} row(s) and {got_columns} column(s)"
 
 
 def graph_comparison(expected_graph: Graph, actual_graph: Graph) -> GraphComparison:
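The four @compare_table_results.method(...) registrations above are keyed on the tuple returned by compare_table_results_dispatch: (actual result table non-empty, expected then table non-empty). A small, hypothetical sketch of how a call is routed; the SimpleNamespace stand-ins only mimic the attributes the dispatch function reads and are not mustrd classes:

import pandas
from types import SimpleNamespace

# Illustrative stand-ins for the attributes compare_table_results_dispatch reads.
actual = pandas.DataFrame()                       # the query returned no rows
spec = SimpleNamespace(then=SimpleNamespace(value=pandas.DataFrame({"s": ["ex:a"]})))

# Same computation as compare_table_results_dispatch(actual, spec):
key = (not actual.empty, not spec.then.value.empty)
print(key)  # (False, True) -> routed to _missing_results (scenario 3)

Here (False, True) selects _missing_results: a result was expected but the query returned nothing.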
@@ -637,6 +724,35 @@ def get_then_update(spec_uri: URIRef, spec_graph: Graph) -> Graph:
     return expected_results
 
 
+def write_result_diff_to_log(res):
+    if isinstance(res, UpdateSpecFailure) or isinstance(res, ConstructSpecFailure):
+        log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+        log.info(f"{Fore.BLUE} In Expected Not In Actual:")
+        log.info(
+            res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
+        log.info(f"{Fore.RED} in_actual_not_in_expected")
+        log.info(
+            res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
+        log.info(f"{Fore.GREEN} in_both")
+        log.info(res.graph_comparison.in_both.serialize(format="ttl"))
+
+    if isinstance(res, SelectSpecFailure):
+        log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+        log.info(res.message)
+        log.info(res.table_comparison.to_markdown())
+    if isinstance(res, SpecPassedWithWarning):
+        log.info(
+            f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
+        log.info(res.warning)
+    if isinstance(res, TripleStoreConnectionError) or isinstance(res, SparqlExecutionError) or \
+            isinstance(res, SparqlParseFailure):
+        log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+        log.info(res.exception)
+    if isinstance(res, SpecSkipped):
+        log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
+        log.info(res.message)
+
+
 def calculate_row_difference(df1: pandas.DataFrame,
                              df2: pandas.DataFrame) -> pandas.DataFrame:
     df_all = df1.merge(df2.drop_duplicates(), how='left', indicator=True)
@@ -657,12 +773,16 @@ def construct_df_diff(df: pandas.DataFrame,
     modified_then = then
 
     if actual_columns.size > 0:
-        modified_then = modified_then.reindex(modified_then.columns.to_list() + actual_columns.to_list(), axis=1)
-        modified_then[actual_columns.to_list()] = modified_then[actual_columns.to_list()].fillna('')
+        modified_then = modified_then.reindex(
+            modified_then.columns.to_list() + actual_columns.to_list(), axis=1)
+        modified_then[actual_columns.to_list(
+        )] = modified_then[actual_columns.to_list()].fillna('')
 
     if expected_columns.size > 0:
-        modified_df = modified_df.reindex(modified_df.columns.to_list() + expected_columns.to_list(), axis=1)
-        modified_df[expected_columns.to_list()] = modified_df[expected_columns.to_list()].fillna('')
+        modified_df = modified_df.reindex(
+            modified_df.columns.to_list() + expected_columns.to_list(), axis=1)
+        modified_df[expected_columns.to_list(
+        )] = modified_df[expected_columns.to_list()].fillna('')
 
     modified_df = modified_df.reindex(modified_then.columns, axis=1)
 
@@ -686,13 +806,17 @@ def generate_row_diff(actual_rows: pandas.DataFrame, expected_rows: pandas.DataF
 
     if actual_rows.shape[0] > 0:
         empty_actual_copy = create_empty_dataframe_with_columns(actual_rows)
-        df_diff_actual_rows = empty_actual_copy.compare(actual_rows, result_names=("expected", "actual"))
+        df_diff_actual_rows = empty_actual_copy.compare(
+            actual_rows, result_names=("expected", "actual"))
 
     if expected_rows.shape[0] > 0:
-        empty_expected_copy = create_empty_dataframe_with_columns(expected_rows)
-        df_diff_expected_rows = expected_rows.compare(empty_expected_copy, result_names=("expected", "actual"))
+        empty_expected_copy = create_empty_dataframe_with_columns(
+            expected_rows)
+        df_diff_expected_rows = expected_rows.compare(
+            empty_expected_copy, result_names=("expected", "actual"))
 
-    df_diff_rows = pandas.concat([df_diff_actual_rows, df_diff_expected_rows], ignore_index=True)
+    df_diff_rows = pandas.concat(
+        [df_diff_actual_rows, df_diff_expected_rows], ignore_index=True)
     return df_diff_rows
 
 
@@ -707,15 +831,18 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
     # Init dictionaries
     status_dict = defaultdict(lambda: defaultdict(int))
     status_counts = defaultdict(lambda: defaultdict(int))
-    colours = {SpecPassed: Fore.GREEN, SpecPassedWithWarning: Fore.YELLOW, SpecSkipped: Fore.YELLOW}
+    colours = {SpecPassed: Fore.GREEN,
+               SpecPassedWithWarning: Fore.YELLOW, SpecSkipped: Fore.YELLOW}
     # Populate dictionaries from results
     for result in results:
         status_counts[result.triple_store][type(result)] += 1
         status_dict[result.spec_uri][result.triple_store] = type(result)
 
     # Get the list of statuses and list of unique triple stores
-    statuses = list(status for inner_dict in status_dict.values() for status in inner_dict.values())
-    triple_stores = list(set(status for inner_dict in status_dict.values() for status in inner_dict.keys()))
+    statuses = list(status for inner_dict in status_dict.values()
+                    for status in inner_dict.values())
+    triple_stores = list(set(status for inner_dict in status_dict.values()
+                             for status in inner_dict.keys()))
 
     # Convert dictionaries to list for tabulate
     table_rows = [[spec_uri] + [
@@ -728,8 +855,10 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
         for triple_store in triple_stores] for status in set(statuses)]
 
     # Display tables with tabulate
-    log.info(tabulate(table_rows, headers=['Spec Uris / triple stores'] + triple_stores, tablefmt="pretty"))
-    log.info(tabulate(status_rows, headers=['Status / triple stores'] + triple_stores, tablefmt="pretty"))
+    log.info(tabulate(table_rows, headers=[
+             'Spec Uris / triple stores'] + triple_stores, tablefmt="pretty"))
+    log.info(tabulate(status_rows, headers=[
+             'Status / triple stores'] + triple_stores, tablefmt="pretty"))
 
     pass_count = statuses.count(SpecPassed)
     warning_count = statuses.count(SpecPassedWithWarning)
@@ -746,33 +875,40 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
 
     logger_setup.flush()
     log.info(f"{overview_colour}===== {fail_count} failures, {skipped_count} skipped, {Fore.GREEN}{pass_count} passed, "
-             f"{overview_colour}{warning_count} passed with warnings =====")
+                 f"{overview_colour}{warning_count} passed with warnings =====")
 
     if verbose and (fail_count or warning_count or skipped_count):
-        for res in results:
-            if isinstance(res, UpdateSpecFailure):
-                log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
-                log.info(f"{Fore.BLUE} In Expected Not In Actual:")
-                log.info(res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
-                log.info()
-                log.info(f"{Fore.RED} in_actual_not_in_expected")
-                log.info(res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
-                log.info(f"{Fore.GREEN} in_both")
-                log.info(res.graph_comparison.in_both.serialize(format="ttl"))
-
-            if isinstance(res, SelectSpecFailure):
-                log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
-                log.info(res.message)
-                log.info(res.table_comparison.to_markdown())
-            if isinstance(res, ConstructSpecFailure) or isinstance(res, UpdateSpecFailure):
-                log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
-            if isinstance(res, SpecPassedWithWarning):
-                log.info(f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
-                log.info(res.warning)
-            if isinstance(res, TripleStoreConnectionError) or type(res, SparqlExecutionError) or \
-                    isinstance(res, SparqlParseFailure):
-                log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
-                log.info(res.exception)
-            if isinstance(res, SpecSkipped):
-                log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
-                log.info(res.message)
+        display_verbose(results)
+
+
+def display_verbose(results: List[SpecResult]):
+    for res in results:
+        if isinstance(res, UpdateSpecFailure):
+            log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+            log.info(f"{Fore.BLUE} In Expected Not In Actual:")
+            log.info(
+                res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
+            log.info()
+            log.info(f"{Fore.RED} in_actual_not_in_expected")
+            log.info(
+                res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
+            log.info(f"{Fore.GREEN} in_both")
+            log.info(res.graph_comparison.in_both.serialize(format="ttl"))
+
+        if isinstance(res, SelectSpecFailure):
+            log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+            log.info(res.message)
+            log.info(res.table_comparison.to_markdown())
+        if isinstance(res, ConstructSpecFailure) or isinstance(res, UpdateSpecFailure):
+            log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+        if isinstance(res, SpecPassedWithWarning):
+            log.info(
+                f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
+            log.info(res.warning)
+        if isinstance(res, TripleStoreConnectionError) or type(res, SparqlExecutionError) or \
+                isinstance(res, SparqlParseFailure):
+            log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+            log.info(res.exception)
+        if isinstance(res, SpecSkipped):
+            log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
+            log.info(res.message)