mustrd 0.2.6.1__py3-none-any.whl → 0.2.7a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mustrd/README.adoc +33 -0
- mustrd/anzo_utils.py +121 -0
- mustrd/logger_setup.py +4 -0
- mustrd/model/triplestoreOntology.ttl +0 -8
- mustrd/model/triplestoreshapes.ttl +0 -3
- mustrd/mustrd.py +340 -204
- mustrd/mustrdAnzo.py +55 -130
- mustrd/mustrdGraphDb.py +3 -3
- mustrd/mustrdTestPlugin.py +137 -93
- {mustrd-0.2.6.1.dist-info → mustrd-0.2.7a0.dist-info}/METADATA +7 -8
- {mustrd-0.2.6.1.dist-info → mustrd-0.2.7a0.dist-info}/RECORD +14 -13
- {mustrd-0.2.6.1.dist-info → mustrd-0.2.7a0.dist-info}/WHEEL +1 -1
- mustrd/test/test_mustrd.py +0 -5
- {mustrd-0.2.6.1.dist-info → mustrd-0.2.7a0.dist-info}/LICENSE +0 -0
- {mustrd-0.2.6.1.dist-info → mustrd-0.2.7a0.dist-info}/entry_points.txt +0 -0
mustrd/mustrd.py
CHANGED
@@ -23,7 +23,7 @@ SOFTWARE.
|
|
23
23
|
"""
|
24
24
|
|
25
25
|
import os
|
26
|
-
from typing import Tuple, List
|
26
|
+
from typing import Tuple, List, Union
|
27
27
|
|
28
28
|
import tomli
|
29
29
|
from rdflib.plugins.parsers.notation3 import BadSyntax
|
@@ -54,6 +54,7 @@ from pyshacl import validate
|
|
54
54
|
import logging
|
55
55
|
from http.client import HTTPConnection
|
56
56
|
from .steprunner import upload_given, run_when
|
57
|
+
from multimethods import MultiMethod
|
57
58
|
|
58
59
|
log = logger_setup.setup_logger(__name__)
|
59
60
|
|
@@ -89,7 +90,7 @@ def debug_requests_off():
|
|
89
90
|
debug_requests_off()
|
90
91
|
|
91
92
|
|
92
|
-
@dataclass
|
93
|
+
@dataclass(frozen=True)
|
93
94
|
class Specification:
|
94
95
|
spec_uri: URIRef
|
95
96
|
triple_store: dict
|
@@ -97,6 +98,7 @@ class Specification:
|
|
97
98
|
when: WhenSpec
|
98
99
|
then: ThenSpec
|
99
100
|
spec_file_name: str = "default.mustrd.ttl"
|
101
|
+
spec_source_file: Path = Path("default.mustrd.ttl")
|
100
102
|
|
101
103
|
|
102
104
|
@dataclass
|
@@ -157,6 +159,7 @@ class TripleStoreConnectionError(SpecResult):
|
|
157
159
|
class SpecSkipped(SpecResult):
|
158
160
|
message: str
|
159
161
|
spec_file_name: str = "default.mustrd.ttl"
|
162
|
+
spec_source_file: Path = Path("default.mustrd.ttl")
|
160
163
|
|
161
164
|
|
162
165
|
@dataclass
|
@@ -180,21 +183,25 @@ class UpdateSparqlQuery(SparqlAction):
|
|
180
183
|
|
181
184
|
|
182
185
|
# https://github.com/Semantic-partners/mustrd/issues/19
|
183
|
-
|
186
|
+
# Validate the specs found in spec_path
|
184
187
|
def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, ont_graph: Graph, file_name: str = "*")\
|
185
188
|
-> Tuple[List, Graph, List]:
|
186
189
|
spec_graph = Graph()
|
187
190
|
subject_uris = set()
|
188
191
|
focus_uris = set()
|
189
192
|
invalid_specs = []
|
190
|
-
ttl_files = list(run_config['spec_path'].glob(
|
193
|
+
ttl_files = list(run_config['spec_path'].glob(
|
194
|
+
f'**/{file_name}.mustrd.ttl'))
|
191
195
|
ttl_files.sort()
|
192
|
-
log.info(
|
196
|
+
log.info(
|
197
|
+
f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
|
193
198
|
|
199
|
+
# For each spec file found in spec_path
|
194
200
|
for file in ttl_files:
|
195
201
|
error_messages = []
|
196
202
|
|
197
203
|
log.info(f"Parse: {file}")
|
204
|
+
# Parse spec file and add error message if not conform to RDF standard
|
198
205
|
try:
|
199
206
|
file_graph = Graph().parse(file)
|
200
207
|
except BadSyntax as e:
|
@@ -204,6 +211,7 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
|
|
204
211
|
error_messages += [f"Could not extract spec from {file} due to exception of type "
|
205
212
|
f"{type(e).__name__} when parsing file"]
|
206
213
|
continue
|
214
|
+
|
207
215
|
# run shacl validation
|
208
216
|
conforms, results_graph, results_text = validate(file_graph,
|
209
217
|
shacl_graph=shacl_graph,
|
@@ -216,6 +224,8 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
|
|
216
224
|
advanced=True,
|
217
225
|
js=False,
|
218
226
|
debug=False)
|
227
|
+
|
228
|
+
# Add error message if not conform to spec shapes
|
219
229
|
if not conforms:
|
220
230
|
for msg in results_graph.objects(predicate=SH.resultMessage):
|
221
231
|
log.warning(f"{file_graph}")
|
@@ -223,47 +233,66 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
|
|
223
233
|
error_messages += [f"{msg} File: {file.name}"]
|
224
234
|
|
225
235
|
# collect a list of uris of the tests in focus
|
236
|
+
# If focus is found, only the spec in the focus will be executed
|
226
237
|
for focus_uri in file_graph.subjects(predicate=MUST.focus, object=Literal("true", datatype=XSD.boolean)):
|
227
238
|
if focus_uri in focus_uris:
|
228
239
|
focus_uri = URIRef(str(focus_uri) + "_DUPLICATE")
|
229
240
|
focus_uris.add(focus_uri)
|
230
241
|
|
231
|
-
|
232
|
-
|
233
|
-
if subject_uri in subject_uris:
|
234
|
-
log.warning(f"Duplicate subject URI found: {file.name} {subject_uri}. File will not be parsed.")
|
235
|
-
error_messages += [f"Duplicate subject URI found in {file.name}."]
|
236
|
-
subject_uri = URIRef(str(subject_uri) + "_DUPLICATE")
|
237
|
-
if len(error_messages) > 0:
|
238
|
-
error_messages.sort()
|
239
|
-
error_message = "\n".join(msg for msg in error_messages)
|
240
|
-
invalid_specs += [SpecSkipped(subject_uri, triple_store["type"], error_message, file.name)
|
241
|
-
for triple_store in triple_stores]
|
242
|
-
else:
|
243
|
-
subject_uris.add(subject_uri)
|
244
|
-
this_spec_graph = Graph()
|
245
|
-
this_spec_graph.parse(file)
|
246
|
-
spec_uris_in_this_file = list(this_spec_graph.subjects(RDF.type, MUST.TestSpec))
|
247
|
-
for spec in spec_uris_in_this_file:
|
248
|
-
this_spec_graph.add([spec, MUST.specSourceFile, Literal(file)])
|
249
|
-
this_spec_graph.add([spec, MUST.specFileName, Literal(file.name)])
|
250
|
-
spec_graph += this_spec_graph
|
242
|
+
add_spec_validation(file_graph, subject_uris, file,
|
243
|
+
triple_stores, error_messages, invalid_specs, spec_graph)
|
251
244
|
|
252
245
|
valid_spec_uris = list(spec_graph.subjects(RDF.type, MUST.TestSpec))
|
253
246
|
|
254
247
|
if focus_uris:
|
255
|
-
invalid_focus_specs =
|
256
|
-
for spec in invalid_specs:
|
257
|
-
if spec.spec_uri in focus_uris:
|
258
|
-
invalid_focus_specs += [spec]
|
259
|
-
focus_uris.remove(spec.spec_uri)
|
260
|
-
log.info(f"Collected {len(focus_uris)} focus test spec(s)")
|
248
|
+
invalid_focus_specs = get_invalid_focus_spec(focus_uris, invalid_specs)
|
261
249
|
return focus_uris, spec_graph, invalid_focus_specs
|
262
250
|
else:
|
263
251
|
log.info(f"Collected {len(valid_spec_uris)} valid test spec(s)")
|
264
252
|
return valid_spec_uris, spec_graph, invalid_specs
|
265
253
|
|
266
254
|
|
255
|
+
def get_invalid_focus_spec(focus_uris: set, invalid_specs: list):
|
256
|
+
invalid_focus_specs = []
|
257
|
+
for spec in invalid_specs:
|
258
|
+
if spec.spec_uri in focus_uris:
|
259
|
+
invalid_focus_specs += [spec]
|
260
|
+
focus_uris.remove(spec.spec_uri)
|
261
|
+
log.info(f"Collected {len(focus_uris)} focus test spec(s)")
|
262
|
+
return invalid_focus_specs
|
263
|
+
|
264
|
+
|
265
|
+
# Detect duplicate,
|
266
|
+
# If no error: associate the spec configuration and the file where this conf is stored
|
267
|
+
# If error, aggregate the messages and mark spec as skipped
|
268
|
+
def add_spec_validation(file_graph: Graph, subject_uris: set, file: Path, triple_stores: List,
|
269
|
+
error_messages: list, invalid_specs: list, spec_graph: Graph):
|
270
|
+
|
271
|
+
for subject_uri in file_graph.subjects(RDF.type, MUST.TestSpec):
|
272
|
+
# If we already collected a URI, then we tag it as duplicate and it won't be executed
|
273
|
+
if subject_uri in subject_uris:
|
274
|
+
log.warning(
|
275
|
+
f"Duplicate subject URI found: {file.name} {subject_uri}. File will not be parsed.")
|
276
|
+
error_messages += [f"Duplicate subject URI found in {file.name}."]
|
277
|
+
subject_uri = URIRef(str(subject_uri) + "_DUPLICATE")
|
278
|
+
if len(error_messages) == 0:
|
279
|
+
subject_uris.add(subject_uri)
|
280
|
+
this_spec_graph = Graph()
|
281
|
+
this_spec_graph.parse(file)
|
282
|
+
spec_uris_in_this_file = list(
|
283
|
+
this_spec_graph.subjects(RDF.type, MUST.TestSpec))
|
284
|
+
for spec in spec_uris_in_this_file:
|
285
|
+
this_spec_graph.add([spec, MUST.specSourceFile, Literal(file)])
|
286
|
+
this_spec_graph.add(
|
287
|
+
[spec, MUST.specFileName, Literal(file.name)])
|
288
|
+
spec_graph += this_spec_graph
|
289
|
+
else:
|
290
|
+
error_messages.sort()
|
291
|
+
error_message = "\n".join(msg for msg in error_messages)
|
292
|
+
invalid_specs += [SpecSkipped(subject_uri, triple_store["type"], error_message, file.name, file)
|
293
|
+
for triple_store in triple_stores]
|
294
|
+
|
295
|
+
|
267
296
|
def get_specs(spec_uris: List[URIRef], spec_graph: Graph, triple_stores: List[dict],
|
268
297
|
run_config: dict):
|
269
298
|
specs = []
|
@@ -271,14 +300,16 @@ def get_specs(spec_uris: List[URIRef], spec_graph: Graph, triple_stores: List[di
|
|
271
300
|
try:
|
272
301
|
for triple_store in triple_stores:
|
273
302
|
if "error" in triple_store:
|
274
|
-
log.error(
|
303
|
+
log.error(
|
304
|
+
f"{triple_store['error']}. No specs run for this triple store.")
|
275
305
|
skipped_results += [SpecSkipped(spec_uri, triple_store['type'], triple_store['error'],
|
276
306
|
get_spec_file(spec_uri, spec_graph)) for spec_uri in
|
277
307
|
spec_uris]
|
278
308
|
else:
|
279
309
|
for spec_uri in spec_uris:
|
280
310
|
try:
|
281
|
-
specs += [get_spec(spec_uri, spec_graph,
|
311
|
+
specs += [get_spec(spec_uri, spec_graph,
|
312
|
+
run_config, triple_store)]
|
282
313
|
except (ValueError, FileNotFoundError, ConnectionError) as e:
|
283
314
|
skipped_results += [SpecSkipped(spec_uri, triple_store['type'],
|
284
315
|
e, get_spec_file(spec_uri, spec_graph))]
|
@@ -319,9 +350,11 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
|
|
319
350
|
mustrd_triple_store=mustrd_triple_store))
|
320
351
|
|
321
352
|
spec_file_name = get_spec_file(spec_uri, spec_graph)
|
353
|
+
spec_file_path = Path(spec_graph.value(
|
354
|
+
subject=spec_uri, predicate=MUST.specSourceFile, default=Path("default.mustrd.ttl")))
|
322
355
|
# https://github.com/Semantic-partners/mustrd/issues/92
|
323
356
|
return Specification(spec_uri, mustrd_triple_store,
|
324
|
-
components[0].value, components[1], components[2], spec_file_name)
|
357
|
+
components[0].value, components[1], components[2], spec_file_name, spec_file_path)
|
325
358
|
|
326
359
|
except (ValueError, FileNotFoundError) as e:
|
327
360
|
template = "An exception of type {0} occurred. Arguments:\n{1!r}"
|
@@ -333,7 +366,7 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
|
|
333
366
|
raise
|
334
367
|
|
335
368
|
|
336
|
-
def check_result(spec, result):
|
369
|
+
def check_result(spec: Specification, result: Union[str, Graph]):
|
337
370
|
if isinstance(spec.then, TableThenSpec):
|
338
371
|
return table_comparison(result, spec)
|
339
372
|
else:
|
@@ -351,7 +384,8 @@ def run_spec(spec: Specification) -> SpecResult:
|
|
351
384
|
spec_uri = spec.spec_uri
|
352
385
|
triple_store = spec.triple_store
|
353
386
|
# close_connection = True
|
354
|
-
log.debug(
|
387
|
+
log.debug(
|
388
|
+
f"run_when {spec_uri=}, {triple_store=}, {spec.given=}, {spec.when=}, {spec.then=}")
|
355
389
|
if spec.given:
|
356
390
|
given_as_turtle = spec.given.serialize(format="turtle")
|
357
391
|
log.debug(f"{given_as_turtle}")
|
@@ -361,7 +395,8 @@ def run_spec(spec: Specification) -> SpecResult:
|
|
361
395
|
return SpecSkipped(spec_uri, triple_store['type'], "Unable to run Inherited State tests on Rdflib")
|
362
396
|
try:
|
363
397
|
for when in spec.when:
|
364
|
-
log.info(
|
398
|
+
log.info(
|
399
|
+
f"Running {when.queryType} spec {spec_uri} on {triple_store['type']}")
|
365
400
|
try:
|
366
401
|
result = run_when(spec_uri, triple_store, when)
|
367
402
|
except ParseException as e:
|
@@ -394,17 +429,21 @@ def get_triple_store_graph(triple_store_graph_path: Path, secrets: str):
|
|
394
429
|
return Graph().parse(triple_store_graph_path).parse(secret_path)
|
395
430
|
|
396
431
|
|
432
|
+
# Parse and validate triple store configuration
|
397
433
|
def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
|
398
434
|
triple_stores = []
|
399
|
-
shacl_graph = Graph().parse(
|
400
|
-
|
435
|
+
shacl_graph = Graph().parse(
|
436
|
+
Path(os.path.join(get_mustrd_root(), "model/triplestoreshapes.ttl")))
|
437
|
+
ont_graph = Graph().parse(
|
438
|
+
Path(os.path.join(get_mustrd_root(), "model/triplestoreOntology.ttl")))
|
439
|
+
# SHACL validation of triple store configuration
|
401
440
|
conforms, results_graph, results_text = validate(
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
441
|
+
data_graph=triple_store_graph,
|
442
|
+
shacl_graph=shacl_graph,
|
443
|
+
ont_graph=ont_graph,
|
444
|
+
advanced=True,
|
445
|
+
inference='none'
|
446
|
+
)
|
408
447
|
if not conforms:
|
409
448
|
raise ValueError(f"Triple store configuration not conform to the shapes. SHACL report: {results_text}",
|
410
449
|
results_graph)
|
@@ -414,46 +453,13 @@ def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
|
|
414
453
|
triple_store["uri"] = triple_store_config
|
415
454
|
# Anzo graph via anzo
|
416
455
|
if triple_store_type == TRIPLESTORE.Anzo:
|
417
|
-
|
418
|
-
|
419
|
-
try:
|
420
|
-
triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
|
421
|
-
predicate=TRIPLESTORE.username))
|
422
|
-
triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
|
423
|
-
predicate=TRIPLESTORE.password))
|
424
|
-
except (FileNotFoundError, ValueError) as e:
|
425
|
-
triple_store["error"] = e
|
426
|
-
triple_store["gqe_uri"] = triple_store_graph.value(subject=triple_store_config,
|
427
|
-
predicate=TRIPLESTORE.gqeURI)
|
428
|
-
triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
|
429
|
-
predicate=TRIPLESTORE.inputGraph)
|
430
|
-
triple_store["output_graph"] = triple_store_graph.value(subject=triple_store_config,
|
431
|
-
predicate=TRIPLESTORE.outputGraph)
|
432
|
-
try:
|
433
|
-
check_triple_store_params(triple_store, ["url", "port", "username", "password", "input_graph"])
|
434
|
-
except ValueError as e:
|
435
|
-
triple_store["error"] = e
|
456
|
+
get_anzo_configuration(
|
457
|
+
triple_store, triple_store_graph, triple_store_config)
|
436
458
|
# GraphDB
|
437
459
|
elif triple_store_type == TRIPLESTORE.GraphDb:
|
438
|
-
|
439
|
-
|
440
|
-
try:
|
441
|
-
triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
|
442
|
-
predicate=TRIPLESTORE.username))
|
443
|
-
triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
|
444
|
-
predicate=TRIPLESTORE.password))
|
445
|
-
except (FileNotFoundError, ValueError) as e:
|
446
|
-
log.error(f"Credential retrieval failed {e}")
|
447
|
-
triple_store["error"] = e
|
448
|
-
triple_store["repository"] = triple_store_graph.value(subject=triple_store_config,
|
449
|
-
predicate=TRIPLESTORE.repository)
|
450
|
-
triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
|
451
|
-
predicate=TRIPLESTORE.inputGraph)
|
460
|
+
get_graphDB_configuration(
|
461
|
+
triple_store, triple_store_graph, triple_store_config)
|
452
462
|
|
453
|
-
try:
|
454
|
-
check_triple_store_params(triple_store, ["url", "port", "repository"])
|
455
|
-
except ValueError as e:
|
456
|
-
triple_store["error"] = e
|
457
463
|
elif triple_store_type != TRIPLESTORE.RdfLib:
|
458
464
|
triple_store["error"] = f"Triple store not implemented: {triple_store_type}"
|
459
465
|
|
@@ -461,15 +467,65 @@ def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
|
|
461
467
|
return triple_stores
|
462
468
|
|
463
469
|
|
470
|
+
def get_anzo_configuration(triple_store: dict, triple_store_graph: Graph, triple_store_config: URIRef):
|
471
|
+
triple_store["url"] = triple_store_graph.value(
|
472
|
+
subject=triple_store_config, predicate=TRIPLESTORE.url)
|
473
|
+
triple_store["port"] = triple_store_graph.value(
|
474
|
+
subject=triple_store_config, predicate=TRIPLESTORE.port)
|
475
|
+
try:
|
476
|
+
triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
|
477
|
+
predicate=TRIPLESTORE.username))
|
478
|
+
triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
|
479
|
+
predicate=TRIPLESTORE.password))
|
480
|
+
except (FileNotFoundError, ValueError) as e:
|
481
|
+
triple_store["error"] = e
|
482
|
+
triple_store["gqe_uri"] = triple_store_graph.value(subject=triple_store_config,
|
483
|
+
predicate=TRIPLESTORE.gqeURI)
|
484
|
+
triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
|
485
|
+
predicate=TRIPLESTORE.inputGraph)
|
486
|
+
triple_store["output_graph"] = triple_store_graph.value(subject=triple_store_config,
|
487
|
+
predicate=TRIPLESTORE.outputGraph)
|
488
|
+
try:
|
489
|
+
check_triple_store_params(
|
490
|
+
triple_store, ["url", "port", "username", "password", "input_graph"])
|
491
|
+
except ValueError as e:
|
492
|
+
triple_store["error"] = e
|
493
|
+
|
494
|
+
|
495
|
+
def get_graphDB_configuration(triple_store: dict, triple_store_graph: Graph, triple_store_config: URIRef):
|
496
|
+
triple_store["url"] = triple_store_graph.value(
|
497
|
+
subject=triple_store_config, predicate=TRIPLESTORE.url)
|
498
|
+
triple_store["port"] = triple_store_graph.value(
|
499
|
+
subject=triple_store_config, predicate=TRIPLESTORE.port)
|
500
|
+
try:
|
501
|
+
triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
|
502
|
+
predicate=TRIPLESTORE.username))
|
503
|
+
triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
|
504
|
+
predicate=TRIPLESTORE.password))
|
505
|
+
except (FileNotFoundError, ValueError) as e:
|
506
|
+
log.error(f"Credential retrieval failed {e}")
|
507
|
+
triple_store["error"] = e
|
508
|
+
triple_store["repository"] = triple_store_graph.value(subject=triple_store_config,
|
509
|
+
predicate=TRIPLESTORE.repository)
|
510
|
+
triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
|
511
|
+
predicate=TRIPLESTORE.inputGraph)
|
512
|
+
try:
|
513
|
+
check_triple_store_params(triple_store, ["url", "repository"])
|
514
|
+
except ValueError as e:
|
515
|
+
triple_store["error"] = e
|
516
|
+
|
517
|
+
|
464
518
|
def check_triple_store_params(triple_store: dict, required_params: List[str]):
|
465
|
-
missing_params = [
|
519
|
+
missing_params = [
|
520
|
+
param for param in required_params if triple_store.get(param) is None]
|
466
521
|
if missing_params:
|
467
522
|
raise ValueError(f"Cannot establish connection to {triple_store['type']}. "
|
468
523
|
f"Missing required parameter(s): {', '.join(missing_params)}.")
|
469
524
|
|
470
525
|
|
471
526
|
def get_credential_from_file(triple_store_name: URIRef, credential: str, config_path: Literal) -> str:
|
472
|
-
log.info(
|
527
|
+
log.info(
|
528
|
+
f"get_credential_from_file {triple_store_name}, {credential}, {config_path}")
|
473
529
|
if not config_path:
|
474
530
|
raise ValueError(f"Cannot establish connection defined in {triple_store_name}. "
|
475
531
|
f"Missing required parameter: {credential}.")
|
@@ -508,7 +564,8 @@ def json_results_to_panda_dataframe(result: str) -> pandas.DataFrame:
|
|
508
564
|
else:
|
509
565
|
values.append(str(XSD.anyURI))
|
510
566
|
|
511
|
-
frames = pandas.concat(objs=[frames, pandas.DataFrame(
|
567
|
+
frames = pandas.concat(objs=[frames, pandas.DataFrame(
|
568
|
+
[values], columns=columns)], ignore_index=True)
|
512
569
|
frames.fillna('', inplace=True)
|
513
570
|
|
514
571
|
if frames.size == 0:
|
@@ -516,94 +573,124 @@ def json_results_to_panda_dataframe(result: str) -> pandas.DataFrame:
|
|
516
573
|
return frames
|
517
574
|
|
518
575
|
|
519
|
-
# https://github.com/Semantic-partners/mustrd/issues/110
|
520
|
-
# https://github.com/Semantic-partners/mustrd/issues/52
|
521
576
|
def table_comparison(result: str, spec: Specification) -> SpecResult:
|
522
577
|
warning = None
|
523
578
|
order_list = ["order by ?", "order by desc", "order by asc"]
|
524
|
-
ordered_result = any(
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
579
|
+
ordered_result = any(
|
580
|
+
pattern in spec.when[0].value.lower() for pattern in order_list)
|
581
|
+
|
582
|
+
# If sparql query doesn't contain order by clause, but order is define in then spec:
|
583
|
+
# Then ignore order in then spec and print a warning
|
584
|
+
if not ordered_result and spec.then.ordered:
|
585
|
+
warning = f"sh:order in {spec.spec_uri} is ignored, no ORDER BY in query"
|
586
|
+
log.warning(warning)
|
587
|
+
|
588
|
+
# If sparql query contains an order by clause and then spec is not order:
|
589
|
+
# Spec is inconsistent
|
590
|
+
if ordered_result and not spec.then.ordered:
|
591
|
+
message = "Actual result is ordered, must:then must contain sh:order on every row."
|
592
|
+
return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, message)
|
593
|
+
|
594
|
+
# Convert results to dataframe
|
595
|
+
if is_json(result):
|
596
|
+
df = json_results_to_panda_dataframe(result)
|
597
|
+
else:
|
598
|
+
return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, "Sparql result is not in JSON")
|
535
599
|
|
536
|
-
|
537
|
-
|
538
|
-
df.reset_index(inplace=True, drop=True)
|
539
|
-
if spec.then.ordered:
|
540
|
-
warning = f"sh:order in {spec.spec_uri} is ignored, no ORDER BY in query"
|
541
|
-
log.warning(warning)
|
542
|
-
|
543
|
-
# Scenario 1: expected no result but got a result
|
544
|
-
if then.empty:
|
545
|
-
message = f"""Expected 0 row(s) and 0 column(s),
|
546
|
-
got {df.shape[0]} row(s) and {round(df.shape[1] / 2)} column(s)"""
|
547
|
-
empty_then = create_empty_dataframe_with_columns(df)
|
548
|
-
df_diff = empty_then.compare(df, result_names=("expected", "actual"))
|
600
|
+
# Compare result with expected
|
601
|
+
df_diff, message = compare_table_results(df, spec)
|
549
602
|
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
message = f"Expected {then.shape[0]} row(s) and {round(then.shape[1] / 2)} column(s), " \
|
554
|
-
f"got {df.shape[0]} row(s) and {round(df.shape[1] / 2)} column(s)"
|
555
|
-
if ordered_result is True and not spec.then.ordered:
|
556
|
-
message += ". Actual result is ordered, must:then must contain sh:order on every row."
|
557
|
-
return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, message)
|
558
|
-
else:
|
559
|
-
if len(columns) == len(then.columns):
|
560
|
-
if sorted_columns == sorted_then_cols:
|
561
|
-
then = then[columns]
|
562
|
-
if not ordered_result:
|
563
|
-
then.sort_values(by=columns[::2], inplace=True)
|
564
|
-
then.reset_index(drop=True, inplace=True)
|
565
|
-
if df.shape == then.shape and (df.columns == then.columns).all():
|
566
|
-
df_diff = then.compare(df, result_names=("expected", "actual"))
|
567
|
-
else:
|
568
|
-
df_diff = construct_df_diff(df, then)
|
569
|
-
|
570
|
-
else:
|
571
|
-
then = then[sorted_then_cols]
|
572
|
-
df = df[sorted_columns]
|
573
|
-
df_diff = construct_df_diff(df, then)
|
574
|
-
else:
|
575
|
-
|
576
|
-
then = then[sorted_then_cols]
|
577
|
-
df = df[sorted_columns]
|
578
|
-
df_diff = construct_df_diff(df, then)
|
603
|
+
if df_diff.empty:
|
604
|
+
if warning:
|
605
|
+
return SpecPassedWithWarning(spec.spec_uri, spec.triple_store["type"], warning)
|
579
606
|
else:
|
607
|
+
return SpecPassed(spec.spec_uri, spec.triple_store["type"])
|
608
|
+
else:
|
609
|
+
log.error("\n" + df_diff.to_markdown())
|
610
|
+
log.error(message)
|
611
|
+
return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], df_diff, message)
|
580
612
|
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
613
|
+
|
614
|
+
def compare_table_results_dispatch(resultDf: DataFrame, spec: Specification):
|
615
|
+
return not resultDf.empty, not spec.then.value.empty
|
616
|
+
|
617
|
+
|
618
|
+
compare_table_results = MultiMethod(
|
619
|
+
"compare_table_results", compare_table_results_dispatch)
|
620
|
+
|
621
|
+
|
622
|
+
# Scenario 1: expected a result and got a result
|
623
|
+
@compare_table_results.method((True, True))
|
624
|
+
def _compare_results(resultDf: DataFrame, spec: Specification):
|
625
|
+
columns = list(resultDf.columns)
|
626
|
+
sorted_columns = sorted(columns)
|
627
|
+
then = spec.then.value
|
628
|
+
sorted_then_cols = sorted(list(then))
|
629
|
+
order_list = ["order by ?", "order by desc", "order by asc"]
|
630
|
+
ordered_result = any(
|
631
|
+
pattern in spec.when[0].value.lower() for pattern in order_list)
|
632
|
+
|
633
|
+
if not ordered_result:
|
634
|
+
resultDf.sort_values(by=list(resultDf.columns)[::2], inplace=True)
|
635
|
+
resultDf.reset_index(inplace=True, drop=True)
|
636
|
+
|
637
|
+
if len(columns) == len(then.columns):
|
638
|
+
if sorted_columns == sorted_then_cols:
|
639
|
+
then = then[columns]
|
640
|
+
if not ordered_result:
|
641
|
+
then.sort_values(by=columns[::2], inplace=True)
|
642
|
+
then.reset_index(drop=True, inplace=True)
|
643
|
+
if resultDf.shape == then.shape and (resultDf.columns == then.columns).all():
|
644
|
+
df_diff = then.compare(
|
645
|
+
resultDf, result_names=("expected", "actual"))
|
596
646
|
else:
|
597
|
-
|
647
|
+
df_diff = construct_df_diff(resultDf, then)
|
598
648
|
else:
|
599
|
-
|
600
|
-
|
601
|
-
|
649
|
+
then = then[sorted_then_cols]
|
650
|
+
resultDf = resultDf[sorted_columns]
|
651
|
+
df_diff = construct_df_diff(resultDf, then)
|
652
|
+
else:
|
653
|
+
then = then[sorted_then_cols]
|
654
|
+
resultDf = resultDf[sorted_columns]
|
655
|
+
df_diff = construct_df_diff(resultDf, then)
|
602
656
|
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
657
|
+
message = build_summary_message(then.shape[0], round(
|
658
|
+
then.shape[1] / 2), resultDf.shape[0], round(resultDf.shape[1] / 2))
|
659
|
+
return df_diff, message
|
660
|
+
|
661
|
+
|
662
|
+
# Scenario 2: expected no result but got a result
|
663
|
+
@compare_table_results.method((True, False))
|
664
|
+
def _unexpected_results(resultDf: DataFrame, spec: Specification):
|
665
|
+
empty_then = create_empty_dataframe_with_columns(resultDf)
|
666
|
+
df_diff = empty_then.compare(resultDf, result_names=("expected", "actual"))
|
667
|
+
|
668
|
+
return df_diff, build_summary_message(0, 0, resultDf.shape[0], round(resultDf.shape[1] / 2))
|
669
|
+
|
670
|
+
|
671
|
+
# Scenario 3: expected a result, but got an empty result
|
672
|
+
@compare_table_results.method((False, True))
|
673
|
+
def _missing_results(resultDf: DataFrame, spec: Specification):
|
674
|
+
then = spec.then.value
|
675
|
+
then = then[sorted(list(then))]
|
676
|
+
df = create_empty_dataframe_with_columns(then)
|
677
|
+
df_diff = then.compare(df, result_names=("expected", "actual"))
|
678
|
+
|
679
|
+
return df_diff, build_summary_message(then.shape[0], round(then.shape[1] / 2), 0, 0)
|
680
|
+
|
681
|
+
|
682
|
+
# Scenario 4: expected no result, got no result
|
683
|
+
@compare_table_results.method((False, False))
|
684
|
+
def _no_results(resultDf: DataFrame, spec: Specification):
|
685
|
+
df = pandas.DataFrame()
|
686
|
+
df_diff = spec.then.value.compare(df, result_names=("expected", "actual"))
|
687
|
+
|
688
|
+
return df_diff, build_summary_message(0, 0, 0, 0)
|
689
|
+
|
690
|
+
|
691
|
+
def build_summary_message(expected_rows, expected_columns, got_rows, got_columns):
|
692
|
+
return f"Expected {expected_rows} row(s) and {expected_columns} column(s), " \
|
693
|
+
f"got {got_rows} row(s) and {got_columns} column(s)"
|
607
694
|
|
608
695
|
|
609
696
|
def graph_comparison(expected_graph: Graph, actual_graph: Graph) -> GraphComparison:
|
@@ -637,6 +724,35 @@ def get_then_update(spec_uri: URIRef, spec_graph: Graph) -> Graph:
|
|
637
724
|
return expected_results
|
638
725
|
|
639
726
|
|
727
|
+
def write_result_diff_to_log(res):
|
728
|
+
if isinstance(res, UpdateSpecFailure) or isinstance(res, ConstructSpecFailure):
|
729
|
+
log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
|
730
|
+
log.info(f"{Fore.BLUE} In Expected Not In Actual:")
|
731
|
+
log.info(
|
732
|
+
res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
|
733
|
+
log.info(f"{Fore.RED} in_actual_not_in_expected")
|
734
|
+
log.info(
|
735
|
+
res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
|
736
|
+
log.info(f"{Fore.GREEN} in_both")
|
737
|
+
log.info(res.graph_comparison.in_both.serialize(format="ttl"))
|
738
|
+
|
739
|
+
if isinstance(res, SelectSpecFailure):
|
740
|
+
log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
|
741
|
+
log.info(res.message)
|
742
|
+
log.info(res.table_comparison.to_markdown())
|
743
|
+
if isinstance(res, SpecPassedWithWarning):
|
744
|
+
log.info(
|
745
|
+
f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
|
746
|
+
log.info(res.warning)
|
747
|
+
if isinstance(res, TripleStoreConnectionError) or isinstance(res, SparqlExecutionError) or \
|
748
|
+
isinstance(res, SparqlParseFailure):
|
749
|
+
log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
|
750
|
+
log.info(res.exception)
|
751
|
+
if isinstance(res, SpecSkipped):
|
752
|
+
log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
|
753
|
+
log.info(res.message)
|
754
|
+
|
755
|
+
|
640
756
|
def calculate_row_difference(df1: pandas.DataFrame,
|
641
757
|
df2: pandas.DataFrame) -> pandas.DataFrame:
|
642
758
|
df_all = df1.merge(df2.drop_duplicates(), how='left', indicator=True)
|
@@ -657,12 +773,16 @@ def construct_df_diff(df: pandas.DataFrame,
|
|
657
773
|
modified_then = then
|
658
774
|
|
659
775
|
if actual_columns.size > 0:
|
660
|
-
modified_then = modified_then.reindex(
|
661
|
-
|
776
|
+
modified_then = modified_then.reindex(
|
777
|
+
modified_then.columns.to_list() + actual_columns.to_list(), axis=1)
|
778
|
+
modified_then[actual_columns.to_list(
|
779
|
+
)] = modified_then[actual_columns.to_list()].fillna('')
|
662
780
|
|
663
781
|
if expected_columns.size > 0:
|
664
|
-
modified_df = modified_df.reindex(
|
665
|
-
|
782
|
+
modified_df = modified_df.reindex(
|
783
|
+
modified_df.columns.to_list() + expected_columns.to_list(), axis=1)
|
784
|
+
modified_df[expected_columns.to_list(
|
785
|
+
)] = modified_df[expected_columns.to_list()].fillna('')
|
666
786
|
|
667
787
|
modified_df = modified_df.reindex(modified_then.columns, axis=1)
|
668
788
|
|
@@ -686,13 +806,17 @@ def generate_row_diff(actual_rows: pandas.DataFrame, expected_rows: pandas.DataF
|
|
686
806
|
|
687
807
|
if actual_rows.shape[0] > 0:
|
688
808
|
empty_actual_copy = create_empty_dataframe_with_columns(actual_rows)
|
689
|
-
df_diff_actual_rows = empty_actual_copy.compare(
|
809
|
+
df_diff_actual_rows = empty_actual_copy.compare(
|
810
|
+
actual_rows, result_names=("expected", "actual"))
|
690
811
|
|
691
812
|
if expected_rows.shape[0] > 0:
|
692
|
-
empty_expected_copy = create_empty_dataframe_with_columns(
|
693
|
-
|
813
|
+
empty_expected_copy = create_empty_dataframe_with_columns(
|
814
|
+
expected_rows)
|
815
|
+
df_diff_expected_rows = expected_rows.compare(
|
816
|
+
empty_expected_copy, result_names=("expected", "actual"))
|
694
817
|
|
695
|
-
df_diff_rows = pandas.concat(
|
818
|
+
df_diff_rows = pandas.concat(
|
819
|
+
[df_diff_actual_rows, df_diff_expected_rows], ignore_index=True)
|
696
820
|
return df_diff_rows
|
697
821
|
|
698
822
|
|
@@ -707,15 +831,18 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
|
|
707
831
|
# Init dictionaries
|
708
832
|
status_dict = defaultdict(lambda: defaultdict(int))
|
709
833
|
status_counts = defaultdict(lambda: defaultdict(int))
|
710
|
-
colours = {SpecPassed: Fore.GREEN,
|
834
|
+
colours = {SpecPassed: Fore.GREEN,
|
835
|
+
SpecPassedWithWarning: Fore.YELLOW, SpecSkipped: Fore.YELLOW}
|
711
836
|
# Populate dictionaries from results
|
712
837
|
for result in results:
|
713
838
|
status_counts[result.triple_store][type(result)] += 1
|
714
839
|
status_dict[result.spec_uri][result.triple_store] = type(result)
|
715
840
|
|
716
841
|
# Get the list of statuses and list of unique triple stores
|
717
|
-
statuses = list(status for inner_dict in status_dict.values()
|
718
|
-
|
842
|
+
statuses = list(status for inner_dict in status_dict.values()
|
843
|
+
for status in inner_dict.values())
|
844
|
+
triple_stores = list(set(status for inner_dict in status_dict.values()
|
845
|
+
for status in inner_dict.keys()))
|
719
846
|
|
720
847
|
# Convert dictionaries to list for tabulate
|
721
848
|
table_rows = [[spec_uri] + [
|
@@ -728,8 +855,10 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
|
|
728
855
|
for triple_store in triple_stores] for status in set(statuses)]
|
729
856
|
|
730
857
|
# Display tables with tabulate
|
731
|
-
log.info(tabulate(table_rows, headers=[
|
732
|
-
|
858
|
+
log.info(tabulate(table_rows, headers=[
|
859
|
+
'Spec Uris / triple stores'] + triple_stores, tablefmt="pretty"))
|
860
|
+
log.info(tabulate(status_rows, headers=[
|
861
|
+
'Status / triple stores'] + triple_stores, tablefmt="pretty"))
|
733
862
|
|
734
863
|
pass_count = statuses.count(SpecPassed)
|
735
864
|
warning_count = statuses.count(SpecPassedWithWarning)
|
@@ -746,33 +875,40 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
|
|
746
875
|
|
747
876
|
logger_setup.flush()
|
748
877
|
log.info(f"{overview_colour}===== {fail_count} failures, {skipped_count} skipped, {Fore.GREEN}{pass_count} passed, "
|
749
|
-
|
878
|
+
f"{overview_colour}{warning_count} passed with warnings =====")
|
750
879
|
|
751
880
|
if verbose and (fail_count or warning_count or skipped_count):
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
881
|
+
display_verbose(results)
|
882
|
+
|
883
|
+
|
884
|
+
def display_verbose(results: List[SpecResult]):
    """Log a detailed, colourised report for each result that needs attention.

    Every result is checked against each category below independently (the
    checks are deliberately separate ``if`` statements, not an ``elif`` chain),
    so a result matching several categories is reported once per match.

    Args:
        results: The spec results to report on. Results that match none of
            the categories below (e.g. plain passes) produce no output.
    """
    for res in results:
        if isinstance(res, UpdateSpecFailure):
            log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
            log.info(f"{Fore.BLUE} In Expected Not In Actual:")
            log.info(
                res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
            # Blank separator line. Logger.info() requires a message argument,
            # so an empty string is passed instead of calling it bare.
            log.info("")
            log.info(f"{Fore.RED} in_actual_not_in_expected")
            log.info(
                res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
            log.info(f"{Fore.GREEN} in_both")
            log.info(res.graph_comparison.in_both.serialize(format="ttl"))

        if isinstance(res, SelectSpecFailure):
            log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
            log.info(res.message)
            log.info(res.table_comparison.to_markdown())
        # NOTE(review): an UpdateSpecFailure also matched the first branch, so
        # its "Failed ..." header is logged twice, while a ConstructSpecFailure
        # gets no graph comparison output - confirm this is intended.
        if isinstance(res, (ConstructSpecFailure, UpdateSpecFailure)):
            log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
        if isinstance(res, SpecPassedWithWarning):
            log.info(
                f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
            log.info(res.warning)
        # isinstance (not the two-argument type(), which raises TypeError) is
        # used for all three error kinds that carry an ``exception`` attribute.
        if isinstance(res, (TripleStoreConnectionError, SparqlExecutionError,
                            SparqlParseFailure)):
            log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
            log.info(res.exception)
        if isinstance(res, SpecSkipped):
            log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
            log.info(res.message)
|