mustrd 0.2.7a0-py3-none-any.whl → 0.3.1a0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mustrd/README.md +2 -0
- mustrd/anzo_utils.py +8 -5
- mustrd/logger_setup.py +3 -0
- mustrd/model/mustrdShapes.ttl +25 -6
- mustrd/model/ontology.ttl +6 -2
- mustrd/mustrd.py +508 -235
- mustrd/mustrdAnzo.py +3 -2
- mustrd/mustrdRdfLib.py +8 -1
- mustrd/mustrdTestPlugin.py +299 -128
- mustrd/namespace.py +10 -1
- mustrd/spec_component.py +238 -58
- mustrd/steprunner.py +78 -20
- mustrd-0.3.1a0.dist-info/METADATA +96 -0
- {mustrd-0.2.7a0.dist-info → mustrd-0.3.1a0.dist-info}/RECORD +17 -17
- mustrd-0.2.7a0.dist-info/METADATA +0 -96
- {mustrd-0.2.7a0.dist-info → mustrd-0.3.1a0.dist-info}/LICENSE +0 -0
- {mustrd-0.2.7a0.dist-info → mustrd-0.3.1a0.dist-info}/WHEEL +0 -0
- {mustrd-0.2.7a0.dist-info → mustrd-0.3.1a0.dist-info}/entry_points.txt +0 -0
mustrd/mustrd.py
CHANGED
@@ -1,27 +1,3 @@
-"""
-MIT License
-
-Copyright (c) 2023 Semantic Partners Ltd
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-
 import os
 from typing import Tuple, List, Union
 
@@ -53,19 +29,21 @@ from collections import defaultdict
 from pyshacl import validate
 import logging
 from http.client import HTTPConnection
-from .steprunner import upload_given,
+from .steprunner import upload_given, run_when_impl
 from multimethods import MultiMethod
+import traceback
+from functools import wraps
 
-log =
+log = logging.getLogger(__name__)
 
 requests.packages.urllib3.disable_warnings()
-requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS +=
+requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += ":HIGH:!DH:!aNULL"
 
-logging.basicConfig(format=
+logging.basicConfig(format="%(asctime)s %(message)s", level=logging.INFO)
 
 
 def debug_requests_on():
-
+    """Switches on logging of the requests module."""
     HTTPConnection.debuglevel = 1
 
     logging.basicConfig()
@@ -76,7 +54,7 @@ def debug_requests_on():
 
 
 def debug_requests_off():
-
+    """Switches off logging of the requests module, might be some side-effects"""
     HTTPConnection.debuglevel = 0
 
     root_logger = logging.getLogger()
@@ -184,20 +162,34 @@ class UpdateSparqlQuery(SparqlAction):
 
 # https://github.com/Semantic-partners/mustrd/issues/19
 # Validate the specs found in spec_path
-def validate_specs(
-
+def validate_specs(
+    run_config: dict,
+    triple_stores: List,
+    shacl_graph: Graph,
+    ont_graph: Graph,
+    file_name: str = "*",
+    selected_test_files: List[str] = [],
+) -> Tuple[List, Graph, List]:
     spec_graph = Graph()
     subject_uris = set()
     focus_uris = set()
     invalid_specs = []
-    ttl_files =
-
+    ttl_files = []
+
+    if not selected_test_files:
+        ttl_files = list(run_config["spec_path"].glob(f"**/{file_name}.mustrd.ttl"))
+        log.info(
+            f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}"
+        )
+    else:
+        ttl_files = selected_test_files
+
+    log.info(f"Using {ttl_files} for test source")
     ttl_files.sort()
-    log.info(
-        f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
 
     # For each spec file found in spec_path
     for file in ttl_files:
+        # file = file.resolve()
        error_messages = []
 
        log.info(f"Parse: {file}")
@@ -207,24 +199,31 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
         except BadSyntax as e:
             template = "An exception of type {0} occurred when trying to parse a spec file. Arguments:\n{1!r}"
             message = template.format(type(e).__name__, e.args)
-            log.error(message)
-            error_messages += [
-
+            log.error(message, exc_info=True)
+            error_messages += [
+                f"Could not extract spec from {file} due to exception of type "
+                f"{type(e).__name__} when parsing file"
+            ]
             continue
 
         # run shacl validation
-        conforms, results_graph, results_text = validate(
-
-
-
-
-
-
-
-
-
-
+        conforms, results_graph, results_text = validate(
+            file_graph,
+            shacl_graph=shacl_graph,
+            ont_graph=ont_graph,
+            inference="none",
+            abort_on_first=False,
+            allow_infos=False,
+            allow_warnings=False,
+            meta_shacl=False,
+            advanced=True,
+            js=False,
+            debug=False,
+        )
+        if str(file.name).endswith("_duplicate"):
+            log.debug(f"Validation of {file.name} against SHACL shapes: {conforms}")
+            log.debug(f"{results_graph.serialize(format='turtle')}")
+        # log.debug(f"SHACL validation results: {results_text}")
         # Add error message if not conform to spec shapes
         if not conforms:
             for msg in results_graph.objects(predicate=SH.resultMessage):
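Note: the expanded call above uses pyshacl's standard `validate` entry point. A minimal, self-contained sketch of the same pattern, with hypothetical file names for illustration only:

```python
from pyshacl import validate
from rdflib import Graph

# Hypothetical file names, for illustration only.
data_graph = Graph().parse("example.mustrd.ttl")
shapes = Graph().parse("mustrdShapes.ttl")

conforms, results_graph, results_text = validate(
    data_graph,
    shacl_graph=shapes,
    inference="none",  # validate the data exactly as written
    advanced=True,     # enable SHACL-AF features such as SPARQL targets
)
if not conforms:
    print(results_text)  # human-readable validation report
```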
@@ -234,13 +233,22 @@
 
         # collect a list of uris of the tests in focus
         # If focus is found, only the spec in the focus will be executed
-        for focus_uri in file_graph.subjects(
+        for focus_uri in file_graph.subjects(
+            predicate=MUST.focus, object=Literal("true", datatype=XSD.boolean)
+        ):
             if focus_uri in focus_uris:
                 focus_uri = URIRef(str(focus_uri) + "_DUPLICATE")
             focus_uris.add(focus_uri)
 
-        add_spec_validation(
-
+        add_spec_validation(
+            file_graph,
+            subject_uris,
+            file,
+            triple_stores,
+            error_messages,
+            invalid_specs,
+            spec_graph,
+        )
 
     valid_spec_uris = list(spec_graph.subjects(RDF.type, MUST.TestSpec))
 
@@ -265,14 +273,26 @@ def get_invalid_focus_spec(focus_uris: set, invalid_specs: list):
 # Detect duplicate,
 # If no error: associate the spec configuration and the file where this conf is stored
 # If error, aggregate the messages and mark spec as skipped
-def add_spec_validation(
-
+def add_spec_validation(
+    file_graph: Graph,
+    subject_uris: set,
+    file: Path,
+    triple_stores: List,
+    error_messages: list,
+    invalid_specs: list,
+    spec_graph: Graph,
+):
 
     for subject_uri in file_graph.subjects(RDF.type, MUST.TestSpec):
+        # Always add file name and source file to the graph for error reporting
+        file_graph.add([subject_uri, MUST.specSourceFile, Literal(str(file))])
+        file_graph.add([subject_uri, MUST.specFileName, Literal(file.name)])
+
         # If we already collected a URI, then we tag it as duplicate and it won't be executed
         if subject_uri in subject_uris:
             log.warning(
-                f"Duplicate subject URI found: {file.name} {subject_uri}. File will not be parsed."
+                f"Duplicate subject URI found: {file.name} {subject_uri}. File will not be parsed."
+            )
             error_messages += [f"Duplicate subject URI found in {file.name}."]
             subject_uri = URIRef(str(subject_uri) + "_DUPLICATE")
         if len(error_messages) == 0:
@@ -280,43 +300,81 @@ def add_spec_validation(file_graph: Graph, subject_uris: set, file: Path, triple
             this_spec_graph = Graph()
             this_spec_graph.parse(file)
             spec_uris_in_this_file = list(
-                this_spec_graph.subjects(RDF.type, MUST.TestSpec)
+                this_spec_graph.subjects(RDF.type, MUST.TestSpec)
+            )
             for spec in spec_uris_in_this_file:
                 this_spec_graph.add([spec, MUST.specSourceFile, Literal(file)])
-                this_spec_graph.add(
-                    [spec, MUST.specFileName, Literal(file.name)])
+                this_spec_graph.add([spec, MUST.specFileName, Literal(file.name)])
             spec_graph += this_spec_graph
         else:
             error_messages.sort()
             error_message = "\n".join(msg for msg in error_messages)
-            invalid_specs += [
-
-
-
-
-
+            invalid_specs += [
+                SpecSkipped(
+                    subject_uri, triple_store["type"], error_message, file.name, file
+                )
+                for triple_store in triple_stores
+            ]
+
+
+def get_specs(
+    spec_uris: List[URIRef],
+    spec_graph: Graph,
+    triple_stores: List[dict],
+    run_config: dict,
+):
     specs = []
     skipped_results = []
     try:
         for triple_store in triple_stores:
             if "error" in triple_store:
                 log.error(
-                    f"{triple_store['error']}. No specs run for this triple store."
-
-
-
+                    f"{triple_store['error']}. No specs run for this triple store."
+                )
+                skipped_results += [
+                    SpecSkipped(
+                        spec_uri,
+                        triple_store["type"],
+                        triple_store["error"],
+                        get_spec_file(spec_uri, spec_graph),
+                    )
+                    for spec_uri in spec_uris
+                ]
             else:
                 for spec_uri in spec_uris:
                     try:
-                        specs += [
-
+                        specs += [
+                            get_spec(spec_uri, spec_graph, run_config, triple_store)
+                        ]
                     except (ValueError, FileNotFoundError, ConnectionError) as e:
-
-
+                        # Try to get file name/path from the graph, but fallback to "unknown"
+                        file_name = (
+                            spec_graph.value(
+                                subject=spec_uri, predicate=MUST.specFileName
+                            )
+                            or "unknown"
+                        )
+                        file_path = (
+                            spec_graph.value(
+                                subject=spec_uri, predicate=MUST.specSourceFile
+                            )
+                            or "unknown"
+                        )
+                        skipped_results += [
+                            SpecSkipped(
+                                spec_uri,
+                                triple_store["type"],
+                                str(e),
+                                str(file_name),
+                                Path(file_path),
+                            )
+                        ]
 
     except (BadSyntax, FileNotFoundError) as e:
-        template =
-
+        template = (
+            "An exception of type {0} occurred when trying to parse the triple store configuration file. "
+            "Arguments:\n{1!r}"
+        )
         message = template.format(type(e).__name__, e.args)
         log.error(message)
         log.error("No specifications will be run.")
@@ -334,31 +392,62 @@ def run_specs(specs) -> List[SpecResult]:
 
 
 def get_spec_file(spec_uri: URIRef, spec_graph: Graph):
-
-
-
-
+    file_name = spec_graph.value(subject=spec_uri, predicate=MUST.specFileName)
+    if file_name:
+        return str(file_name)
+    # fallback: try to get from MUST.specSourceFile
+    file_path = spec_graph.value(subject=spec_uri, predicate=MUST.specSourceFile)
+    if file_path:
+        return str(Path(file_path).name)
+    return "default.mustrd.ttl"
+
+
+def get_spec(
+    spec_uri: URIRef,
+    spec_graph: Graph,
+    run_config: dict,
+    mustrd_triple_store: dict = None,
+) -> Specification:
     try:
         if not mustrd_triple_store:
             mustrd_triple_store = {"type": TRIPLESTORE.RdfLib}
         components = []
         for predicate in MUST.given, MUST.when, MUST.then:
-            components.append(
-
-
-
+            components.append(
+                parse_spec_component(
+                    subject=spec_uri,
+                    predicate=predicate,
+                    spec_graph=spec_graph,
+                    run_config=run_config,
+                    mustrd_triple_store=mustrd_triple_store,
+                )
+            )
 
         spec_file_name = get_spec_file(spec_uri, spec_graph)
-        spec_file_path = Path(
-
+        spec_file_path = Path(
+            spec_graph.value(
+                subject=spec_uri,
+                predicate=MUST.specSourceFile,
+                default=Path("default.mustrd.ttl"),
+            )
+        )
         # https://github.com/Semantic-partners/mustrd/issues/92
-        return Specification(
-
+        return Specification(
+            spec_uri,
+            mustrd_triple_store,
+            components[0].value,
+            components[1],
+            components[2],
+            spec_file_name,
+            spec_file_path,
+        )
 
     except (ValueError, FileNotFoundError) as e:
-        template =
-
+        template = (
+            "An exception of type {0} occurred. Arguments:\n{1!r}\nStacktrace:\n{2}"
+        )
+        stacktrace = traceback.format_exc()
+        message = template.format(type(e).__name__, e.args, stacktrace)
         log.exception(message)
         raise
     except ConnectionError as e:
@@ -367,53 +456,91 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
 
 
 def check_result(spec: Specification, result: Union[str, Graph]):
+
+    log.debug(
+        f"check_result {spec.spec_uri=}, {spec.triple_store=}, {result=} {type(spec.then)}"
+    )
     if isinstance(spec.then, TableThenSpec):
+        log.debug("table_comparison")
         return table_comparison(result, spec)
     else:
         graph_compare = graph_comparison(spec.then.value, result)
         if isomorphic(result, spec.then.value):
-
+            log.debug(f"isomorphic {spec}")
+            log.debug(f"{spec.spec_uri}")
+            log.debug(f"{spec.triple_store}")
+            ret = SpecPassed(spec.spec_uri, spec.triple_store["type"])
+
+            return ret
         else:
+            log.debug("not isomorphic")
             if spec.when[0].queryType == MUST.ConstructSparql:
-
+                log.debug("ConstructSpecFailure")
+                return ConstructSpecFailure(
+                    spec.spec_uri, spec.triple_store["type"], graph_compare
+                )
             else:
-
+                log.debug("UpdateSpecFailure")
+                return UpdateSpecFailure(
+                    spec.spec_uri, spec.triple_store["type"], graph_compare
+                )
 
 
 def run_spec(spec: Specification) -> SpecResult:
     spec_uri = spec.spec_uri
     triple_store = spec.triple_store
-
+
+    if not isinstance(spec, Specification):
+        log.warning(f"check_result called with non-Specification: {type(spec)}")
+        return spec
+        # return SpecSkipped(getattr(spec, 'spec_uri', None), getattr(spec, 'triple_store', {}), "Spec is not a valid Specification instance")
+
+    log.debug(f"run_spec {spec=}")
     log.debug(
-        f"run_when {spec_uri=}, {triple_store=}, {spec.given=}, {spec.when=}, {spec.then=}"
+        f"run_when {spec_uri=}, {triple_store=}, {spec.given=}, {spec.when=}, {spec.then=}"
+    )
     if spec.given:
         given_as_turtle = spec.given.serialize(format="turtle")
         log.debug(f"{given_as_turtle}")
         upload_given(triple_store, spec.given)
     else:
-        if triple_store[
-            return SpecSkipped(
+        if triple_store["type"] == TRIPLESTORE.RdfLib:
+            return SpecSkipped(
+                spec_uri,
+                triple_store["type"],
+                "Unable to run Inherited State tests on Rdflib",
+            )
     try:
         for when in spec.when:
             log.info(
-                f"Running {when.queryType} spec {spec_uri} on {triple_store['type']}"
+                f"Running {when.queryType} spec {spec_uri} on {triple_store['type']}"
+            )
             try:
-                result =
+                result = run_when_impl(spec_uri, triple_store, when)
+                log.info(
+                    f"run {when.queryType} spec {spec_uri} on {triple_store['type']} {result=}"
+                )
             except ParseException as e:
+                log.error(f"parseException {e}")
                 return SparqlParseFailure(spec_uri, triple_store["type"], e)
             except NotImplementedError as ex:
-
+                log.error(f"NotImplementedError {ex}")
+                raise ex
+                # return SpecSkipped(spec_uri, triple_store["type"], ex.args[0])
         return check_result(spec, result)
     except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout, OSError) as e:
         # close_connection = False
+        stacktrace = traceback.format_exc()
         template = "An exception of type {0} occurred. Arguments:\n{1!r}"
         message = template.format(type(e).__name__, e.args)
-        log.error(message)
+        log.error(message, exc_info=True)
         return TripleStoreConnectionError(spec_uri, triple_store["type"], message)
     except (TypeError, RequestException) as e:
-        log.error(f"{type(e)} {e}")
+        log.error(f"{type(e)} {e}", exc_info=True)
         return SparqlExecutionError(spec_uri, triple_store["type"], e)
-
+    except Exception as e:
+        log.error(f"Unexpected error {e}", exc_info=True)
+        return RuntimeError(spec_uri, triple_store["type"], f"{type(e).__name__}: {e}")
     # https://github.com/Semantic-partners/mustrd/issues/78
     # finally:
     #     if type(mustrd_triple_store) == MustrdAnzo and close_connection:
@@ -424,8 +551,9 @@ def get_triple_store_graph(triple_store_graph_path: Path, secrets: str):
     if secrets:
         return Graph().parse(triple_store_graph_path).parse(data=secrets)
     else:
-        secret_path = triple_store_graph_path.parent / Path(
-
+        secret_path = triple_store_graph_path.parent / Path(
+            triple_store_graph_path.stem + "_secrets" + triple_store_graph_path.suffix
+        )
         return Graph().parse(triple_store_graph_path).parse(secret_path)
 
 
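Note: the new `secret_path` derivation pairs each triple-store config file with a sibling secrets file by splicing `_secrets` between the stem and the suffix. A small pathlib sketch of that convention (the path is hypothetical):

```python
from pathlib import Path

# Hypothetical config path, for illustration only.
config = Path("config/triplestores.ttl")
secret_path = config.parent / Path(config.stem + "_secrets" + config.suffix)
print(secret_path)  # config/triplestores_secrets.ttl
```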
@@ -433,32 +561,40 @@ def get_triple_store_graph(triple_store_graph_path: Path, secrets: str):
 def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
     triple_stores = []
     shacl_graph = Graph().parse(
-        Path(os.path.join(get_mustrd_root(), "model/triplestoreshapes.ttl"))
+        Path(os.path.join(get_mustrd_root(), "model/triplestoreshapes.ttl"))
+    )
     ont_graph = Graph().parse(
-        Path(os.path.join(get_mustrd_root(), "model/triplestoreOntology.ttl"))
+        Path(os.path.join(get_mustrd_root(), "model/triplestoreOntology.ttl"))
+    )
     # SHACL validation of triple store configuration
     conforms, results_graph, results_text = validate(
         data_graph=triple_store_graph,
         shacl_graph=shacl_graph,
         ont_graph=ont_graph,
         advanced=True,
-        inference=
+        inference="none",
     )
     if not conforms:
-        raise ValueError(
-
-
+        raise ValueError(
+            f"Triple store configuration not conform to the shapes. SHACL report: {results_text}",
+            results_graph,
+        )
+    for triple_store_config, rdf_type, triple_store_type in triple_store_graph.triples(
+        (None, RDF.type, None)
+    ):
         triple_store = {}
         triple_store["type"] = triple_store_type
         triple_store["uri"] = triple_store_config
         # Anzo graph via anzo
         if triple_store_type == TRIPLESTORE.Anzo:
             get_anzo_configuration(
-                triple_store, triple_store_graph, triple_store_config
+                triple_store, triple_store_graph, triple_store_config
+            )
         # GraphDB
         elif triple_store_type == TRIPLESTORE.GraphDb:
             get_graphDB_configuration(
-                triple_store, triple_store_graph, triple_store_config
+                triple_store, triple_store_graph, triple_store_config
+            )
 
         elif triple_store_type != TRIPLESTORE.RdfLib:
             triple_store["error"] = f"Triple store not implemented: {triple_store_type}"
@@ -467,48 +603,74 @@ def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
     return triple_stores
 
 
-def get_anzo_configuration(
+def get_anzo_configuration(
+    triple_store: dict, triple_store_graph: Graph, triple_store_config: URIRef
+):
     triple_store["url"] = triple_store_graph.value(
-        subject=triple_store_config, predicate=TRIPLESTORE.url
+        subject=triple_store_config, predicate=TRIPLESTORE.url
+    )
     triple_store["port"] = triple_store_graph.value(
-        subject=triple_store_config, predicate=TRIPLESTORE.port
+        subject=triple_store_config, predicate=TRIPLESTORE.port
+    )
     try:
-        triple_store["username"] = str(
-
-
-
+        triple_store["username"] = str(
+            triple_store_graph.value(
+                subject=triple_store_config, predicate=TRIPLESTORE.username
+            )
+        )
+        triple_store["password"] = str(
+            triple_store_graph.value(
+                subject=triple_store_config, predicate=TRIPLESTORE.password
+            )
+        )
     except (FileNotFoundError, ValueError) as e:
         triple_store["error"] = e
-    triple_store["gqe_uri"] = triple_store_graph.value(
-
-
-
-
-
+    triple_store["gqe_uri"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.gqeURI
+    )
+    triple_store["input_graph"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.inputGraph
+    )
+    triple_store["output_graph"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.outputGraph
+    )
     try:
         check_triple_store_params(
-            triple_store, ["url", "port", "username", "password", "input_graph"]
+            triple_store, ["url", "port", "username", "password", "input_graph"]
+        )
     except ValueError as e:
         triple_store["error"] = e
 
 
-def get_graphDB_configuration(
+def get_graphDB_configuration(
+    triple_store: dict, triple_store_graph: Graph, triple_store_config: URIRef
+):
     triple_store["url"] = triple_store_graph.value(
-        subject=triple_store_config, predicate=TRIPLESTORE.url
+        subject=triple_store_config, predicate=TRIPLESTORE.url
+    )
     triple_store["port"] = triple_store_graph.value(
-        subject=triple_store_config, predicate=TRIPLESTORE.port
+        subject=triple_store_config, predicate=TRIPLESTORE.port
+    )
     try:
-        triple_store["username"] = str(
-
-
-
+        triple_store["username"] = str(
+            triple_store_graph.value(
+                subject=triple_store_config, predicate=TRIPLESTORE.username
+            )
+        )
+        triple_store["password"] = str(
+            triple_store_graph.value(
+                subject=triple_store_config, predicate=TRIPLESTORE.password
+            )
+        )
     except (FileNotFoundError, ValueError) as e:
         log.error(f"Credential retrieval failed {e}")
         triple_store["error"] = e
-    triple_store["repository"] = triple_store_graph.value(
-
-
-
+    triple_store["repository"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.repository
+    )
+    triple_store["input_graph"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.inputGraph
+    )
     try:
         check_triple_store_params(triple_store, ["url", "repository"])
     except ValueError as e:
@@ -517,18 +679,26 @@ def get_graphDB_configuration(triple_store: dict, triple_store_graph: Graph, tri
 
 def check_triple_store_params(triple_store: dict, required_params: List[str]):
     missing_params = [
-        param for param in required_params if triple_store.get(param) is None
+        param for param in required_params if triple_store.get(param) is None
+    ]
     if missing_params:
-        raise ValueError(
-
+        raise ValueError(
+            f"Cannot establish connection to {triple_store['type']}. "
+            f"Missing required parameter(s): {', '.join(missing_params)}."
+        )
 
 
-def get_credential_from_file(
+def get_credential_from_file(
+    triple_store_name: URIRef, credential: str, config_path: Literal
+) -> str:
     log.info(
-        f"get_credential_from_file {triple_store_name}, {credential}, {config_path}"
+        f"get_credential_from_file {triple_store_name}, {credential}, {config_path}"
+    )
     if not config_path:
-        raise ValueError(
-
+        raise ValueError(
+            f"Cannot establish connection defined in {triple_store_name}. "
+            f"Missing required parameter: {credential}."
+        )
     path = Path(config_path)
     log.info(f"get_credential_from_file {path}")
 
@@ -564,9 +734,11 @@ def json_results_to_panda_dataframe(result: str) -> pandas.DataFrame:
             else:
                 values.append(str(XSD.anyURI))
 
-        frames = pandas.concat(
-            [values], columns=columns)],
-
+        frames = pandas.concat(
+            objs=[frames, pandas.DataFrame([values], columns=columns)],
+            ignore_index=True,
+        )
+        frames.fillna("", inplace=True)
 
     if frames.size == 0:
         frames = pandas.DataFrame()
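Note: the corrected `pandas.concat` call restores the idiomatic row-append pattern: build a one-row DataFrame for each SPARQL binding and concatenate it onto the accumulator. A runnable sketch (column names and values are illustrative):

```python
import pandas as pd

columns = ["s", "s_datatype"]
frames = pd.DataFrame(columns=columns)
values = ["urn:x", "http://www.w3.org/2001/XMLSchema#anyURI"]

# Append one row by concatenating a single-row frame onto the accumulator.
frames = pd.concat(
    objs=[frames, pd.DataFrame([values], columns=columns)],
    ignore_index=True,
)
frames.fillna("", inplace=True)
print(frames)
```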
@@ -577,7 +749,8 @@ def table_comparison(result: str, spec: Specification) -> SpecResult:
     warning = None
     order_list = ["order by ?", "order by desc", "order by asc"]
     ordered_result = any(
-        pattern in spec.when[0].value.lower() for pattern in order_list
+        pattern in spec.when[0].value.lower() for pattern in order_list
+    )
 
     # If sparql query doesn't contain order by clause, but order is define in then spec:
     # Then ignore order in then spec and print a warning
@@ -588,27 +761,40 @@ def table_comparison(result: str, spec: Specification) -> SpecResult:
     # If sparql query contains an order by clause and then spec is not order:
     # Spec is inconsistent
     if ordered_result and not spec.then.ordered:
-        message =
-
+        message = (
+            "Actual result is ordered, must:then must contain sh:order on every row."
+        )
+        return SelectSpecFailure(
+            spec.spec_uri, spec.triple_store["type"], None, message
+        )
 
     # Convert results to dataframe
     if is_json(result):
         df = json_results_to_panda_dataframe(result)
     else:
-        return SelectSpecFailure(
+        return SelectSpecFailure(
+            spec.spec_uri,
+            spec.triple_store["type"],
+            None,
+            "Sparql result is not in JSON",
+        )
 
     # Compare result with expected
     df_diff, message = compare_table_results(df, spec)
 
     if df_diff.empty:
         if warning:
-            return SpecPassedWithWarning(
+            return SpecPassedWithWarning(
+                spec.spec_uri, spec.triple_store["type"], warning
+            )
         else:
             return SpecPassed(spec.spec_uri, spec.triple_store["type"])
     else:
         log.error("\n" + df_diff.to_markdown())
         log.error(message)
-        return SelectSpecFailure(
+        return SelectSpecFailure(
+            spec.spec_uri, spec.triple_store["type"], df_diff, message
+        )
 
 
 def compare_table_results_dispatch(resultDf: DataFrame, spec: Specification):
@@ -616,7 +802,8 @@ def compare_table_results_dispatch(resultDf: DataFrame, spec: Specification):
 
 
 compare_table_results = MultiMethod(
-    "compare_table_results", compare_table_results_dispatch
+    "compare_table_results", compare_table_results_dispatch
+)
 
 
 # Scenario 1: expected a result and got a result
@@ -628,7 +815,8 @@ def _compare_results(resultDf: DataFrame, spec: Specification):
     sorted_then_cols = sorted(list(then))
     order_list = ["order by ?", "order by desc", "order by asc"]
     ordered_result = any(
-        pattern in spec.when[0].value.lower() for pattern in order_list
+        pattern in spec.when[0].value.lower() for pattern in order_list
+    )
 
     if not ordered_result:
         resultDf.sort_values(by=list(resultDf.columns)[::2], inplace=True)
@@ -640,9 +828,11 @@ def _compare_results(resultDf: DataFrame, spec: Specification):
         if not ordered_result:
             then.sort_values(by=columns[::2], inplace=True)
             then.reset_index(drop=True, inplace=True)
-        if
-
-
+        if (
+            resultDf.shape == then.shape
+            and (resultDf.columns == then.columns).all()
+        ):
+            df_diff = then.compare(resultDf, result_names=("expected", "actual"))
         else:
             df_diff = construct_df_diff(resultDf, then)
     else:
@@ -654,8 +844,12 @@ def _compare_results(resultDf: DataFrame, spec: Specification):
         resultDf = resultDf[sorted_columns]
         df_diff = construct_df_diff(resultDf, then)
 
-    message = build_summary_message(
-        then.shape[
+    message = build_summary_message(
+        then.shape[0],
+        round(then.shape[1] / 2),
+        resultDf.shape[0],
+        round(resultDf.shape[1] / 2),
+    )
     return df_diff, message
 
 
@@ -665,7 +859,9 @@ def _unexpected_results(resultDf: DataFrame, spec: Specification):
     empty_then = create_empty_dataframe_with_columns(resultDf)
     df_diff = empty_then.compare(resultDf, result_names=("expected", "actual"))
 
-    return df_diff, build_summary_message(
+    return df_diff, build_summary_message(
+        0, 0, resultDf.shape[0], round(resultDf.shape[1] / 2)
+    )
 
 
 # Scenario 3: expected a result, but got an empty result
@@ -689,8 +885,10 @@ def _no_results(resultDf: DataFrame, spec: Specification):
 
 
 def build_summary_message(expected_rows, expected_columns, got_rows, got_columns):
-    return
+    return (
+        f"Expected {expected_rows} row(s) and {expected_columns} column(s), "
         f"got {got_rows} row(s) and {got_columns} column(s)"
+    )
 
 
 def graph_comparison(expected_graph: Graph, actual_graph: Graph) -> GraphComparison:
@@ -698,9 +896,11 @@ def graph_comparison(expected_graph: Graph, actual_graph: Graph) -> GraphCompari
     in_both = diff[0]
     in_expected = diff[1]
     in_actual = diff[2]
-    in_expected_not_in_actual =
-    in_actual_not_in_expected =
-    return GraphComparison(
+    in_expected_not_in_actual = in_expected - in_actual
+    in_actual_not_in_expected = in_actual - in_expected
+    return GraphComparison(
+        in_expected_not_in_actual, in_actual_not_in_expected, in_both
+    )
 
 
 def get_then_update(spec_uri: URIRef, spec_graph: Graph) -> Graph:
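Note: `graph_comparison` builds on rdflib's `graph_diff`, and the two new `-` subtractions are plain rdflib graph set-difference. A minimal sketch of the same comparison (the triples are illustrative):

```python
from rdflib import Graph
from rdflib.compare import graph_diff

expected = Graph().parse(data="<urn:s> <urn:p> <urn:o1> .", format="turtle")
actual = Graph().parse(data="<urn:s> <urn:p> <urn:o2> .", format="turtle")

# graph_diff returns (in_both, only_in_first, only_in_second)
in_both, in_expected, in_actual = graph_diff(expected, actual)
missing = in_expected - in_actual     # triples the result should have had
unexpected = in_actual - in_expected  # triples the result should not have had
print(missing.serialize(format="turtle"))
```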
@@ -724,45 +924,45 @@ def get_then_update(spec_uri: URIRef, spec_graph: Graph) -> Graph:
     return expected_results
 
 
-def write_result_diff_to_log(res):
+def write_result_diff_to_log(res, info):
     if isinstance(res, UpdateSpecFailure) or isinstance(res, ConstructSpecFailure):
-
-
-
-
-
-
-
-        log.info(f"{Fore.GREEN} in_both")
-        log.info(res.graph_comparison.in_both.serialize(format="ttl"))
+        info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+        info(f"{Fore.BLUE} In Expected Not In Actual:")
+        info(res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
+        info(f"{Fore.RED} in_actual_not_in_expected")
+        info(res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
+        info(f"{Fore.GREEN} in_both")
+        info(res.graph_comparison.in_both.serialize(format="ttl"))
 
     if isinstance(res, SelectSpecFailure):
-
-
-
+        info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+        info(res.message)
+        info(res.table_comparison.to_markdown())
     if isinstance(res, SpecPassedWithWarning):
-
-
-
-
-
-
-
+        info(f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
+        info(res.warning)
+    if (
+        isinstance(res, TripleStoreConnectionError)
+        or isinstance(res, SparqlExecutionError)
+        or isinstance(res, SparqlParseFailure)
+    ):
+        info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+        info(res.exception)
     if isinstance(res, SpecSkipped):
-
-
+        info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
+        info(res.message)
 
 
-def calculate_row_difference(
-
-
-
-    actual_rows =
+def calculate_row_difference(
+    df1: pandas.DataFrame, df2: pandas.DataFrame
+) -> pandas.DataFrame:
+    df_all = df1.merge(df2.drop_duplicates(), how="left", indicator=True)
+    actual_rows = df_all[df_all["_merge"] == "left_only"]
+    actual_rows = actual_rows.drop("_merge", axis=1)
     return actual_rows
 
 
-def construct_df_diff(df: pandas.DataFrame,
-                      then: pandas.DataFrame) -> pandas.DataFrame:
+def construct_df_diff(df: pandas.DataFrame, then: pandas.DataFrame) -> pandas.DataFrame:
     actual_rows = calculate_row_difference(df, then)
     expected_rows = calculate_row_difference(then, df)
     actual_columns = df.columns.difference(then.columns)
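Note: the rewritten `calculate_row_difference` is a standard pandas anti-join. A runnable sketch (the data is illustrative):

```python
import pandas as pd

df1 = pd.DataFrame({"s": ["a", "b"], "o": [1, 2]})
df2 = pd.DataFrame({"s": ["b"], "o": [2]})

# A left merge with indicator=True tags each row's origin; keeping
# "left_only" rows yields the rows of df1 that have no match in df2.
merged = df1.merge(df2.drop_duplicates(), how="left", indicator=True)
only_in_df1 = merged[merged["_merge"] == "left_only"].drop("_merge", axis=1)
print(only_in_df1)  # the row ("a", 1)
```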
@@ -774,15 +974,19 @@ def construct_df_diff(df: pandas.DataFrame,
 
     if actual_columns.size > 0:
         modified_then = modified_then.reindex(
-            modified_then.columns.to_list() + actual_columns.to_list(), axis=1
-
-
+            modified_then.columns.to_list() + actual_columns.to_list(), axis=1
+        )
+        modified_then[actual_columns.to_list()] = modified_then[
+            actual_columns.to_list()
+        ].fillna("")
 
     if expected_columns.size > 0:
         modified_df = modified_df.reindex(
-            modified_df.columns.to_list() + expected_columns.to_list(), axis=1
-
-
+            modified_df.columns.to_list() + expected_columns.to_list(), axis=1
+        )
+        modified_df[expected_columns.to_list()] = modified_df[
+            expected_columns.to_list()
+        ].fillna("")
 
     modified_df = modified_df.reindex(modified_then.columns, axis=1)
 
@@ -794,29 +998,37 @@ def construct_df_diff(df: pandas.DataFrame,
     elif actual_rows.shape[0] > 0 or expected_rows.shape[0] > 0:
         df_diff = generate_row_diff(actual_rows, expected_rows)
     elif actual_columns.size > 0 or expected_columns.size > 0:
-        df_diff = modified_then.compare(
-
+        df_diff = modified_then.compare(
+            modified_df,
+            result_names=("expected", "actual"),
+            keep_shape=True,
+            keep_equal=True,
+        )
     df_diff.fillna("", inplace=True)
     return df_diff
 
 
-def generate_row_diff(
+def generate_row_diff(
+    actual_rows: pandas.DataFrame, expected_rows: pandas.DataFrame
+) -> pandas.DataFrame:
     df_diff_actual_rows = pandas.DataFrame()
     df_diff_expected_rows = pandas.DataFrame()
 
     if actual_rows.shape[0] > 0:
         empty_actual_copy = create_empty_dataframe_with_columns(actual_rows)
         df_diff_actual_rows = empty_actual_copy.compare(
-            actual_rows, result_names=("expected", "actual")
+            actual_rows, result_names=("expected", "actual")
+        )
 
     if expected_rows.shape[0] > 0:
-        empty_expected_copy = create_empty_dataframe_with_columns(
-            expected_rows)
+        empty_expected_copy = create_empty_dataframe_with_columns(expected_rows)
         df_diff_expected_rows = expected_rows.compare(
-            empty_expected_copy, result_names=("expected", "actual")
+            empty_expected_copy, result_names=("expected", "actual")
+        )
 
     df_diff_rows = pandas.concat(
-        [df_diff_actual_rows, df_diff_expected_rows], ignore_index=True
+        [df_diff_actual_rows, df_diff_expected_rows], ignore_index=True
+    )
     return df_diff_rows
 
 
@@ -831,40 +1043,76 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
     # Init dictionaries
     status_dict = defaultdict(lambda: defaultdict(int))
     status_counts = defaultdict(lambda: defaultdict(int))
-    colours = {
-
+    colours = {
+        SpecPassed: Fore.GREEN,
+        SpecPassedWithWarning: Fore.YELLOW,
+        SpecSkipped: Fore.YELLOW,
+    }
     # Populate dictionaries from results
     for result in results:
         status_counts[result.triple_store][type(result)] += 1
         status_dict[result.spec_uri][result.triple_store] = type(result)
 
     # Get the list of statuses and list of unique triple stores
-    statuses = list(
-
-
-
+    statuses = list(
+        status for inner_dict in status_dict.values() for status in inner_dict.values()
+    )
+    triple_stores = list(
+        set(
+            status
+            for inner_dict in status_dict.values()
+            for status in inner_dict.keys()
+        )
+    )
 
     # Convert dictionaries to list for tabulate
-    table_rows = [
-
+    table_rows = [
+        [spec_uri]
+        + [
+            f"""{colours.get(status_dict[spec_uri][triple_store], Fore.RED)}
 {status_dict[spec_uri][triple_store].__name__}{Style.RESET_ALL}"""
-
-
-
-
-
+            for triple_store in triple_stores
+        ]
+        for spec_uri in set(status_dict.keys())
+    ]
+
+    status_rows = [
+        [f"{colours.get(status, Fore.RED)}{status.__name__}{Style.RESET_ALL}"]
+        + [
+            f"{colours.get(status, Fore.RED)}{status_counts[triple_store][status]}{Style.RESET_ALL}"
+            for triple_store in triple_stores
+        ]
+        for status in set(statuses)
+    ]
 
     # Display tables with tabulate
-    log.info(
-
-
-
+    log.info(
+        tabulate(
+            table_rows,
+            headers=["Spec Uris / triple stores"] + triple_stores,
+            tablefmt="pretty",
+        )
+    )
+    log.info(
+        tabulate(
+            status_rows,
+            headers=["Status / triple stores"] + triple_stores,
+            tablefmt="pretty",
+        )
+    )
 
     pass_count = statuses.count(SpecPassed)
     warning_count = statuses.count(SpecPassedWithWarning)
     skipped_count = statuses.count(SpecSkipped)
     fail_count = len(
-        list(
+        list(
+            filter(
+                lambda status: status
+                not in [SpecPassed, SpecPassedWithWarning, SpecSkipped],
+                statuses,
+            )
+        )
+    )
 
     if fail_count:
         overview_colour = Fore.RED
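Note: the summary tables are rendered with `tabulate` using the `pretty` format, as configured above. A minimal example (rows and headers are illustrative):

```python
from tabulate import tabulate

rows = [
    ["spec1", "SpecPassed"],
    ["spec2", "SpecSkipped"],
]
print(tabulate(rows, headers=["Spec Uris / triple stores", "rdflib"], tablefmt="pretty"))
```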
@@ -874,8 +1122,10 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
         overview_colour = Fore.GREEN
 
     logger_setup.flush()
-    log.info(
-
+    log.info(
+        f"{overview_colour}===== {fail_count} failures, {skipped_count} skipped, {Fore.GREEN}{pass_count} passed, "
+        f"{overview_colour}{warning_count} passed with warnings ====="
+    )
 
     if verbose and (fail_count or warning_count or skipped_count):
         display_verbose(results)
@@ -887,11 +1137,13 @@ def display_verbose(results: List[SpecResult]):
             log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
             log.info(f"{Fore.BLUE} In Expected Not In Actual:")
             log.info(
-                res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl")
+                res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl")
+            )
             log.info()
             log.info(f"{Fore.RED} in_actual_not_in_expected")
             log.info(
-                res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl")
+                res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl")
+            )
             log.info(f"{Fore.GREEN} in_both")
             log.info(res.graph_comparison.in_both.serialize(format="ttl"))
 
@@ -903,12 +1155,33 @@ def display_verbose(results: List[SpecResult]):
             log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
         if isinstance(res, SpecPassedWithWarning):
             log.info(
-                f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}"
+                f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}"
+            )
             log.info(res.warning)
-        if
-
+        if (
+            isinstance(res, TripleStoreConnectionError)
+            or type(res, SparqlExecutionError)
+            or isinstance(res, SparqlParseFailure)
+        ):
             log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
             log.info(res.exception)
         if isinstance(res, SpecSkipped):
             log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
             log.info(res.message)
+
+
+# Preserve the original run_when_impl multimethod
+original_run_when_impl = run_when_impl
+
+
+# Wrapper function for logging inputs and outputs of run_when
+def run_when_with_logging(*args, **kwargs):
+    log.debug(f"run_when called with args: {args}, kwargs: {kwargs}")
+    result = original_run_when_impl(*args, **kwargs)  # Call the original multimethod
+    log.debug(f"run_when returned: {result}")
+    return result
+
+
+# Replace the original run_when_impl with the wrapped version
+run_when_impl = run_when_with_logging
+run_when = run_when_impl