mustrd 0.2.7a0__tar.gz → 0.3.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/PKG-INFO +1 -1
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/anzo_utils.py +8 -5
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/logger_setup.py +1 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/mustrdShapes.ttl +11 -2
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/ontology.ttl +5 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/mustrd.py +88 -30
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/mustrdAnzo.py +3 -2
- mustrd-0.3.0.0/mustrd/mustrdTestPlugin.py +549 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/spec_component.py +14 -13
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/steprunner.py +14 -4
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/pyproject.toml +1 -1
- mustrd-0.2.7a0/mustrd/mustrdTestPlugin.py +0 -378
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/LICENSE +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/README.md +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/README.adoc +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/README.md +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/TestResult.py +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/__init__.py +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/catalog-v001.xml +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/mustrdTestOntology.ttl +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/mustrdTestShapes.ttl +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/test-resources/resources.ttl +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/triplestoreOntology.ttl +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/triplestoreshapes.ttl +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/mustrdGraphDb.py +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/mustrdRdfLib.py +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/namespace.py +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/run.py +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/templates/md_ResultList_leaf_template.jinja +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/templates/md_ResultList_template.jinja +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/templates/md_stats_template.jinja +0 -0
- {mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/utils.py +0 -0
@@ -31,16 +31,18 @@ from requests import Response, HTTPError, RequestException
|
|
31
31
|
from bs4 import BeautifulSoup
|
32
32
|
import logging
|
33
33
|
|
34
|
+
logger = logging.getLogger()
|
35
|
+
|
34
36
|
|
35
37
|
def query_azg(anzo_config: dict, query: str,
|
36
38
|
format: str = "json", is_update: bool = False,
|
37
39
|
data_layers: List[str] = None):
|
38
40
|
params = {
|
39
|
-
'skipCache':
|
41
|
+
'skipCache': 'true',
|
40
42
|
'format': format,
|
41
43
|
'datasourceURI': anzo_config['gqe_uri'],
|
42
|
-
'default-graph-uri': data_layers,
|
43
|
-
'named-graph-uri': data_layers
|
44
|
+
'using-graph-uri' if is_update else 'default-graph-uri': data_layers,
|
45
|
+
'using-named-graph-uri' if is_update else 'named-graph-uri': data_layers
|
44
46
|
}
|
45
47
|
url = f"{anzo_config['url']}/sparql"
|
46
48
|
return send_anzo_query(anzo_config, url=url, params=params, query=query, is_update=is_update)
|
@@ -52,7 +54,7 @@ def query_graphmart(anzo_config: dict,
|
|
52
54
|
format: str = "json",
|
53
55
|
data_layers: List[str] = None):
|
54
56
|
params = {
|
55
|
-
'skipCache':
|
57
|
+
'skipCache': 'true',
|
56
58
|
'format': format,
|
57
59
|
'default-graph-uri': data_layers,
|
58
60
|
'named-graph-uri': data_layers
|
@@ -87,7 +89,8 @@ def manage_anzo_response(response: Response) -> str:
|
|
87
89
|
|
88
90
|
def send_anzo_query(anzo_config, url, params, query, is_update=False):
|
89
91
|
headers = {"Content-Type": f"application/sparql-{'update' if is_update else 'query' }"}
|
90
|
-
|
92
|
+
logger.debug(f"send_anzo_query {url=} {query=} {is_update=}")
|
93
|
+
return manage_anzo_response(requests.post(url=url, params=params, data=query.encode('utf-8'),
|
91
94
|
auth=(anzo_config['username'], anzo_config['password']),
|
92
95
|
headers=headers, verify=False))
|
93
96
|
|
@@ -249,5 +249,14 @@ must:AnzoGraphmartQueryDrivenTemplatedStepSparqlSourceShape
|
|
249
249
|
sh:minCount 1 ;
|
250
250
|
sh:maxCount 1 ; ] .
|
251
251
|
|
252
|
-
|
253
|
-
|
252
|
+
must:SpadeEdnGroupSourceShape
|
253
|
+
a sh:NodeShape ;
|
254
|
+
sh:targetClass must:SpadeEdnGroupSource ;
|
255
|
+
sh:property [ sh:path must:file ;
|
256
|
+
sh:message "A SpadeEdnGroupSource must have a file property pointing to the spade.edn config." ;
|
257
|
+
sh:minCount 1 ;
|
258
|
+
sh:maxCount 1 ; ] ;
|
259
|
+
sh:property [ sh:path must:groupId ;
|
260
|
+
sh:message "A SpadeEdnGroupSource must have a groupId property referencing the group in the EDN file." ;
|
261
|
+
sh:minCount 1 ;
|
262
|
+
sh:maxCount 1 ; ] .
|
@@ -461,6 +461,11 @@ sh:order rdf:type owl:DatatypeProperty ;
|
|
461
461
|
rdfs:isDefinedBy : ;
|
462
462
|
rdfs:label "AnzoGraphmartQueryDrivenTemplatedStepSparqlSource" .
|
463
463
|
|
464
|
+
### https://mustrd.com/model/SpadeEdnGroupSource
|
465
|
+
:SpadeEdnGroupSource rdf:type owl:Class ;
|
466
|
+
rdfs:subClassOf :SparqlSource ;
|
467
|
+
rdfs:comment "Allows reference to a spade.edn file, and a specific groupid (think Anzo layer), within that" ;
|
468
|
+
rdfs:label "SpadeEdnGroupSource" .
|
464
469
|
|
465
470
|
### https://mustrd.com/model/Then
|
466
471
|
:Then rdf:type owl:Class ;
|
@@ -55,8 +55,9 @@ import logging
|
|
55
55
|
from http.client import HTTPConnection
|
56
56
|
from .steprunner import upload_given, run_when
|
57
57
|
from multimethods import MultiMethod
|
58
|
+
import traceback
|
58
59
|
|
59
|
-
log =
|
60
|
+
log = logging.getLogger(__name__)
|
60
61
|
|
61
62
|
requests.packages.urllib3.disable_warnings()
|
62
63
|
requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += ':HIGH:!DH:!aNULL'
|
@@ -184,20 +185,33 @@ class UpdateSparqlQuery(SparqlAction):
|
|
184
185
|
|
185
186
|
# https://github.com/Semantic-partners/mustrd/issues/19
|
186
187
|
# Validate the specs found in spec_path
|
187
|
-
def validate_specs(run_config: dict,
|
188
|
+
def validate_specs(run_config: dict,
|
189
|
+
triple_stores: List,
|
190
|
+
shacl_graph: Graph,
|
191
|
+
ont_graph: Graph,
|
192
|
+
file_name: str = "*",
|
193
|
+
selected_test_files: List[str] = [])\
|
188
194
|
-> Tuple[List, Graph, List]:
|
189
195
|
spec_graph = Graph()
|
190
196
|
subject_uris = set()
|
191
197
|
focus_uris = set()
|
192
198
|
invalid_specs = []
|
193
|
-
ttl_files =
|
194
|
-
|
199
|
+
ttl_files = []
|
200
|
+
|
201
|
+
if not selected_test_files:
|
202
|
+
ttl_files = list(run_config['spec_path'].glob(
|
203
|
+
f'**/{file_name}.mustrd.ttl'))
|
204
|
+
log.info(
|
205
|
+
f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
|
206
|
+
else:
|
207
|
+
ttl_files = selected_test_files
|
208
|
+
|
209
|
+
log.info(f"Using {ttl_files} for test source")
|
195
210
|
ttl_files.sort()
|
196
|
-
log.info(
|
197
|
-
f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
|
198
211
|
|
199
212
|
# For each spec file found in spec_path
|
200
213
|
for file in ttl_files:
|
214
|
+
# file = file.resolve()
|
201
215
|
error_messages = []
|
202
216
|
|
203
217
|
log.info(f"Parse: {file}")
|
@@ -224,7 +238,10 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
|
|
224
238
|
advanced=True,
|
225
239
|
js=False,
|
226
240
|
debug=False)
|
227
|
-
|
241
|
+
if str(file.name).endswith("_duplicate"):
|
242
|
+
log.debug(f"Validation of {file.name} against SHACL shapes: {conforms}")
|
243
|
+
log.debug(f"{results_graph.serialize(format='turtle')}")
|
244
|
+
# log.debug(f"SHACL validation results: {results_text}")
|
228
245
|
# Add error message if not conform to spec shapes
|
229
246
|
if not conforms:
|
230
247
|
for msg in results_graph.objects(predicate=SH.resultMessage):
|
@@ -269,6 +286,10 @@ def add_spec_validation(file_graph: Graph, subject_uris: set, file: Path, triple
|
|
269
286
|
error_messages: list, invalid_specs: list, spec_graph: Graph):
|
270
287
|
|
271
288
|
for subject_uri in file_graph.subjects(RDF.type, MUST.TestSpec):
|
289
|
+
# Always add file name and source file to the graph for error reporting
|
290
|
+
file_graph.add([subject_uri, MUST.specSourceFile, Literal(str(file))])
|
291
|
+
file_graph.add([subject_uri, MUST.specFileName, Literal(file.name)])
|
292
|
+
|
272
293
|
# If we already collected a URI, then we tag it as duplicate and it won't be executed
|
273
294
|
if subject_uri in subject_uris:
|
274
295
|
log.warning(
|
@@ -311,8 +332,11 @@ def get_specs(spec_uris: List[URIRef], spec_graph: Graph, triple_stores: List[di
|
|
311
332
|
specs += [get_spec(spec_uri, spec_graph,
|
312
333
|
run_config, triple_store)]
|
313
334
|
except (ValueError, FileNotFoundError, ConnectionError) as e:
|
335
|
+
# Try to get file name/path from the graph, but fallback to "unknown"
|
336
|
+
file_name = spec_graph.value(subject=spec_uri, predicate=MUST.specFileName) or "unknown"
|
337
|
+
file_path = spec_graph.value(subject=spec_uri, predicate=MUST.specSourceFile) or "unknown"
|
314
338
|
skipped_results += [SpecSkipped(spec_uri, triple_store['type'],
|
315
|
-
e,
|
339
|
+
str(e), str(file_name), Path(file_path))]
|
316
340
|
|
317
341
|
except (BadSyntax, FileNotFoundError) as e:
|
318
342
|
template = "An exception of type {0} occurred when trying to parse the triple store configuration file. " \
|
@@ -334,7 +358,14 @@ def run_specs(specs) -> List[SpecResult]:
|
|
334
358
|
|
335
359
|
|
336
360
|
def get_spec_file(spec_uri: URIRef, spec_graph: Graph):
|
337
|
-
|
361
|
+
file_name = spec_graph.value(subject=spec_uri, predicate=MUST.specFileName)
|
362
|
+
if file_name:
|
363
|
+
return str(file_name)
|
364
|
+
# fallback: try to get from MUST.specSourceFile
|
365
|
+
file_path = spec_graph.value(subject=spec_uri, predicate=MUST.specSourceFile)
|
366
|
+
if file_path:
|
367
|
+
return str(Path(file_path).name)
|
368
|
+
return "default.mustrd.ttl"
|
338
369
|
|
339
370
|
|
340
371
|
def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_triple_store: dict = None) -> Specification:
|
@@ -367,23 +398,42 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
|
|
367
398
|
|
368
399
|
|
369
400
|
def check_result(spec: Specification, result: Union[str, Graph]):
|
401
|
+
|
402
|
+
log.debug(
|
403
|
+
f"check_result {spec.spec_uri=}, {spec.triple_store=}, {result=} {type(spec.then)}")
|
370
404
|
if isinstance(spec.then, TableThenSpec):
|
405
|
+
log.debug("table_comparison")
|
371
406
|
return table_comparison(result, spec)
|
372
407
|
else:
|
373
408
|
graph_compare = graph_comparison(spec.then.value, result)
|
374
409
|
if isomorphic(result, spec.then.value):
|
375
|
-
|
410
|
+
log.debug(f"isomorphic {spec}")
|
411
|
+
log.debug(f"{spec.spec_uri}")
|
412
|
+
log.debug(f"{spec.triple_store}")
|
413
|
+
ret = SpecPassed(spec.spec_uri, spec.triple_store["type"])
|
414
|
+
|
415
|
+
return ret
|
376
416
|
else:
|
417
|
+
log.debug("not isomorphic")
|
377
418
|
if spec.when[0].queryType == MUST.ConstructSparql:
|
419
|
+
log.debug("ConstructSpecFailure")
|
378
420
|
return ConstructSpecFailure(spec.spec_uri, spec.triple_store["type"], graph_compare)
|
379
421
|
else:
|
422
|
+
log.debug("UpdateSpecFailure")
|
380
423
|
return UpdateSpecFailure(spec.spec_uri, spec.triple_store["type"], graph_compare)
|
381
424
|
|
382
425
|
|
383
426
|
def run_spec(spec: Specification) -> SpecResult:
|
384
427
|
spec_uri = spec.spec_uri
|
385
428
|
triple_store = spec.triple_store
|
386
|
-
|
429
|
+
|
430
|
+
if not isinstance(spec, Specification):
|
431
|
+
log.warning(f"check_result called with non-Specification: {type(spec)}")
|
432
|
+
return spec
|
433
|
+
# return SpecSkipped(getattr(spec, 'spec_uri', None), getattr(spec, 'triple_store', {}), "Spec is not a valid Specification instance")
|
434
|
+
|
435
|
+
log.debug(
|
436
|
+
f"run_spec {spec=}")
|
387
437
|
log.debug(
|
388
438
|
f"run_when {spec_uri=}, {triple_store=}, {spec.given=}, {spec.when=}, {spec.then=}")
|
389
439
|
if spec.given:
|
@@ -399,10 +449,16 @@ def run_spec(spec: Specification) -> SpecResult:
|
|
399
449
|
f"Running {when.queryType} spec {spec_uri} on {triple_store['type']}")
|
400
450
|
try:
|
401
451
|
result = run_when(spec_uri, triple_store, when)
|
452
|
+
log.info(
|
453
|
+
f"run {when.queryType} spec {spec_uri} on {triple_store['type']} {result=}")
|
402
454
|
except ParseException as e:
|
455
|
+
log.error(
|
456
|
+
f"parseException {e}")
|
403
457
|
return SparqlParseFailure(spec_uri, triple_store["type"], e)
|
404
458
|
except NotImplementedError as ex:
|
405
|
-
|
459
|
+
log.error(f"NotImplementedError {ex}")
|
460
|
+
raise ex
|
461
|
+
# return SpecSkipped(spec_uri, triple_store["type"], ex.args[0])
|
406
462
|
return check_result(spec, result)
|
407
463
|
except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout, OSError) as e:
|
408
464
|
# close_connection = False
|
@@ -413,7 +469,9 @@ def run_spec(spec: Specification) -> SpecResult:
|
|
413
469
|
except (TypeError, RequestException) as e:
|
414
470
|
log.error(f"{type(e)} {e}")
|
415
471
|
return SparqlExecutionError(spec_uri, triple_store["type"], e)
|
416
|
-
|
472
|
+
except Exception as e:
|
473
|
+
log.error(f"Unexpected error {e}")
|
474
|
+
return RuntimeError(spec_uri, triple_store["type"], f"{type(e).__name__}: {e}")
|
417
475
|
# https://github.com/Semantic-partners/mustrd/issues/78
|
418
476
|
# finally:
|
419
477
|
# if type(mustrd_triple_store) == MustrdAnzo and close_connection:
|
@@ -724,33 +782,33 @@ def get_then_update(spec_uri: URIRef, spec_graph: Graph) -> Graph:
|
|
724
782
|
return expected_results
|
725
783
|
|
726
784
|
|
727
|
-
def write_result_diff_to_log(res):
|
785
|
+
def write_result_diff_to_log(res, info):
|
728
786
|
if isinstance(res, UpdateSpecFailure) or isinstance(res, ConstructSpecFailure):
|
729
|
-
|
730
|
-
|
731
|
-
|
787
|
+
info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
|
788
|
+
info(f"{Fore.BLUE} In Expected Not In Actual:")
|
789
|
+
info(
|
732
790
|
res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
|
733
|
-
|
734
|
-
|
791
|
+
info(f"{Fore.RED} in_actual_not_in_expected")
|
792
|
+
info(
|
735
793
|
res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
|
736
|
-
|
737
|
-
|
794
|
+
info(f"{Fore.GREEN} in_both")
|
795
|
+
info(res.graph_comparison.in_both.serialize(format="ttl"))
|
738
796
|
|
739
797
|
if isinstance(res, SelectSpecFailure):
|
740
|
-
|
741
|
-
|
742
|
-
|
798
|
+
info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
|
799
|
+
info(res.message)
|
800
|
+
info(res.table_comparison.to_markdown())
|
743
801
|
if isinstance(res, SpecPassedWithWarning):
|
744
|
-
|
802
|
+
info(
|
745
803
|
f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
|
746
|
-
|
804
|
+
info(res.warning)
|
747
805
|
if isinstance(res, TripleStoreConnectionError) or isinstance(res, SparqlExecutionError) or \
|
748
806
|
isinstance(res, SparqlParseFailure):
|
749
|
-
|
750
|
-
|
807
|
+
info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
|
808
|
+
info(res.exception)
|
751
809
|
if isinstance(res, SpecSkipped):
|
752
|
-
|
753
|
-
|
810
|
+
info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
|
811
|
+
info(res.message)
|
754
812
|
|
755
813
|
|
756
814
|
def calculate_row_difference(df1: pandas.DataFrame,
|
@@ -29,6 +29,7 @@ from mustrd.anzo_utils import query_azg, query_graphmart
|
|
29
29
|
from mustrd.anzo_utils import query_configuration, json_to_dictlist, ttl_to_graph
|
30
30
|
|
31
31
|
|
32
|
+
|
32
33
|
def execute_select(triple_store: dict, when: str, bindings: dict = None) -> str:
|
33
34
|
try:
|
34
35
|
if bindings:
|
@@ -39,7 +40,7 @@ def execute_select(triple_store: dict, when: str, bindings: dict = None) -> str
|
|
39
40
|
f"FROM <{triple_store['input_graph']}>\nFROM <{triple_store['output_graph']}>").replace(
|
40
41
|
"${targetGraph}", f"<{triple_store['output_graph']}>")
|
41
42
|
# TODO: manage results here
|
42
|
-
return query_azg(anzo_config=triple_store, query=when)
|
43
|
+
return query_azg(anzo_config=triple_store, query=when, data_layers=[triple_store['input_graph']])
|
43
44
|
except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout):
|
44
45
|
raise
|
45
46
|
|
@@ -58,7 +59,7 @@ USING <{triple_store['output_graph']}>""").replace(
|
|
58
59
|
"${targetGraph}", f"<{output_graph}>")
|
59
60
|
|
60
61
|
response = query_azg(anzo_config=triple_store, query=substituted_query, is_update=True,
|
61
|
-
data_layers=input_graph, format="ttl")
|
62
|
+
data_layers=[input_graph, output_graph], format="ttl")
|
62
63
|
logging.debug(f'response {response}')
|
63
64
|
# TODO: deal with error responses
|
64
65
|
new_graph = ttl_to_graph(query_azg(anzo_config=triple_store, query="construct {?s ?p ?o} { ?s ?p ?o }",
|