mustrd 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mustrd/README.adoc +0 -177
- mustrd/TestResult.py +1 -1
- mustrd/mustrd.py +90 -99
- mustrd/mustrdAnzo.py +30 -15
- mustrd/mustrdTestPlugin.py +48 -41
- mustrd/namespace.py +9 -8
- mustrd/run.py +3 -4
- mustrd/spec_component.py +109 -98
- mustrd/test/test_mustrd.py +1 -1
- mustrd/utils.py +1 -0
- {mustrd-0.2.1.dist-info → mustrd-0.2.2.dist-info}/METADATA +10 -5
- {mustrd-0.2.1.dist-info → mustrd-0.2.2.dist-info}/RECORD +15 -15
- {mustrd-0.2.1.dist-info → mustrd-0.2.2.dist-info}/LICENSE +0 -0
- {mustrd-0.2.1.dist-info → mustrd-0.2.2.dist-info}/WHEEL +0 -0
- {mustrd-0.2.1.dist-info → mustrd-0.2.2.dist-info}/entry_points.txt +0 -0
mustrd/README.adoc
CHANGED
@@ -22,183 +22,6 @@ For a brief explanation of the meaning of these options use the help option.
|
|
22
22
|
|
23
23
|
Run `pytest` from the project root.
|
24
24
|
|
25
|
-
== Creating your own Test Specifications
|
26
|
-
|
27
|
-
If you have got this far then you are probably ready to create your own specifications to test your application SPARQL queries. These will be executed against the default RDFLib triplestore unless you configure one or more alternatives. The instructions for this are included in <<Configuring external triplestores>> below.
|
28
|
-
|
29
|
-
=== Paths
|
30
|
-
All paths are considered relative. That way mustrd tests can be versioned and shared easily.
|
31
|
-
To get an absolute path from a relative path in a spec file, we prefix it with the first existing result in:
|
32
|
-
1) Path where the spec is located
|
33
|
-
2) spec_path defined in mustrd test configuration files or cmd line argument
|
34
|
-
3) data_path defined in mustrd test configuration files or cmd line argument
|
35
|
-
4) Mustrd folder: In case of default resources packaged with mustrd source (will be in venv when mustrd is called as library)
|
36
|
-
We intentionally use the same method to build paths in all spec components to avoid confusion.
|
37
|
-
|
38
|
-
=== Givens
|
39
|
-
These are used to specify the dataset against which the SPARQL statement will be run.
|
40
|
-
They can be generated from external sources such as an existing graph, or a file or folder containing serialised RDF. It is also possible to specify the dataset as reified RDF directly in the test step. Currently tabular data sources such as csv files or TableDatasets are not supported.
|
41
|
-
Multiple given statements can be supplied and data is combined into a single dataset for the test.
|
42
|
-
|
43
|
-
* *InheritedDataset* - This is where no data is specified but the existing data in the target graph is retained rather than being replaced with a defined set. This can be used to chain tests together or to perform checks on application data.
|
44
|
-
----
|
45
|
-
must:given [ a must:InheritedDataset ] ;
|
46
|
-
----
|
47
|
-
* *FileDataset* - The dataset is a local file containing serialised RDF. The formats supported are the same as those for the RDFLib Graph().parse function i.e. Turtle (.ttl), NTriples (.nt), N3 (.n3), RDF/XML (.xml) and TriX. The data is used to replace any existing content in the target graph for the test.
|
48
|
-
----
|
49
|
-
must:given [ a must:FileDataset ;
|
50
|
-
must:file "test/data/given.ttl" ] ;
|
51
|
-
----
|
52
|
-
* *FolderDataset* - Very similar to the file dataset except that the location of the file is passed to the test specification as an argument from the caller. i.e. the -g option on the command line.
|
53
|
-
----
|
54
|
-
must:given [ a must:FolderDataset ;
|
55
|
-
must:fileName "given.ttl" ] ;
|
56
|
-
----
|
57
|
-
* *StatementsDataset* - The dataset is defined within the test in the form of reified RDF statements. e.g.
|
58
|
-
----
|
59
|
-
must:given [ a must:StatementsDataset ;
|
60
|
-
must:hasStatement [ a rdf:Statement ;
|
61
|
-
rdf:subject test-data:sub ;
|
62
|
-
rdf:predicate test-data:pred ;
|
63
|
-
rdf:object test-data:obj ; ] ; ] ;
|
64
|
-
----
|
65
|
-
* *AnzoGraphmartDataset* - The dataset is contained in an Anzo graphmart and needs to be retrieved from there. The Anzo instance containing the dataset needs to be indicated in the configuration file as documented in <<Configuring external triplestores>>.
|
66
|
-
----
|
67
|
-
must:given [ a must:AnzoGraphmartDataset ;
|
68
|
-
must:graphmart "http://cambridgesemantics.com/Graphmart/43445aeadf674e09818c81cf7049e46a";
|
69
|
-
must:layer "http://cambridgesemantics.com/Layer/33b97531d7e148748b75e4e3c6bbf164";
|
70
|
-
] .
|
71
|
-
----
|
72
|
-
=== Whens
|
73
|
-
These are the actual SPARQL queries that you wish to test. Queries can be supplied as a string directly in the test or as a file containing the query. Only single When statements are currently supported.
|
74
|
-
Mustrd does not derive the query type from the actual query, so it is necessary to provide this in the specification. Supported query types are SelectSparql, ConstructSparql and UpdateSparql.
|
75
|
-
|
76
|
-
* *TextSparqlSource* - The SPARQL query is included in the test as a (multiline) string value for the property queryText.
|
77
|
-
e.g.
|
78
|
-
----
|
79
|
-
must:when [ a must:TextSparqlSource ;
|
80
|
-
must:queryText "SELECT ?s ?p ?o WHERE { ?s ?p ?o }" ;
|
81
|
-
must:queryType must:SelectSparql ] ;
|
82
|
-
----
|
83
|
-
|
84
|
-
* *FileSparqlSource* - The SPARQL query is contained in a local file.
|
85
|
-
e.g.
|
86
|
-
----
|
87
|
-
must:when [ a must:FileSparqlSource ;
|
88
|
-
must:file "test/data/construct.rq" ;
|
89
|
-
must:queryType must:ConstructSparql ; ] ;
|
90
|
-
----
|
91
|
-
* *FolderSparqlSource* - Similar to the file SPARQL source except that the location of the file is passed to the test specification as an argument from the caller. i.e. the -w option on the command line.
|
92
|
-
----
|
93
|
-
must:when [ a must:FolderSparqlSource ;
|
94
|
-
must:fileName "construct.rq" ;
|
95
|
-
must:queryType must:ConstructSparql ; ] ;
|
96
|
-
----
|
97
|
-
* *AnzoQueryBuilderDataset* - The query is saved in the Query Builder of an Anzo instance and needs to be retrieved from there. The Anzo instance containing the query needs to be indicated in the configuration file as documented in <<Configuring external triplestores>>.
|
98
|
-
----
|
99
|
-
must:when [ a must:AnzoQueryBuilderDataset ;
|
100
|
-
must:queryFolder "Mustrd";
|
101
|
-
must:queryName "mustrd-construct" ;
|
102
|
-
must:queryType must:ConstructSparql
|
103
|
-
];
|
104
|
-
----
|
105
|
-
=== Thens
|
106
|
-
Then clauses are used to specify the expected result dataset for the test. These datasets can be specified in the same way as <<Givens>> except that an extended set of dataset types is supported. For the tabular results of SELECT queries TabularDatasets are required and again can be in file format such as CSV, or an inline table within the specification.
|
107
|
-
* *FileDataset* - The dataset is a local file containing serialised RDF or tabular data. The formats supported are the same as those for the RDFLib Graph().parse function i.e. Turtle (.ttl), NTriples (.nt), N3 (.n3), RDF/XML (.xml) and TriX, as well as tabular formats (.csv, .xls, .xlsx).
|
108
|
-
----
|
109
|
-
must:then [ a must:FileDataset ;
|
110
|
-
must:file "test/data/thenSuccess.xlsx" ] .
|
111
|
-
----
|
112
|
-
----
|
113
|
-
must:then [ a must:FileDataset ;
|
114
|
-
must:file "test/data/thenSuccess.nt" ] .
|
115
|
-
----
|
116
|
-
* *FolderDataset* - Very similar to the file dataset except that the location of the file is passed to the test specification as an argument from the caller. i.e. the -t option on the command line.
|
117
|
-
----
|
118
|
-
must:then [ a must:FolderDataset ;
|
119
|
-
must:fileName "then.ttl" ] ;
|
120
|
-
----
|
121
|
-
* *StatementsDataset* - The dataset is defined within the test in the form of reified RDF statements e.g.
|
122
|
-
----
|
123
|
-
must:then [ a must:StatementsDataset ;
|
124
|
-
must:hasStatement [ a rdf:Statement ;
|
125
|
-
rdf:subject test-data:sub ;
|
126
|
-
rdf:predicate test-data:pred ;
|
127
|
-
rdf:object test-data:obj ; ] ; ] ;
|
128
|
-
----
|
129
|
-
* *TableDataset* - The contents of the table defined in RDF syntax within the specification.
|
130
|
-
E.g. a table dataset consisting of a single row and three columns.
|
131
|
-
----
|
132
|
-
must:then [ a must:TableDataset ;
|
133
|
-
must:hasRow [ must:hasBinding[
|
134
|
-
must:variable "s" ;
|
135
|
-
must:boundValue test-data:sub ; ],
|
136
|
-
[ must:variable "p" ;
|
137
|
-
must:boundValue test-data:pred ; ],
|
138
|
-
[ must:variable "o" ;
|
139
|
-
must:boundValue test-data:obj ; ] ;
|
140
|
-
] ; ] .
|
141
|
-
----
|
142
|
-
* *OrderedTableDataset* - This is an extension of the TableDataset which allows the row order of the dataset to be specified using the SHACL order property to support the ORDER BY clause in SPARQL SELECT queries.
|
143
|
-
E.g. A table dataset consisting of two ordered rows and three columns.
|
144
|
-
----
|
145
|
-
must:then [ a must:OrderedTableDataset ;
|
146
|
-
must:hasRow [ sh:order 1 ;
|
147
|
-
must:hasBinding[ must:variable "s" ;
|
148
|
-
must:boundValue test-data:sub1 ; ],
|
149
|
-
[ must:variable "p" ;
|
150
|
-
must:boundValue test-data:pred1 ; ],
|
151
|
-
[ must:variable "o" ;
|
152
|
-
must:boundValue test-data:obj1 ; ] ; ] ,
|
153
|
-
[ sh:order 2 ;
|
154
|
-
must:hasBinding[ must:variable "s" ;
|
155
|
-
must:boundValue test-data:sub2 ; ],
|
156
|
-
[ must:variable "p" ;
|
157
|
-
must:boundValue test-data:pred2 ; ],
|
158
|
-
[ must:variable "o" ;
|
159
|
-
must:boundValue test-data:obj2 ; ] ; ] ;
|
160
|
-
] .
|
161
|
-
----
|
162
|
-
* *EmptyTable* - This is used to indicate that we are expecting an empty result from a SPARQL SELECT query.
|
163
|
-
----
|
164
|
-
must:then [ a must:EmptyTable ] .
|
165
|
-
----
|
166
|
-
* *EmptyGraph* - Similar to EmptyTable but used to indicate that we are expecting an empty graph as a result from a SPARQL query.
|
167
|
-
----
|
168
|
-
must:then [ a must:EmptyGraph ] .
|
169
|
-
----
|
170
|
-
* *AnzoGraphmartDataset* - The dataset is contained in an Anzo graphmart and needs to be retrieved from there. The Anzo instance containing the dataset needs to be indicated in the configuration file as documented in <<Configuring external triplestores>>.
|
171
|
-
----
|
172
|
-
must:then [ a must:AnzoGraphmartDataset ;
|
173
|
-
must:graphmart "http://cambridgesemantics.com/Graphmart/43445aeadf674e09818c81cf7049e46a";
|
174
|
-
must:layer "http://cambridgesemantics.com/Layer/33b97531d7e148748b75e4e3c6bbf164";
|
175
|
-
] .
|
176
|
-
----
|
177
|
-
== Configuring external triplestores
|
178
|
-
The configuration file for external triplestores can be located outside of the project root, as it is specified as an argument to the mustrd module or as the -c option on the command line when running run.py.
|
179
|
-
|
180
|
-
It is assumed that the external triplestore is already running, as mustrd is not configured to start it.
|
181
|
-
|
182
|
-
Currently, the supported external triplestores are GraphDB and Anzo.
|
183
|
-
|
184
|
-
The configuration file should be serialised RDF. An example in Turtle format is included below for GraphDB. For Anzo the *must:repository* value is replaced with a *must:gqeURI*.
|
185
|
-
----
|
186
|
-
@prefix must: <https://mustrd.com/model/> .
|
187
|
-
must:GraphDbConfig1 a must:GraphDbConfig ;
|
188
|
-
must:url "http://localhost";
|
189
|
-
must:port "7200";
|
190
|
-
must:inputGraph "http://localhost:7200/test-graph" ;
|
191
|
-
must:repository "mustrd" .
|
192
|
-
----
|
193
|
-
To avoid versioning secrets when you want to version triplestore configuration (for example in case you want to run mustrd in CI), you have to configure user/password in a different file.
|
194
|
-
This file must have the same name as the triple store configuration file, but with "_secrets" inserted just before the extension. For example triplestores.ttl -> triplestores_secrets.ttl
|
195
|
-
Subjects in the two files must match, no need to redefine the type, for example:
|
196
|
-
----
|
197
|
-
@prefix must: <https://mustrd.com/model/> .
|
198
|
-
must:GraphDbConfig1 must:username 'test' ;
|
199
|
-
must:password 'test' .
|
200
|
-
----
|
201
|
-
|
202
25
|
== Additional Notes for Developers
|
203
26
|
Mustrd remains very much under development. It is anticipated that additional functionality and triplestore support will be added over time. The project uses https://python-poetry.org/docs/[Poetry] to manage dependencies so it will be necessary to have this installed to contribute towards the project. The link contains instructions on how to install and use this.
|
204
27
|
As the project is actually built from the requirements.txt file at the project root, it is necessary to export dependencies from poetry to this file before committing and pushing changes to the repository, using the following command.
|
mustrd/TestResult.py
CHANGED
@@ -43,7 +43,7 @@ class testStatus(Enum):
|
|
43
43
|
SKIPPED = "skipped"
|
44
44
|
|
45
45
|
|
46
|
-
TEMPLATE_FOLDER =
|
46
|
+
TEMPLATE_FOLDER = Path(os.path.join(get_mustrd_root(), "templates/"))
|
47
47
|
|
48
48
|
|
49
49
|
RESULT_LIST_MD_TEMPLATE = "md_ResultList_template.jinja"
|
mustrd/mustrd.py
CHANGED
@@ -46,12 +46,12 @@ import json
|
|
46
46
|
from pandas import DataFrame
|
47
47
|
|
48
48
|
from .spec_component import TableThenSpec, parse_spec_component, WhenSpec, ThenSpec
|
49
|
-
from .utils import
|
49
|
+
from .utils import is_json, get_mustrd_root
|
50
50
|
from colorama import Fore, Style
|
51
51
|
from tabulate import tabulate
|
52
52
|
from collections import defaultdict
|
53
53
|
from pyshacl import validate
|
54
|
-
import logging
|
54
|
+
import logging
|
55
55
|
from http.client import HTTPConnection
|
56
56
|
from .steprunner import upload_given, run_when
|
57
57
|
|
@@ -73,6 +73,7 @@ def debug_requests_on():
|
|
73
73
|
requests_log.setLevel(logging.DEBUG)
|
74
74
|
requests_log.propagate = True
|
75
75
|
|
76
|
+
|
76
77
|
def debug_requests_off():
|
77
78
|
'''Switches off logging of the requests module, might be some side-effects'''
|
78
79
|
HTTPConnection.debuglevel = 0
|
@@ -84,8 +85,10 @@ def debug_requests_off():
|
|
84
85
|
requests_log.setLevel(logging.WARNING)
|
85
86
|
requests_log.propagate = False
|
86
87
|
|
88
|
+
|
87
89
|
debug_requests_off()
|
88
90
|
|
91
|
+
|
89
92
|
@dataclass
|
90
93
|
class Specification:
|
91
94
|
spec_uri: URIRef
|
@@ -234,25 +237,18 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
|
|
234
237
|
if len(error_messages) > 0:
|
235
238
|
error_messages.sort()
|
236
239
|
error_message = "\n".join(msg for msg in error_messages)
|
237
|
-
invalid_specs += [SpecSkipped(subject_uri, triple_store["type"], error_message, file.name)
|
238
|
-
|
240
|
+
invalid_specs += [SpecSkipped(subject_uri, triple_store["type"], error_message, file.name)
|
241
|
+
for triple_store in triple_stores]
|
239
242
|
else:
|
240
243
|
subject_uris.add(subject_uri)
|
241
244
|
this_spec_graph = Graph()
|
242
245
|
this_spec_graph.parse(file)
|
243
246
|
spec_uris_in_this_file = list(this_spec_graph.subjects(RDF.type, MUST.TestSpec))
|
244
247
|
for spec in spec_uris_in_this_file:
|
245
|
-
# print(f"adding {tripleToAdd}")
|
246
248
|
this_spec_graph.add([spec, MUST.specSourceFile, Literal(file)])
|
247
249
|
this_spec_graph.add([spec, MUST.specFileName, Literal(file.name)])
|
248
|
-
# print(f"beforeadd: {spec_graph}" )
|
249
|
-
# print(f"beforeadd: {str(this_spec_graph.serialize())}" )
|
250
250
|
spec_graph += this_spec_graph
|
251
251
|
|
252
|
-
|
253
|
-
sourceFiles = list(spec_graph.subject_objects(MUST.specSourceFile))
|
254
|
-
# print(f"sourceFiles: {sourceFiles}")
|
255
|
-
|
256
252
|
valid_spec_uris = list(spec_graph.subjects(RDF.type, MUST.TestSpec))
|
257
253
|
|
258
254
|
if focus_uris:
|
@@ -264,7 +260,7 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
|
|
264
260
|
log.info(f"Collected {len(focus_uris)} focus test spec(s)")
|
265
261
|
return focus_uris, spec_graph, invalid_focus_specs
|
266
262
|
else:
|
267
|
-
log.info(f"Collected {len(valid_spec_uris)} valid test spec(s)")
|
263
|
+
log.info(f"Collected {len(valid_spec_uris)} valid test spec(s)")
|
268
264
|
return valid_spec_uris, spec_graph, invalid_specs
|
269
265
|
|
270
266
|
|
@@ -276,14 +272,16 @@ def get_specs(spec_uris: List[URIRef], spec_graph: Graph, triple_stores: List[di
|
|
276
272
|
for triple_store in triple_stores:
|
277
273
|
if "error" in triple_store:
|
278
274
|
log.error(f"{triple_store['error']}. No specs run for this triple store.")
|
279
|
-
skipped_results += [SpecSkipped(spec_uri, triple_store['type'], triple_store['error'],
|
275
|
+
skipped_results += [SpecSkipped(spec_uri, triple_store['type'], triple_store['error'],
|
276
|
+
get_spec_file(spec_uri, spec_graph)) for spec_uri in
|
280
277
|
spec_uris]
|
281
278
|
else:
|
282
279
|
for spec_uri in spec_uris:
|
283
280
|
try:
|
284
281
|
specs += [get_spec(spec_uri, spec_graph, run_config, triple_store)]
|
285
282
|
except (ValueError, FileNotFoundError, ConnectionError) as e:
|
286
|
-
skipped_results += [SpecSkipped(spec_uri, triple_store['type'],
|
283
|
+
skipped_results += [SpecSkipped(spec_uri, triple_store['type'],
|
284
|
+
e, get_spec_file(spec_uri, spec_graph))]
|
287
285
|
|
288
286
|
except (BadSyntax, FileNotFoundError) as e:
|
289
287
|
template = "An exception of type {0} occurred when trying to parse the triple store configuration file. " \
|
@@ -303,25 +301,28 @@ def run_specs(specs) -> List[SpecResult]:
|
|
303
301
|
results.append(run_spec(specification))
|
304
302
|
return results
|
305
303
|
|
304
|
+
|
306
305
|
def get_spec_file(spec_uri: URIRef, spec_graph: Graph):
|
307
|
-
return str(spec_graph.value(subject
|
306
|
+
return str(spec_graph.value(subject=spec_uri, predicate=MUST.specFileName, default="default.mustrd.ttl"))
|
307
|
+
|
308
308
|
|
309
309
|
def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_triple_store: dict = None) -> Specification:
|
310
310
|
try:
|
311
|
-
if mustrd_triple_store
|
311
|
+
if not mustrd_triple_store:
|
312
312
|
mustrd_triple_store = {"type": TRIPLESTORE.RdfLib}
|
313
313
|
components = []
|
314
314
|
for predicate in MUST.given, MUST.when, MUST.then:
|
315
315
|
components.append(parse_spec_component(subject=spec_uri,
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
316
|
+
predicate=predicate,
|
317
|
+
spec_graph=spec_graph,
|
318
|
+
run_config=run_config,
|
319
|
+
mustrd_triple_store=mustrd_triple_store))
|
320
320
|
|
321
321
|
spec_file_name = get_spec_file(spec_uri, spec_graph)
|
322
322
|
# https://github.com/Semantic-partners/mustrd/issues/92
|
323
|
-
return Specification(spec_uri, mustrd_triple_store,
|
324
|
-
|
323
|
+
return Specification(spec_uri, mustrd_triple_store,
|
324
|
+
components[0].value, components[1], components[2], spec_file_name)
|
325
|
+
|
325
326
|
except (ValueError, FileNotFoundError) as e:
|
326
327
|
template = "An exception of type {0} occurred. Arguments:\n{1!r}"
|
327
328
|
message = template.format(type(e).__name__, e.args)
|
@@ -333,7 +334,7 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
|
|
333
334
|
|
334
335
|
|
335
336
|
def check_result(spec, result):
|
336
|
-
if
|
337
|
+
if isinstance(spec.then, TableThenSpec):
|
337
338
|
return table_comparison(result, spec)
|
338
339
|
else:
|
339
340
|
graph_compare = graph_comparison(spec.then.value, result)
|
@@ -383,27 +384,30 @@ def run_spec(spec: Specification) -> SpecResult:
|
|
383
384
|
# if type(mustrd_triple_store) == MustrdAnzo and close_connection:
|
384
385
|
# mustrd_triple_store.clear_graph()
|
385
386
|
|
387
|
+
|
386
388
|
def get_triple_store_graph(triple_store_graph_path: Path, secrets: str):
|
387
389
|
if secrets:
|
388
|
-
return Graph().parse(triple_store_graph_path).parse(data
|
390
|
+
return Graph().parse(triple_store_graph_path).parse(data=secrets)
|
389
391
|
else:
|
390
|
-
secret_path = triple_store_graph_path.parent / Path(triple_store_graph_path.stem +
|
392
|
+
secret_path = triple_store_graph_path.parent / Path(triple_store_graph_path.stem +
|
393
|
+
"_secrets" + triple_store_graph_path.suffix)
|
391
394
|
return Graph().parse(triple_store_graph_path).parse(secret_path)
|
392
|
-
|
395
|
+
|
393
396
|
|
394
397
|
def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
|
395
398
|
triple_stores = []
|
396
399
|
shacl_graph = Graph().parse(Path(os.path.join(get_mustrd_root(), "model/triplestoreshapes.ttl")))
|
397
400
|
ont_graph = Graph().parse(Path(os.path.join(get_mustrd_root(), "model/triplestoreOntology.ttl")))
|
398
401
|
conforms, results_graph, results_text = validate(
|
399
|
-
data_graph=
|
400
|
-
shacl_graph
|
401
|
-
ont_graph
|
402
|
-
advanced=
|
403
|
-
inference=
|
402
|
+
data_graph=triple_store_graph,
|
403
|
+
shacl_graph=shacl_graph,
|
404
|
+
ont_graph=ont_graph,
|
405
|
+
advanced=True,
|
406
|
+
inference='none'
|
404
407
|
)
|
405
408
|
if not conforms:
|
406
|
-
raise ValueError(f"Triple store configuration not conform to the shapes. SHACL report: {results_text}",
|
409
|
+
raise ValueError(f"Triple store configuration not conform to the shapes. SHACL report: {results_text}",
|
410
|
+
results_graph)
|
407
411
|
for triple_store_config, rdf_type, triple_store_type in triple_store_graph.triples((None, RDF.type, None)):
|
408
412
|
triple_store = {}
|
409
413
|
triple_store["type"] = triple_store_type
|
@@ -413,15 +417,18 @@ def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
|
|
413
417
|
triple_store["url"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.url)
|
414
418
|
triple_store["port"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.port)
|
415
419
|
try:
|
416
|
-
triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
|
417
|
-
|
420
|
+
triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
|
421
|
+
predicate=TRIPLESTORE.username))
|
422
|
+
triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
|
423
|
+
predicate=TRIPLESTORE.password))
|
418
424
|
except (FileNotFoundError, ValueError) as e:
|
419
425
|
triple_store["error"] = e
|
420
|
-
triple_store["gqe_uri"] = triple_store_graph.value(subject=triple_store_config,
|
426
|
+
triple_store["gqe_uri"] = triple_store_graph.value(subject=triple_store_config,
|
427
|
+
predicate=TRIPLESTORE.gqeURI)
|
421
428
|
triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
|
422
429
|
predicate=TRIPLESTORE.inputGraph)
|
423
430
|
triple_store["output_graph"] = triple_store_graph.value(subject=triple_store_config,
|
424
|
-
|
431
|
+
predicate=TRIPLESTORE.outputGraph)
|
425
432
|
try:
|
426
433
|
check_triple_store_params(triple_store, ["url", "port", "username", "password", "input_graph"])
|
427
434
|
except ValueError as e:
|
@@ -431,8 +438,10 @@ def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
|
|
431
438
|
triple_store["url"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.url)
|
432
439
|
triple_store["port"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.port)
|
433
440
|
try:
|
434
|
-
triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
|
435
|
-
|
441
|
+
triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
|
442
|
+
predicate=TRIPLESTORE.username))
|
443
|
+
triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
|
444
|
+
predicate=TRIPLESTORE.password))
|
436
445
|
except (FileNotFoundError, ValueError) as e:
|
437
446
|
log.error(f"Credential retrieval failed {e}")
|
438
447
|
triple_store["error"] = e
|
@@ -461,11 +470,9 @@ def check_triple_store_params(triple_store: dict, required_params: List[str]):
|
|
461
470
|
|
462
471
|
def get_credential_from_file(triple_store_name: URIRef, credential: str, config_path: Literal) -> str:
|
463
472
|
log.info(f"get_credential_from_file {triple_store_name}, {credential}, {config_path}")
|
464
|
-
if config_path
|
473
|
+
if not config_path:
|
465
474
|
raise ValueError(f"Cannot establish connection defined in {triple_store_name}. "
|
466
475
|
f"Missing required parameter: {credential}.")
|
467
|
-
# if os.path.isrelative(config_path)
|
468
|
-
# project_root = get_project_root()
|
469
476
|
path = Path(config_path)
|
470
477
|
log.info(f"get_credential_from_file {path}")
|
471
478
|
|
@@ -480,6 +487,7 @@ def get_credential_from_file(triple_store_name: URIRef, credential: str, config_
|
|
480
487
|
raise ValueError(f"Error reading credentials config file: {e}")
|
481
488
|
return config[str(triple_store_name)][credential]
|
482
489
|
|
490
|
+
|
483
491
|
# Convert sparql json query results as defined in https://www.w3.org/TR/rdf-sparql-json-res/
|
484
492
|
def json_results_to_panda_dataframe(result: str) -> pandas.DataFrame:
|
485
493
|
json_result = json.loads(result)
|
@@ -534,7 +542,8 @@ def table_comparison(result: str, spec: Specification) -> SpecResult:
|
|
534
542
|
|
535
543
|
# Scenario 1: expected no result but got a result
|
536
544
|
if then.empty:
|
537
|
-
message = f"Expected 0 row(s) and 0 column(s),
|
545
|
+
message = f"""Expected 0 row(s) and 0 column(s),
|
546
|
+
got {df.shape[0]} row(s) and {round(df.shape[1] / 2)} column(s)"""
|
538
547
|
empty_then = create_empty_dataframe_with_columns(df)
|
539
548
|
df_diff = empty_then.compare(df, result_names=("expected", "actual"))
|
540
549
|
|
@@ -546,14 +555,6 @@ def table_comparison(result: str, spec: Specification) -> SpecResult:
|
|
546
555
|
if ordered_result is True and not spec.then.ordered:
|
547
556
|
message += ". Actual result is ordered, must:then must contain sh:order on every row."
|
548
557
|
return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, message)
|
549
|
-
# if df.shape == then.shape and (df.columns == then.columns).all():
|
550
|
-
# df_diff = then.compare(df, result_names=("expected", "actual"))
|
551
|
-
# if df_diff.empty:
|
552
|
-
# df_diff = df
|
553
|
-
# print(df_diff.to_markdown())
|
554
|
-
# else:
|
555
|
-
# df_diff = construct_df_diff(df, then)
|
556
|
-
# print(df_diff.to_markdown())
|
557
558
|
else:
|
558
559
|
if len(columns) == len(then.columns):
|
559
560
|
if sorted_columns == sorted_then_cols:
|
@@ -579,15 +580,15 @@ def table_comparison(result: str, spec: Specification) -> SpecResult:
|
|
579
580
|
|
580
581
|
if then.empty:
|
581
582
|
# Scenario 3: expected no result, got no result
|
582
|
-
message =
|
583
|
+
message = "Expected 0 row(s) and 0 column(s), got 0 row(s) and 0 column(s)"
|
583
584
|
df = pandas.DataFrame()
|
584
585
|
else:
|
585
586
|
# Scenario 4: expected a result, but got an empty result
|
586
|
-
message = f"Expected {then.shape[0]} row(s)
|
587
|
+
message = f"""Expected {then.shape[0]} row(s)
|
588
|
+
and {round(then.shape[1] / 2)} column(s), got 0 row(s) and 0 column(s)"""
|
587
589
|
then = then[sorted_then_cols]
|
588
590
|
df = create_empty_dataframe_with_columns(then)
|
589
591
|
df_diff = then.compare(df, result_names=("expected", "actual"))
|
590
|
-
print(df_diff.to_markdown())
|
591
592
|
|
592
593
|
if df_diff.empty:
|
593
594
|
if warning:
|
@@ -595,13 +596,8 @@ def table_comparison(result: str, spec: Specification) -> SpecResult:
|
|
595
596
|
else:
|
596
597
|
return SpecPassed(spec.spec_uri, spec.triple_store["type"])
|
597
598
|
else:
|
598
|
-
|
599
|
+
log.error("\n" + df_diff.to_markdown())
|
599
600
|
log.error(message)
|
600
|
-
# print(spec.spec_uri)
|
601
|
-
# print("actual:")
|
602
|
-
# print(then)
|
603
|
-
# print("expected:")
|
604
|
-
# print(df)
|
605
601
|
return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], df_diff, message)
|
606
602
|
|
607
603
|
except ParseException as e:
|
@@ -622,18 +618,18 @@ def graph_comparison(expected_graph: Graph, actual_graph: Graph) -> GraphCompari
|
|
622
618
|
|
623
619
|
def get_then_update(spec_uri: URIRef, spec_graph: Graph) -> Graph:
|
624
620
|
then_query = f"""
|
625
|
-
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
621
|
+
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
626
622
|
|
627
623
|
CONSTRUCT {{ ?s ?p ?o }}
|
628
624
|
{{
|
629
|
-
<{spec_uri}> <{MUST.then}>
|
625
|
+
<{spec_uri}> <{MUST.then}>
|
630
626
|
a <{MUST.StatementsDataset}> ;
|
631
627
|
<{MUST.hasStatement}> [
|
632
628
|
a rdf:Statement ;
|
633
629
|
rdf:subject ?s ;
|
634
630
|
rdf:predicate ?p ;
|
635
631
|
rdf:object ?o ;
|
636
|
-
] ; ]
|
632
|
+
] ; ]
|
637
633
|
}}
|
638
634
|
"""
|
639
635
|
expected_results = spec_graph.query(then_query).graph
|
@@ -707,7 +703,7 @@ def create_empty_dataframe_with_columns(df: pandas.DataFrame) -> pandas.DataFram
|
|
707
703
|
|
708
704
|
|
709
705
|
def review_results(results: List[SpecResult], verbose: bool) -> None:
|
710
|
-
|
706
|
+
log.info("===== Result Overview =====")
|
711
707
|
# Init dictionaries
|
712
708
|
status_dict = defaultdict(lambda: defaultdict(int))
|
713
709
|
status_counts = defaultdict(lambda: defaultdict(int))
|
@@ -723,7 +719,8 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
|
|
723
719
|
|
724
720
|
# Convert dictionaries to list for tabulate
|
725
721
|
table_rows = [[spec_uri] + [
|
726
|
-
f"{colours.get(status_dict[spec_uri][triple_store], Fore.RED)}
|
722
|
+
f"""{colours.get(status_dict[spec_uri][triple_store], Fore.RED)}
|
723
|
+
{status_dict[spec_uri][triple_store].__name__}{Style.RESET_ALL}"""
|
727
724
|
for triple_store in triple_stores] for spec_uri in set(status_dict.keys())]
|
728
725
|
|
729
726
|
status_rows = [[f"{colours.get(status, Fore.RED)}{status.__name__}{Style.RESET_ALL}"] +
|
@@ -731,8 +728,8 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
|
|
731
728
|
for triple_store in triple_stores] for status in set(statuses)]
|
732
729
|
|
733
730
|
# Display tables with tabulate
|
734
|
-
|
735
|
-
|
731
|
+
log.info(tabulate(table_rows, headers=['Spec Uris / triple stores'] + triple_stores, tablefmt="pretty"))
|
732
|
+
log.info(tabulate(status_rows, headers=['Status / triple stores'] + triple_stores, tablefmt="pretty"))
|
736
733
|
|
737
734
|
pass_count = statuses.count(SpecPassed)
|
738
735
|
warning_count = statuses.count(SpecPassedWithWarning)
|
@@ -748,40 +745,34 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
|
|
748
745
|
overview_colour = Fore.GREEN
|
749
746
|
|
750
747
|
logger_setup.flush()
|
751
|
-
|
748
|
+
log.info(f"{overview_colour}===== {fail_count} failures, {skipped_count} skipped, {Fore.GREEN}{pass_count} passed, "
|
752
749
|
f"{overview_colour}{warning_count} passed with warnings =====")
|
753
750
|
|
754
751
|
if verbose and (fail_count or warning_count or skipped_count):
|
755
752
|
for res in results:
|
756
|
-
if
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
if
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
if
|
771
|
-
|
772
|
-
if
|
773
|
-
|
774
|
-
|
775
|
-
if
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
if
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
753
|
+
if isinstance(res, UpdateSpecFailure):
|
754
|
+
log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
|
755
|
+
log.info(f"{Fore.BLUE} In Expected Not In Actual:")
|
756
|
+
log.info(res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
|
757
|
+
log.info()
|
758
|
+
log.info(f"{Fore.RED} in_actual_not_in_expected")
|
759
|
+
log.info(res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
|
760
|
+
log.info(f"{Fore.GREEN} in_both")
|
761
|
+
log.info(res.graph_comparison.in_both.serialize(format="ttl"))
|
762
|
+
|
763
|
+
if isinstance(res, SelectSpecFailure):
|
764
|
+
log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
|
765
|
+
log.info(res.message)
|
766
|
+
log.info(res.table_comparison.to_markdown())
|
767
|
+
if isinstance(res, ConstructSpecFailure) or isinstance(res, UpdateSpecFailure):
|
768
|
+
log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
|
769
|
+
if isinstance(res, SpecPassedWithWarning):
|
770
|
+
log.info(f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
|
771
|
+
log.info(res.warning)
|
772
|
+
if isinstance(res, TripleStoreConnectionError) or type(res, SparqlExecutionError) or \
|
773
|
+
isinstance(res, SparqlParseFailure):
|
774
|
+
log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
|
775
|
+
log.info(res.exception)
|
776
|
+
if isinstance(res, SpecSkipped):
|
777
|
+
log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
|
778
|
+
log.info(res.message)
|