mustrd 0.1.8__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mustrd-0.1.8 → mustrd-0.2.0}/PKG-INFO +1 -1
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/README.adoc +9 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/mustrd.py +1 -2
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/mustrdAnzo.py +61 -77
- mustrd-0.2.0/mustrd/mustrdQueryProcessor.py +136 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/spec_component.py +49 -41
- {mustrd-0.1.8 → mustrd-0.2.0}/pyproject.toml +1 -1
- {mustrd-0.1.8 → mustrd-0.2.0}/LICENSE +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/README.adoc +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/TestResult.py +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/__init__.py +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/logger_setup.py +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/catalog-v001.xml +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/mustrdShapes.ttl +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/mustrdTestOntology.ttl +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/mustrdTestShapes.ttl +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/ontology.ttl +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/test-resources/resources.ttl +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/triplestoreOntology.ttl +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/triplestoreshapes.ttl +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/mustrdGraphDb.py +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/mustrdRdfLib.py +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/mustrdTestPlugin.py +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/namespace.py +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/run.py +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/steprunner.py +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/templates/md_ResultList_leaf_template.jinja +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/templates/md_ResultList_template.jinja +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/templates/md_stats_template.jinja +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/test/test_mustrd.py +0 -0
- {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/utils.py +0 -0
@@ -26,6 +26,15 @@ Run `pytest` from the project root.
|
|
26
26
|
|
27
27
|
If you have got this far then you are probably ready to create your own specifications to test your application SPARQL queries. These will be executed against the default RDFLib triplestore unless you configure one or more alternatives. The instructions for this are included in <<Configuring external triplestores>> below.
|
28
28
|
|
29
|
+
=== Paths
|
30
|
+
All paths are considered relative. That way mustrd tests can be versioned and shared easily.
|
31
|
+
To get an absolute path from a relative path in a spec file, we prefix it with the first of the following roots under which the resulting path exists:
|
32
|
+
1) Path where the spec is located
|
33
|
+
2) spec_path defined in mustrd test configuration files or cmd line argument
|
34
|
+
3) data_path defined in mustrd test configuration files or cmd line argument
|
35
|
+
4) Mustrd folder: In case of default resources packaged with mustrd source (will be in venv when mustrd is called as library)
|
36
|
+
We intentionally use the same method to build paths in all spec components to avoid confusion.
|
37
|
+
|
29
38
|
=== Givens
|
30
39
|
These are used to specify the dataset against which the SPARQL statement will be run.
|
31
40
|
They can be generated from external sources such as an existing graph, or a file or folder containing serialised RDF. It is also possible to specify the dataset as reified RDF directly in the test step. Currently tabular data sources such as csv files or TableDatasets are not supported.
|
@@ -304,8 +304,7 @@ def run_specs(specs) -> List[SpecResult]:
|
|
304
304
|
return results
|
305
305
|
|
306
306
|
def get_spec_file(spec_uri: URIRef, spec_graph: Graph):
    """Return the spec file name recorded for ``spec_uri`` in ``spec_graph``.

    The ``MUST.specFileName`` value of the spec is looked up; when the graph
    holds none, the placeholder ``"default.mustrd.ttl"`` is used instead.
    The result is always converted to ``str``.
    """
    file_name = spec_graph.value(
        subject=spec_uri,
        predicate=MUST.specFileName,
        default="default.mustrd.ttl",
    )
    return str(file_name)
|
309
308
|
|
310
309
|
def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_triple_store: dict = None) -> Specification:
|
311
310
|
try:
|
@@ -28,7 +28,6 @@ from rdflib import Graph, ConjunctiveGraph, Literal, URIRef
|
|
28
28
|
from requests import ConnectTimeout, Response, HTTPError, RequestException, ConnectionError
|
29
29
|
from bs4 import BeautifulSoup
|
30
30
|
import logging
|
31
|
-
from .namespace import MUST
|
32
31
|
|
33
32
|
|
34
33
|
# https://github.com/Semantic-partners/mustrd/issues/73
|
@@ -43,7 +42,6 @@ def manage_anzo_response(response: Response) -> str:
|
|
43
42
|
else:
|
44
43
|
raise RequestException(f"Anzo error, status code: {response.status_code}, content: {content_string}")
|
45
44
|
|
46
|
-
|
47
45
|
def query_with_bindings(bindings: dict, when: str) -> str:
|
48
46
|
values = ""
|
49
47
|
for key, value in bindings.items():
|
@@ -52,68 +50,70 @@ def query_with_bindings(bindings: dict, when: str) -> str:
|
|
52
50
|
return f"{split_query[0].strip()} WHERE {{ {values} {split_query[1].strip()}"
|
53
51
|
|
54
52
|
def execute_select(triple_store: dict, when: str, bindings: dict = None) -> str:
    """Run a SELECT query against the triple store's input graph.

    Args:
        triple_store: connection settings; ``input_graph`` is read here and
            the dict is forwarded to ``execute_sparql``.
        when: the SPARQL query text.
        bindings: optional bindings; when given (and non-empty) they are
            merged into the query by ``query_with_bindings``.

    Returns:
        Whatever ``execute_sparql`` returns for the query.
    """
    query = query_with_bindings(bindings, when) if bindings else when
    # The ${fromSources} placeholder is simply dropped for a query step:
    # the sources are defined using HTTP parameters instead.
    query = query.replace("${fromSources}", "")
    return execute_sparql(triple_store, False, query, triple_store['input_graph'])
|
58
|
+
|
59
|
+
# Request settings for INSERT / DELETE (SPARQL update).
_UPDATE_REQUEST_SETTINGS = {
    "default-graph-param": "using-graph-uri",
    "named-graph-param": "using-named-graph-uri",
    "Content-Type": "application/sparql-update",
}

# Request settings for SELECT / CONSTRUCT (SPARQL query).
_QUERY_REQUEST_SETTINGS = {
    "default-graph-param": "default-graph-uri",
    "named-graph-param": "named-graph-uri",
    "Content-Type": "application/sparql-query",
}

# Lookup keyed by the ``is_update`` flag: True selects the update settings,
# False the query settings. Each entry names the HTTP parameters that carry
# the default / named graph and the Content-Type header to send.
PARAMS = {
    True: _UPDATE_REQUEST_SETTINGS,
    False: _QUERY_REQUEST_SETTINGS,
}
|
73
|
+
def execute_sparql(triple_store: dict, is_update: bool, sparql, graph: str, format: str = "application/sparql-results+json"):
    """POST a SPARQL query or update to the triple store's ``/sparql`` endpoint.

    Args:
        triple_store: connection settings; the keys read here are ``gqe_uri``,
            ``url``, ``port``, ``username`` and ``password``.
        is_update: ``True`` for an update, ``False`` for a query. Selects the
            graph parameter names and the Content-Type header from ``PARAMS``.
        sparql: the request body.
        graph: graph URI sent as both the default-graph and the named-graph
            parameter.
        format: value of the ``format`` request parameter.

    Returns:
        The value produced by ``manage_anzo_response`` for the HTTP response.

    Raises:
        ConnectionError, TimeoutError, HTTPError, ConnectTimeout: logged and
            re-raised unchanged.
    """
    query_params = {
        "format": format,
        "datasourceURI": triple_store['gqe_uri'],
        "skipCache": "true",
    }
    # Default and named datasets use different parameter names for query and update.
    settings = PARAMS[is_update]
    query_params[settings["default-graph-param"]] = graph
    query_params[settings["named-graph-param"]] = graph
    request_headers = {"Content-Type": settings["Content-Type"]}
    endpoint = f"https://{triple_store['url']}:{triple_store['port']}/sparql"
    credentials = (triple_store['username'], triple_store['password'])
    try:
        # NOTE(review): verify=False disables TLS certificate verification and
        # no timeout is passed to requests - confirm both are intended.
        http_response = requests.post(url=endpoint,
                                      params=query_params,
                                      auth=credentials,
                                      headers=request_headers,
                                      data=sparql,
                                      verify=False)
        response = manage_anzo_response(http_response)
        logging.debug(f'response {response}')
        return response
    except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout) as e:
        logging.error(f'response {e}')
        raise
|
69
95
|
|
70
96
|
def execute_update(triple_store: dict, when: str, bindings: dict = None) -> Graph:
    """Run a SPARQL update and return the content of the output graph.

    The ``${usingSources}`` placeholder is removed from the query and
    ``${targetGraph}`` is replaced by ``<output_graph>``. The update is sent
    with the input graph as its dataset, then every triple of the output
    graph is read back and parsed.

    Args:
        triple_store: connection settings; ``input_graph`` and ``output_graph``
            are read here and the dict is forwarded to ``execute_sparql``.
        when: the SPARQL update text.
        bindings: accepted for signature parity with the other ``execute_*``
            functions; not used by this function.

    Returns:
        A ``Graph`` parsed from the response of a ``construct`` over the
        output graph.
    """
    # NOTE(review): triple_store also carries the credentials used by
    # execute_sparql, so this debug line writes them to the log - confirm.
    logging.debug(f"updating in anzo! {triple_store=} {when=}")

    output_graph = triple_store['output_graph']

    # FIXME If query doesn't contain ${targetGraph}, then graph should be defined explicitly in the query
    substituted_query = when.replace("${usingSources}", "").replace(
        "${targetGraph}", f"<{output_graph}>")

    execute_sparql(triple_store, True, substituted_query, triple_store['input_graph'], "ttl")

    # The update writes into the output graph, so the check query has to read
    # that graph. execute_construct always queries the input graph and would
    # therefore not see the triples produced by the update.
    everything_response = execute_sparql(triple_store, False, "construct {?s ?p ?o} { ?s ?p ?o }",
                                         output_graph, "ttl")
    new_graph = Graph().parse(data=everything_response)

    logging.debug(f"new_graph={new_graph.serialize(format='ttl')}")

    return new_graph
|
98
110
|
|
99
111
|
|
100
112
|
def execute_construct(triple_store: dict, when: str, bindings: dict = None) -> Graph:
    """Run a CONSTRUCT query against the input graph and parse the result.

    Args:
        triple_store: connection settings; ``input_graph`` is read here and
            the dict is forwarded to ``execute_sparql``.
        when: the SPARQL query text.
        bindings: optional bindings; when given (and non-empty) they are
            merged into the query by ``query_with_bindings``.

    Returns:
        A new ``Graph`` parsed from the response, which is requested with the
        ``"ttl"`` format.
    """
    query = query_with_bindings(bindings, when) if bindings else when
    response = execute_sparql(triple_store, False, query, triple_store['input_graph'], "ttl")
    result_graph = Graph()
    return result_graph.parse(data=response)
|
117
117
|
|
118
118
|
|
119
119
|
# Get Given or then from the content of a graphmart
|
@@ -207,30 +207,14 @@ SELECT ?query ?param_query ?query_template
|
|
207
207
|
|
208
208
|
def upload_given(triple_store: dict, given: Graph):
    """Reset the input and output graphs, then load ``given`` into the input graph.

    Args:
        triple_store: connection settings; ``input_graph`` and ``output_graph``
            are read here and the dict is forwarded to the helpers.
        given: the graph to upload; it is serialised as N-Triples and sent in
            a single ``INSERT DATA`` update.
    """
    logging.debug(f"upload_given {triple_store} {given}")
    # Start from a clean slate: input graph first, then output graph.
    for graph_key in ('input_graph', 'output_graph'):
        clear_graph(triple_store, triple_store[graph_key])
    serialized_given = given.serialize(format="nt")

    insert_query = f"INSERT DATA {{graph <{triple_store['input_graph']}>{{{serialized_given}}}}}"
    # The target graph is named inside the update itself, so neither a graph
    # parameter nor a response format is passed.
    execute_sparql(triple_store, True, insert_query, None, None)
|
224
216
|
|
225
217
|
|
226
218
|
def clear_graph(triple_store: dict, graph_uri: str):
    """Send a ``CLEAR GRAPH`` update for ``graph_uri`` to the triple store.

    Args:
        triple_store: connection settings, forwarded to ``execute_sparql``.
        graph_uri: URI of the graph to clear; it is wrapped in ``<...>``.
    """
    clear_query = f"CLEAR GRAPH <{graph_uri}>"
    # The graph is named in the statement itself: no graph parameter, no format.
    execute_sparql(triple_store, True, clear_query, None, None)
|
236
220
|
|
@@ -0,0 +1,136 @@
|
|
1
|
+
|
2
|
+
from pyparsing import ParseResults
|
3
|
+
from rdflib import RDF, Graph, URIRef, Variable, Literal, XSD, util, BNode
|
4
|
+
from rdflib.plugins.sparql.parser import parseQuery, parseUpdate
|
5
|
+
from rdflib.plugins.sparql.algebra import translateQuery, translateUpdate, translateAlgebra
|
6
|
+
from rdflib.plugins.sparql.sparql import Query
|
7
|
+
from rdflib.plugins.sparql.parserutils import CompValue, value, Expr
|
8
|
+
from rdflib.namespace import DefinedNamespace, Namespace
|
9
|
+
from rdflib.term import Identifier
|
10
|
+
from typing import Union
|
11
|
+
|
12
|
+
from builtins import list, set, tuple, str
|
13
|
+
|
14
|
+
|
15
|
+
namespace = "https://mustrd.com/query/"
|
16
|
+
|
17
|
+
class MustrdQueryProcessor:
    """Parse a SPARQL query and optionally mirror its structure as an RDF graph.

    The query is parsed with ``parseQuery``. In algebra mode it is also
    translated with ``translateQuery`` and the algebra is what gets mirrored;
    otherwise the raw parse tree is used. In graph mode the chosen structure
    is written to ``self.graph`` so it can be queried or updated with SPARQL,
    and ``get_query`` rebuilds a query from that graph.
    """

    original_query: Union[Query, ParseResults]
    current_query: Union[Query, ParseResults]
    graph: Graph
    algebra_mode: bool = False
    graph_mode: bool = True

    def __init__(self, query_str: str, algebra_mode: bool = False, graph_mode: bool = True):
        """Parse ``query_str`` and, in graph mode, build its graph representation.

        Args:
            query_str: the SPARQL query text.
            algebra_mode: work on the translated algebra instead of the parse tree.
            graph_mode: mirror the query into ``self.graph``.
        """
        parsetree = parseQuery(query_str)
        # Init original query to algebra or parsed query
        self.original_query = translateQuery(parsetree) if algebra_mode else parsetree
        self.current_query = self.original_query
        self.algebra_mode = algebra_mode
        self.graph_mode = graph_mode
        self.graph = Graph()
        if graph_mode:
            root_part = self.original_query.algebra if algebra_mode else parsetree._toklist
            self.query_to_graph(root_part, BNode())

    def query_to_graph(self, part: CompValue, partBnode):
        """Recursively describe ``part`` in ``self.graph`` with ``partBnode`` as subject.

        Falsy parts (and a falsy node) are skipped entirely. Every other part
        gets an ``rdf:type`` and a ``QUERY.has_class`` literal holding its
        Python class name. ``CompValue`` items become one predicate per key,
        other iterables become ``QUERY.has_list`` children, and identifiers or
        strings are stored as a ``QUERY.has_value`` literal.
        """
        if not part or not partBnode:
            return
        class_name = type(part).__name__
        self.graph.add((partBnode, RDF.type, URIRef(namespace + class_name)))
        self.graph.add((partBnode, QUERY.has_class, Literal(str(class_name))))
        if isinstance(part, (CompValue, ParseResults)):
            self.graph.add((partBnode, QUERY.name, Literal(part.name)))
        if isinstance(part, CompValue):
            for key, sub_part in part.items():
                sub_part_bnode = BNode()
                self.graph.add((partBnode, URIRef(namespace + str(key)), sub_part_bnode))
                self.query_to_graph(sub_part, sub_part_bnode)
        elif hasattr(part, '__iter__') and not isinstance(part, (Identifier, str)):
            for sub_part in part:
                sub_part_bnode = BNode()
                self.graph.add((partBnode, QUERY.has_list, sub_part_bnode))
                self.query_to_graph(sub_part, sub_part_bnode)
        elif isinstance(part, (Identifier, str)):
            self.graph.add((partBnode, QUERY.has_value, Literal(part)))

    def _require_graph_mode(self):
        """Raise when the graph representation is not maintained."""
        if not self.graph_mode:
            raise Exception("Not able to execute that function if graph mode is not activated: cannot work with two sources of truth")

    def serialize_graph(self):
        """Return the query graph serialised as Turtle; graph mode only."""
        self._require_graph_mode()
        return self.graph.serialize(format="ttl")

    def query_graph(self, meta_query: str):
        """Run the SPARQL query ``meta_query`` on the query graph; graph mode only."""
        self._require_graph_mode()
        return self.graph.query(meta_query)

    def update(self, meta_query: str):
        """Apply the SPARQL update ``meta_query`` to the query graph.

        Outside graph mode the update is still applied to ``self.graph``;
        ``get_query`` does not read that graph in this mode.
        """
        if not self.graph_mode:
            # Implement update directly on objects: self.current_query
            pass
        return self.graph.update(meta_query)

    def get_query(self):
        """Return the query produced by ``translateAlgebra`` for the current state.

        In graph mode the query is rebuilt from the single root node of
        ``self.graph``; otherwise ``self.current_query`` is used.

        Raises:
            Exception: in graph mode, when the graph does not have exactly one
                root (a subject that is never used as an object).
        """
        if self.graph_mode:
            roots = self.graph.query("SELECT DISTINCT ?sub WHERE {?sub ?prop ?obj FILTER NOT EXISTS {?s ?p ?sub}}")
            root_count = len(roots)
            if root_count != 1:
                # The check also fires for an empty graph, so report the count.
                raise Exception(f"query graph must have exactly one root, found {root_count}: invalid")
            root = next(iter(roots))
            new_query = self.graph_to_query(root.sub)
            if not self.algebra_mode:
                new_query = ParseResults(toklist=new_query, name=self.original_query.name)
                new_query = translateQuery(new_query)
            else:
                new_query = Query(algebra=new_query, prologue=self.original_query.prologue)
        elif not self.algebra_mode:
            new_query = translateQuery(self.current_query)
        else:
            new_query = self.current_query
        return translateAlgebra(new_query)

    def graph_to_query(self, subject):
        """Rebuild the Python object described by ``subject`` in the query graph.

        The class is looked up by name in this module's ``globals()``, so only
        names available at module level can be rebuilt. Returns ``None`` when
        the node has no ``QUERY.has_class``, the class name is unknown, or no
        reconstruction rule matches.
        """
        subject_dict = self.get_subject_dict(subject)
        if QUERY.has_class not in subject_dict:
            return None
        class_name = str(subject_dict.pop(QUERY.has_class))
        clazz = globals().get(class_name)
        if clazz is None:
            return None

        if clazz in (CompValue, Expr):
            comp_name = str(subject_dict.pop(QUERY.name))
            subject_dict.pop(RDF.type)
            # Remaining predicates are the CompValue keys, prefixed by the namespace.
            prefix = str(QUERY._NS)
            new_dict = {str(key).replace(prefix, ""): self.graph_to_query(child)
                        for key, child in subject_dict.items()}
            return clazz(comp_name, **new_dict)
        if clazz in (set, list, tuple) and QUERY.has_list in subject_dict:
            return clazz(self.graph_to_query(item) for item in subject_dict[QUERY.has_list])
        if clazz == ParseResults and QUERY.has_list in subject_dict:
            return ParseResults(toklist=[self.graph_to_query(item) for item in subject_dict[QUERY.has_list]])
        if clazz in (Literal, Variable, URIRef, str) and QUERY.has_value in subject_dict:
            return clazz(str(subject_dict[QUERY.has_value]))
        return None

    def get_subject_dict(self, subject):
        """Return the predicate -> object mapping of ``subject`` in the query graph.

        ``QUERY.has_list`` objects are collected into a list; for any other
        predicate the last object seen wins.
        """
        subject_dict = {}
        for predicate, obj in self.graph.predicate_objects(subject):
            if predicate == QUERY.has_list:
                subject_dict.setdefault(predicate, []).append(obj)
            else:
                subject_dict[predicate] = obj
        return subject_dict
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
class QUERY(DefinedNamespace):
    """Terms of the ``https://mustrd.com/query/`` vocabulary."""

    _NS = Namespace("https://mustrd.com/query/")

    # Python class name of the described query part
    has_class: URIRef
    # Link from a sequence to one of its members
    has_list: URIRef
    # Name of a CompValue / ParseResults part
    name: URIRef
    # Value of an identifier or string part
    has_value: URIRef
|
@@ -24,7 +24,6 @@ SOFTWARE.
|
|
24
24
|
|
25
25
|
import os
|
26
26
|
from dataclasses import dataclass, field
|
27
|
-
from itertools import groupby
|
28
27
|
from pathlib import Path
|
29
28
|
from typing import Tuple, List, Type
|
30
29
|
|
@@ -39,6 +38,7 @@ from . import logger_setup
|
|
39
38
|
from .mustrdAnzo import get_queries_for_layer, get_queries_from_templated_step, get_spec_component_from_graphmart, get_query_from_querybuilder, get_query_from_step
|
40
39
|
from .namespace import MUST, TRIPLESTORE
|
41
40
|
from multimethods import MultiMethod, Default
|
41
|
+
from .utils import get_mustrd_root
|
42
42
|
|
43
43
|
log = logger_setup.setup_logger(__name__)
|
44
44
|
|
@@ -84,18 +84,14 @@ class SpecComponentDetails:
|
|
84
84
|
spec_component_node: Node
|
85
85
|
data_source_type: Node
|
86
86
|
run_config: dict
|
87
|
+
root_paths: list
|
87
88
|
|
88
|
-
def get_path(path_type: str,
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
except(KeyError):
|
95
|
-
if str(run_config['data_path']).startswith("/"):
|
96
|
-
return run_config['data_path']
|
97
|
-
else:
|
98
|
-
return Path(os.path.join(run_config['spec_path'], run_config['data_path']))
|
89
|
+
def get_path(path_type: str, file_name, spec_component_details: SpecComponentDetails) -> Path:
    """Resolve ``file_name`` to an existing path for the given kind of component.

    When the run configuration defines ``path_type``, that folder is prefixed
    to ``file_name``; the result is then resolved against the component's
    root paths by ``get_file_absolute_path``.

    Args:
        path_type: run-configuration key naming the folder to prefix.
        file_name: file name taken from the spec.
        spec_component_details: details of the component; ``run_config`` is
            read here and the object is forwarded for root-path resolution.

    Returns:
        The path returned by ``get_file_absolute_path``.
    """
    # A missing run configuration is treated like an empty one, matching
    # get_components_roots, instead of failing with a TypeError on ``in``.
    run_config = spec_component_details.run_config or {}
    if path_type in run_config:
        relative_path = os.path.join(run_config[path_type], file_name)
    else:
        relative_path = file_name
    return get_file_absolute_path(spec_component_details, relative_path)
|
99
95
|
|
100
96
|
|
101
97
|
def parse_spec_component(subject: URIRef,
|
@@ -117,7 +113,8 @@ def parse_spec_component(subject: URIRef,
|
|
117
113
|
mustrd_triple_store=mustrd_triple_store,
|
118
114
|
spec_component_node=spec_component_node,
|
119
115
|
data_source_type=data_source_type,
|
120
|
-
run_config=run_config
|
116
|
+
run_config=run_config,
|
117
|
+
root_paths=get_components_roots(spec_graph, subject, run_config))
|
121
118
|
spec_component = get_spec_component(spec_component_details)
|
122
119
|
if type(spec_component) == list:
|
123
120
|
spec_components += spec_component
|
@@ -130,6 +127,39 @@ def parse_spec_component(subject: URIRef,
|
|
130
127
|
# print(f"calling multimethod with {spec_components}")
|
131
128
|
return combine_specs(spec_components)
|
132
129
|
|
130
|
+
# Here we retrieve all the possible root paths for a specification component.
# This defines the order of priority between root paths which is:
# 1) Path where the spec is located
# 2) spec_path defined in mustrd test configuration files or cmd line argument
# 3) data_path defined in mustrd test configuration files or cmd line argument
# 4) Mustrd source folder: In case of default resources packaged with mustrd source (will be in venv when mustrd is called as library)
# We intentionally don't try for absolute files, but you should feel free to argue that we should do
def get_components_roots(spec_graph: Graph, subject: URIRef, run_config: dict) -> list:
    """Return the candidate root folders for a spec component, in priority order.

    Args:
        spec_graph: graph holding the spec; its ``MUST.specSourceFile`` value
            for ``subject`` gives the folder the spec was loaded from.
        subject: the spec being processed.
        run_config: run configuration; may be empty or ``None``. The
            ``spec_path`` and ``data_path`` entries are used when present.

    Returns:
        A list of root paths; the value of ``get_mustrd_root()`` is always last.
    """
    where_did_i_load_this_spec_from = spec_graph.value(subject=subject,
                                                       predicate=MUST.specSourceFile)
    roots = []
    if where_did_i_load_this_spec_from is None:
        # Without a source file the spec folder cannot be used as a root;
        # the remaining roots are still returned.
        log.error(f"{where_did_i_load_this_spec_from=} was None for test_spec={subject}, we didn't set the test specifications specSourceFile when loading, spec_graph={spec_graph}")
    else:
        roots.append(Path(os.path.dirname(where_did_i_load_this_spec_from)))
    if run_config:
        # Both configured folders are wrapped in Path so that every
        # configured root has the same type.
        for config_key in ('spec_path', 'data_path'):
            if config_key in run_config:
                roots.append(Path(run_config[config_key]))
    roots.append(get_mustrd_root())

    return roots
|
152
|
+
|
153
|
+
|
154
|
+
# From the list of component potential roots, return the first path that exists
|
155
|
+
def get_file_absolute_path(spec_component_details: "SpecComponentDetails", relative_file_path: str):
    """Return the first existing path built from the component's root paths.

    Each entry of ``spec_component_details.root_paths`` is joined with
    ``relative_file_path``; candidates are checked in order.

    Raises:
        ValueError: when ``relative_file_path`` is empty or ``None``.
        FileNotFoundError: when none of the candidate paths exists.
    """
    if not relative_file_path:
        raise ValueError("Cannot get absolute path of None")
    absolute_file_paths = [
        Path(os.path.join(root_path, relative_file_path))
        for root_path in spec_component_details.root_paths
    ]
    first_existing = next(
        (candidate for candidate in absolute_file_paths if os.path.exists(candidate)),
        None,
    )
    if first_existing is None:
        raise FileNotFoundError(f"Could not find file {relative_file_path=} in any of the {absolute_file_paths=}")
    return first_existing
|
133
163
|
|
134
164
|
def get_spec_component_type(spec_components: List[SpecComponent]) -> Type[SpecComponent]:
|
135
165
|
# Get the type of the first object in the list
|
@@ -224,7 +254,7 @@ def _get_spec_component_folderdatasource_given(spec_component_details: SpecCompo
|
|
224
254
|
file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
225
255
|
predicate=MUST.fileName)
|
226
256
|
|
227
|
-
path =
|
257
|
+
path = get_path('given_path', file_name,spec_component_details)
|
228
258
|
try:
|
229
259
|
spec_component.value = Graph().parse(data=get_spec_component_from_file(path))
|
230
260
|
except ParserError as e:
|
@@ -240,7 +270,7 @@ def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComp
|
|
240
270
|
file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
241
271
|
predicate=MUST.fileName)
|
242
272
|
|
243
|
-
path =
|
273
|
+
path = get_path('when_path', file_name,spec_component_details)
|
244
274
|
spec_component.value = get_spec_component_from_file(path)
|
245
275
|
spec_component.queryType = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
246
276
|
predicate=MUST.queryType)
|
@@ -253,7 +283,7 @@ def _get_spec_component_folderdatasource_then(spec_component_details: SpecCompon
|
|
253
283
|
|
254
284
|
file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
255
285
|
predicate=MUST.fileName)
|
256
|
-
path =
|
286
|
+
path = get_path('then_path', file_name,spec_component_details)
|
257
287
|
|
258
288
|
return load_dataset_from_file(path, spec_component)
|
259
289
|
|
@@ -264,27 +294,9 @@ def _get_spec_component_filedatasource(spec_component_details: SpecComponentDeta
|
|
264
294
|
return load_spec_component(spec_component_details, spec_component)
|
265
295
|
|
266
296
|
def load_spec_component(spec_component_details, spec_component):
    """Load the dataset referenced by the component's ``MUST.file`` value.

    The file reference is read from the spec graph, resolved against the
    component's root paths with ``get_file_absolute_path`` and handed to
    ``load_dataset_from_file`` together with ``spec_component``.

    Returns:
        Whatever ``load_dataset_from_file`` returns.
    """
    declared_file = spec_component_details.spec_graph.value(
        subject=spec_component_details.spec_component_node,
        predicate=MUST.file)
    # NOTE(review): the value is stringified before use, so a missing
    # MUST.file presumably surfaces as a lookup of a file named "None" - confirm.
    file_path = Path(str(declared_file))
    resolved_path = get_file_absolute_path(spec_component_details, file_path)
    return load_dataset_from_file(resolved_path, spec_component)
|
288
300
|
|
289
301
|
|
290
302
|
def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
|
@@ -320,12 +332,8 @@ def _get_spec_component_filedatasource_when(spec_component_details: SpecComponen
|
|
320
332
|
spec_component = init_spec_component(spec_component_details.predicate)
|
321
333
|
|
322
334
|
file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
323
|
-
predicate=MUST.file)))
|
324
|
-
|
325
|
-
path = file_path
|
326
|
-
else: #relative path
|
327
|
-
path = Path(os.path.join(spec_component_details.run_config['spec_path'], file_path))
|
328
|
-
spec_component.value = get_spec_component_from_file(path)
|
335
|
+
predicate=MUST.file)))
|
336
|
+
spec_component.value = get_spec_component_from_file(get_file_absolute_path(spec_component_details, file_path))
|
329
337
|
|
330
338
|
spec_component.queryType = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
331
339
|
predicate=MUST.queryType)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "mustrd"
|
3
|
-
version = "0.
|
3
|
+
version = "0.2.0"
|
4
4
|
description = "A Spec By Example framework for RDF and SPARQL, Inspired by Cucumber."
|
5
5
|
authors = ["John Placek <john.placek@semanticpartners.com>",
|
6
6
|
"Juliane Piñeiro-Winkler <juliane.pineiro-winkler@semanticpartners.com>",
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|