mustrd 0.1.8__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. {mustrd-0.1.8 → mustrd-0.2.0}/PKG-INFO +1 -1
  2. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/README.adoc +9 -0
  3. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/mustrd.py +1 -2
  4. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/mustrdAnzo.py +61 -77
  5. mustrd-0.2.0/mustrd/mustrdQueryProcessor.py +136 -0
  6. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/spec_component.py +49 -41
  7. {mustrd-0.1.8 → mustrd-0.2.0}/pyproject.toml +1 -1
  8. {mustrd-0.1.8 → mustrd-0.2.0}/LICENSE +0 -0
  9. {mustrd-0.1.8 → mustrd-0.2.0}/README.adoc +0 -0
  10. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/TestResult.py +0 -0
  11. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/__init__.py +0 -0
  12. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/logger_setup.py +0 -0
  13. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/catalog-v001.xml +0 -0
  14. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/mustrdShapes.ttl +0 -0
  15. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/mustrdTestOntology.ttl +0 -0
  16. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/mustrdTestShapes.ttl +0 -0
  17. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/ontology.ttl +0 -0
  18. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/test-resources/resources.ttl +0 -0
  19. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/triplestoreOntology.ttl +0 -0
  20. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/model/triplestoreshapes.ttl +0 -0
  21. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/mustrdGraphDb.py +0 -0
  22. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/mustrdRdfLib.py +0 -0
  23. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/mustrdTestPlugin.py +0 -0
  24. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/namespace.py +0 -0
  25. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/run.py +0 -0
  26. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/steprunner.py +0 -0
  27. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/templates/md_ResultList_leaf_template.jinja +0 -0
  28. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/templates/md_ResultList_template.jinja +0 -0
  29. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/templates/md_stats_template.jinja +0 -0
  30. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/test/test_mustrd.py +0 -0
  31. {mustrd-0.1.8 → mustrd-0.2.0}/mustrd/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mustrd
3
- Version: 0.1.8
3
+ Version: 0.2.0
4
4
  Summary: A Spec By Example framework for RDF and SPARQL, Inspired by Cucumber.
5
5
  Home-page: https://github.com/Semantic-partners/mustrd
6
6
  License: MIT
@@ -26,6 +26,15 @@ Run `pytest` from the project root.
26
26
 
27
27
  If you have got this far then you are probably ready to create your own specifications to test your application SPARQL queries. These will be executed against the default RDFLib triplestore unless you configure one or more alternatives. The instructions for this are included in <<Configuring external triplestores>> below.
28
28
 
29
+ === Paths
30
+ All paths are considered relative. That way mustrd tests can be versioned and shared easily.
31
+ To get an absolute path from a relative path in a spec file, we prefix it with the first existing result among:
32
+ 1) Path where the spec is located
33
+ 2) spec_path defined in mustrd test configuration files or cmd line argument
34
+ 3) data_path defined in mustrd test configuration files or cmd line argument
35
+ 4) Mustrd folder: for default resources packaged with the mustrd source (this will be inside the venv when mustrd is called as a library)
36
+ We intentionally use the same method to build paths in all spec components to avoid confusion.
37
+
29
38
  === Givens
30
39
  These are used to specify the dataset against which the SPARQL statement will be run.
31
40
  They can be generated from external sources such as an existing graph, or a file or folder containing serialised RDF. It is also possible to specify the dataset as reified RDF directly in the test step. Currently tabular data sources such as csv files or TableDatasets are not supported.
@@ -304,8 +304,7 @@ def run_specs(specs) -> List[SpecResult]:
304
304
  return results
305
305
 
306
306
  def get_spec_file(spec_uri: URIRef, spec_graph: Graph):
307
- test = str(spec_graph.value(subject = spec_uri, predicate = MUST.specFileName, default = "default.mustrd.ttl"))
308
- return test
307
+ return str(spec_graph.value(subject = spec_uri, predicate = MUST.specFileName, default = "default.mustrd.ttl"))
309
308
 
310
309
  def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_triple_store: dict = None) -> Specification:
311
310
  try:
@@ -28,7 +28,6 @@ from rdflib import Graph, ConjunctiveGraph, Literal, URIRef
28
28
  from requests import ConnectTimeout, Response, HTTPError, RequestException, ConnectionError
29
29
  from bs4 import BeautifulSoup
30
30
  import logging
31
- from .namespace import MUST
32
31
 
33
32
 
34
33
  # https://github.com/Semantic-partners/mustrd/issues/73
@@ -43,7 +42,6 @@ def manage_anzo_response(response: Response) -> str:
43
42
  else:
44
43
  raise RequestException(f"Anzo error, status code: {response.status_code}, content: {content_string}")
45
44
 
46
-
47
45
  def query_with_bindings(bindings: dict, when: str) -> str:
48
46
  values = ""
49
47
  for key, value in bindings.items():
@@ -52,68 +50,70 @@ def query_with_bindings(bindings: dict, when: str) -> str:
52
50
  return f"{split_query[0].strip()} WHERE {{ {values} {split_query[1].strip()}"
53
51
 
54
52
  def execute_select (triple_store: dict, when: str, bindings: dict = None) -> str:
53
+ if bindings:
54
+ when = query_with_bindings(bindings, when)
55
+ # Just remove ${fromSources} if we are executing a query step; the sources are defined using http parameters
56
+ when = when.replace("${fromSources}", "")
57
+ return execute_sparql(triple_store, False, when, triple_store['input_graph'])
58
+
59
+ PARAMS = {
60
+ # Update parameters for INSERT / DELETE
61
+ True: {
62
+ "default-graph-param": "using-graph-uri",
63
+ "named-graph-param":"using-named-graph-uri",
64
+ "Content-Type": "application/sparql-update"
65
+ },
66
+ # Query parameters for SELECT / CONSTRUCT
67
+ False: {
68
+ "default-graph-param": "default-graph-uri",
69
+ "named-graph-param":"named-graph-uri",
70
+ "Content-Type": "application/sparql-query"
71
+ }
72
+ }
73
+ def execute_sparql(triple_store: dict, is_update: bool, sparql, graph: str, format: str = "application/sparql-results+json"):
74
+ params = {
75
+ "format" : format,
76
+ "datasourceURI" : triple_store['gqe_uri'],
77
+ "skipCache": "true",
78
+ # Default and named datasets have different query param for query and update
79
+ PARAMS[is_update]["default-graph-param"] : graph,
80
+ PARAMS[is_update]["named-graph-param"] : graph
81
+ }
82
+ headers={"Content-Type": PARAMS[is_update]["Content-Type"]}
55
83
  try:
56
- if bindings:
57
- when = query_with_bindings(bindings, when)
58
- when = when.replace("${fromSources}", f"FROM <{triple_store['input_graph']}>\nFROM <{triple_store['output_graph']}>").replace(
59
- "${targetGraph}", f"<{triple_store['output_graph']}>")
60
- data = {'datasourceURI': triple_store['gqe_uri'], 'query': when,
61
- 'skipCache': 'true'}
62
- url = f"https://{triple_store['url']}:{triple_store['port']}/sparql?format=application/sparql-results+json"
63
- return manage_anzo_response(requests.post(url=url,
64
- auth=(triple_store['username'], triple_store['password']),
65
- data=data,
66
- verify=False))
67
- except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout):
84
+ response = manage_anzo_response(requests.post(url=f"https://{triple_store['url']}:{triple_store['port']}/sparql",
85
+ params=params,
86
+ auth=(triple_store['username'], triple_store['password']),
87
+ headers=headers,
88
+ data=sparql,
89
+ verify=False))
90
+ logging.debug(f'response {response}')
91
+ return response
92
+ except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout) as e:
93
+ logging.error(f'response {e}')
68
94
  raise
69
95
 
70
96
  def execute_update(triple_store: dict, when: str, bindings: dict = None) -> Graph:
71
97
  logging.debug(f"updating in anzo! {triple_store=} {when=}")
72
- input_graph = triple_store['input_graph']
73
- output_graph = triple_store['output_graph']
74
-
75
- substituted_query = when.replace("${usingSources}", f"USING <{triple_store['input_graph']}> \nUSING <{triple_store['output_graph']}>").replace(
76
- "${targetGraph}", f"<{output_graph}>")
77
-
78
- data = {'datasourceURI': triple_store['gqe_uri'], 'update': substituted_query,
79
- 'default-graph-uri': input_graph, 'skipCache': 'true'}
80
- url = f"https://{triple_store['url']}:{triple_store['port']}/sparql?format=ttl"
81
- response = manage_anzo_response(requests.post(url=url,
82
- auth=(triple_store['username'],
83
- triple_store['password']),
84
- data=data,
85
- verify=False))
86
- logging.debug(f'response {response}')
87
- check_data = {'datasourceURI': triple_store['gqe_uri'], 'query': "construct {?s ?p ?o} { ?s ?p ?o }",
88
- 'default-graph-uri': output_graph, 'skipCache': 'true'}
89
- everything_response = manage_anzo_response(requests.post(url=url,
90
- auth=(triple_store['username'],
91
- triple_store['password']),
92
- data=check_data,
93
- verify=False))
94
- # todo deal with error responses
95
- new_graph = Graph().parse(data=everything_response)
98
+
99
+ # FIXME If query doesn't contain ${targetGraph}, then graph should be defined explicitly in the query
100
+ substituted_query = when.replace("${usingSources}", "").replace(
101
+ "${targetGraph}", f"<{triple_store['output_graph']}>")
102
+
103
+ execute_sparql(triple_store, True, substituted_query, triple_store['input_graph'], "ttl")
104
+
105
+ new_graph = execute_construct(triple_store, "construct {?s ?p ?o} { ?s ?p ?o }")
106
+
96
107
  logging.debug(f"new_graph={new_graph.serialize(format='ttl')}")
108
+
97
109
  return new_graph
98
110
 
99
111
 
100
112
  def execute_construct(triple_store: dict, when: str, bindings: dict = None) -> Graph:
101
- try:
102
- if bindings:
103
- when = query_with_bindings(bindings, when)
104
- data = {'datasourceURI': triple_store['gqe_uri'], 'query': when,
105
- 'default-graph-uri': triple_store['input_graph'], 'skipCache': 'true'}
106
- url = f"https://{triple_store['url']}:{triple_store['port']}/sparql?format=ttl"
107
- response = requests.post(url=url,
108
- auth=(triple_store['username'],
109
- triple_store['password']),
110
- data=data,
111
- verify=False)
112
- logging.debug(f'response {response}')
113
- return Graph().parse(data=manage_anzo_response(response))
114
- except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout) as e:
115
- logging.error(f'response {e}')
116
- raise
113
+ if bindings:
114
+ when = query_with_bindings(bindings, when)
115
+ response = execute_sparql(triple_store, False, when, triple_store['input_graph'], "ttl")
116
+ return Graph().parse(data=response)
117
117
 
118
118
 
119
119
  # Get Given or then from the content of a graphmart
@@ -207,30 +207,14 @@ SELECT ?query ?param_query ?query_template
207
207
 
208
208
  def upload_given(triple_store: dict, given: Graph):
209
209
  logging.debug(f"upload_given {triple_store} {given}")
210
-
211
- try:
212
- input_graph = triple_store['input_graph']
213
- output_graph = triple_store['output_graph']
214
- clear_graph(triple_store, input_graph)
215
- clear_graph(triple_store, output_graph)
216
- serialized_given = given.serialize(format="nt")
217
- insert_query = f"INSERT DATA {{graph <{triple_store['input_graph']}>{{{serialized_given}}}}}"
218
- data = {'datasourceURI': triple_store['gqe_uri'], 'update': insert_query}
219
- response = requests.post(url=f"https://{triple_store['url']}:{triple_store['port']}/sparql",
220
- auth=(triple_store['username'], triple_store['password']), data=data, verify=False)
221
- manage_anzo_response(response)
222
- except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout):
223
- raise
210
+ clear_graph(triple_store, triple_store['input_graph'])
211
+ clear_graph(triple_store, triple_store['output_graph'])
212
+ serialized_given = given.serialize(format="nt")
213
+
214
+ insert_query = f"INSERT DATA {{graph <{triple_store['input_graph']}>{{{serialized_given}}}}}"
215
+ execute_sparql(triple_store, True, insert_query, None, None)
224
216
 
225
217
 
226
218
  def clear_graph(triple_store: dict, graph_uri: str):
227
- try:
228
- clear_query = f"CLEAR GRAPH <{graph_uri}>"
229
- data = {'datasourceURI': triple_store['gqe_uri'], 'update': clear_query}
230
- url = f"https://{triple_store['url']}:{triple_store['port']}/sparql"
231
- response = requests.post(url=url,
232
- auth=(triple_store['username'], triple_store['password']), data=data, verify=False)
233
- manage_anzo_response(response)
234
- except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout):
235
- raise
219
+ execute_sparql(triple_store, True, f"CLEAR GRAPH <{graph_uri}>", None, None)
236
220
 
@@ -0,0 +1,136 @@
1
+
2
+ from pyparsing import ParseResults
3
+ from rdflib import RDF, Graph, URIRef, Variable, Literal, XSD, util, BNode
4
+ from rdflib.plugins.sparql.parser import parseQuery, parseUpdate
5
+ from rdflib.plugins.sparql.algebra import translateQuery, translateUpdate, translateAlgebra
6
+ from rdflib.plugins.sparql.sparql import Query
7
+ from rdflib.plugins.sparql.parserutils import CompValue, value, Expr
8
+ from rdflib.namespace import DefinedNamespace, Namespace
9
+ from rdflib.term import Identifier
10
+ from typing import Union
11
+
12
+ from builtins import list, set, tuple, str
13
+
14
+
15
+ namespace = "https://mustrd.com/query/"
16
+
17
+ class MustrdQueryProcessor:
18
+ original_query : Union [Query, ParseResults]
19
+ current_query : Union [Query, ParseResults]
20
+ graph : Graph
21
+ algebra_mode: bool = False
22
+ graph_mode: bool = True
23
+
24
+ def __init__(self, query_str: str, algebra_mode: bool = False, graph_mode: bool = True):
25
+ parsetree = parseQuery(query_str)
26
+ # Init original query to algebra or parsed query
27
+ self.original_query = (algebra_mode and translateQuery(parsetree)) or parsetree
28
+ self.current_query = self.original_query
29
+ self.algebra_mode = algebra_mode
30
+ self.graph_mode = graph_mode
31
+ self.graph = Graph()
32
+ if graph_mode:
33
+ self.query_to_graph((algebra_mode and self.original_query.algebra) or parsetree._toklist, BNode())
34
+
35
+ def query_to_graph(self, part: CompValue, partBnode):
36
+ if not part or not partBnode:
37
+ return
38
+ self.graph.add((partBnode, RDF.type, URIRef(namespace + type(part).__name__)))
39
+ self.graph.add((partBnode, QUERY.has_class , Literal(str(part.__class__.__name__))))
40
+ if isinstance(part, CompValue) or isinstance(part, ParseResults):
41
+ self.graph.add((partBnode, QUERY.name , Literal(part.name)))
42
+ if isinstance(part, CompValue):
43
+ for key, sub_part in part.items():
44
+ sub_part_bnode = BNode()
45
+ self.graph.add((partBnode, URIRef(namespace + str(key)) , sub_part_bnode))
46
+ self.query_to_graph(sub_part, sub_part_bnode)
47
+ elif hasattr(part, '__iter__') and not isinstance(part, Identifier) and not isinstance(part, str):
48
+ for sub_part in part:
49
+ sub_part_bnode = BNode()
50
+ self.graph.add((partBnode, QUERY.has_list , sub_part_bnode))
51
+ self.query_to_graph(sub_part, sub_part_bnode)
52
+ elif isinstance(part, Identifier) or isinstance(part, str):
53
+ self.graph.add((partBnode, QUERY.has_value, Literal(part)))
54
+
55
+ def serialize_graph(self):
56
+ if not self.graph_mode:
57
+ raise Exception("Not able to execute that function if graph mode is not activated: cannot work with two sources of truth")
58
+ return self.graph.serialize(format = "ttl")
59
+
60
+ def query_graph(self, meta_query: str):
61
+ if not self.graph_mode:
62
+ raise Exception("Not able to execute that function if graph mode is not activated: cannot work with two sources of truth")
63
+ return self.graph.query(meta_query)
64
+
65
+ def update(self, meta_query: str):
66
+ if not self.graph_mode:
67
+ # Implement update directly on objects: self.current_query
68
+ pass
69
+ return self.graph.update(meta_query)
70
+
71
+ def get_query(self):
72
+ if self.graph_mode:
73
+ roots = self.graph.query("SELECT DISTINCT ?sub WHERE {?sub ?prop ?obj FILTER NOT EXISTS {?s ?p ?sub}}")
74
+ if len(roots) != 1:
75
+ raise Exception("query graph has more than one root: invalid")
76
+
77
+ for root in roots:
78
+ new_query = self.graph_to_query(root.sub)
79
+ if not self.algebra_mode:
80
+ new_query = ParseResults(toklist=new_query, name=self.original_query.name)
81
+ new_query = translateQuery(new_query)
82
+ else:
83
+ new_query = Query(algebra=new_query, prologue=self.original_query.prologue)
84
+ else:
85
+ if not self.algebra_mode:
86
+ new_query = translateQuery(self.current_query)
87
+ else:
88
+ new_query = self.current_query
89
+ return translateAlgebra(new_query)
90
+
91
+
92
+
93
+ def graph_to_query(self, subject):
94
+ subject_dict = self.get_subject_dict(subject)
95
+ if QUERY.has_class in subject_dict:
96
+ class_name = str(subject_dict[QUERY.has_class])
97
+ subject_dict.pop(QUERY.has_class)
98
+ if class_name in globals():
99
+ clazz = globals()[class_name]
100
+ if clazz in (CompValue, Expr):
101
+ comp_name = str(subject_dict[QUERY.name])
102
+ subject_dict.pop(QUERY.name)
103
+ subject_dict.pop(RDF.type)
104
+ new_dict = dict(map(lambda kv: [str(kv[0]).replace(str(QUERY._NS), ""),
105
+ self.graph_to_query(kv[1])] ,
106
+ subject_dict.items()))
107
+ return clazz(comp_name, **new_dict)
108
+ elif clazz in (set, list, tuple) and QUERY.has_list in subject_dict:
109
+ return clazz(map(lambda item: self.graph_to_query(item), subject_dict[QUERY.has_list]))
110
+ elif clazz == ParseResults and QUERY.has_list in subject_dict:
111
+ return ParseResults(toklist=list(map(lambda item: self.graph_to_query(item), subject_dict[QUERY.has_list])))
112
+ elif clazz in (Literal, Variable, URIRef, str) and QUERY.has_value in subject_dict:
113
+ return clazz(str(subject_dict[QUERY.has_value]))
114
+
115
+
116
+ def get_subject_dict(self, subject):
117
+ dict = {}
118
+ for key, value in self.graph.predicate_objects(subject):
119
+ # If key already exists: create or add to a list
120
+ if key == QUERY.has_list:
121
+ if key in dict:
122
+ dict[key].append(value)
123
+ else:
124
+ dict[key] = [value]
125
+ else:
126
+ dict[key] = value
127
+ return dict
128
+
129
+
130
+
131
+ class QUERY(DefinedNamespace):
132
+ _NS = Namespace("https://mustrd.com/query/")
133
+ has_class : URIRef
134
+ has_list : URIRef
135
+ name : URIRef
136
+ has_value: URIRef
@@ -24,7 +24,6 @@ SOFTWARE.
24
24
 
25
25
  import os
26
26
  from dataclasses import dataclass, field
27
- from itertools import groupby
28
27
  from pathlib import Path
29
28
  from typing import Tuple, List, Type
30
29
 
@@ -39,6 +38,7 @@ from . import logger_setup
39
38
  from .mustrdAnzo import get_queries_for_layer, get_queries_from_templated_step, get_spec_component_from_graphmart, get_query_from_querybuilder, get_query_from_step
40
39
  from .namespace import MUST, TRIPLESTORE
41
40
  from multimethods import MultiMethod, Default
41
+ from .utils import get_mustrd_root
42
42
 
43
43
  log = logger_setup.setup_logger(__name__)
44
44
 
@@ -84,18 +84,14 @@ class SpecComponentDetails:
84
84
  spec_component_node: Node
85
85
  data_source_type: Node
86
86
  run_config: dict
87
+ root_paths: list
87
88
 
88
- def get_path(path_type: str, run_config: dict) -> Path:
89
- try:
90
- if str(run_config[path_type]).startswith("/"):
91
- return run_config[path_type]
92
- else:
93
- return Path(os.path.join(run_config['spec_path'], run_config[path_type]))
94
- except(KeyError):
95
- if str(run_config['data_path']).startswith("/"):
96
- return run_config['data_path']
97
- else:
98
- return Path(os.path.join(run_config['spec_path'], run_config['data_path']))
89
+ def get_path(path_type: str, file_name, spec_component_details: SpecComponentDetails) -> Path:
90
+ if path_type in spec_component_details.run_config:
91
+ relative_path = os.path.join(spec_component_details.run_config[path_type], file_name)
92
+ else:
93
+ relative_path = file_name
94
+ return get_file_absolute_path(spec_component_details, relative_path)
99
95
 
100
96
 
101
97
  def parse_spec_component(subject: URIRef,
@@ -117,7 +113,8 @@ def parse_spec_component(subject: URIRef,
117
113
  mustrd_triple_store=mustrd_triple_store,
118
114
  spec_component_node=spec_component_node,
119
115
  data_source_type=data_source_type,
120
- run_config=run_config)
116
+ run_config=run_config,
117
+ root_paths=get_components_roots(spec_graph, subject, run_config))
121
118
  spec_component = get_spec_component(spec_component_details)
122
119
  if type(spec_component) == list:
123
120
  spec_components += spec_component
@@ -130,6 +127,39 @@ def parse_spec_component(subject: URIRef,
130
127
  # print(f"calling multimethod with {spec_components}")
131
128
  return combine_specs(spec_components)
132
129
 
130
+ # Here we retrieve all the possible root paths for a specification component.
131
+ # This defines the order of priority between root paths which is:
132
+ # 1) Path where the spec is located
133
+ # 2) spec_path defined in mustrd test configuration files or cmd line argument
134
+ # 3) data_path defined in mustrd test configuration files or cmd line argument
135
+ # 4) Mustrd source folder: In case of default resources packaged with mustrd source (will be in venv when mustrd is called as library)
136
+ # We intentionally don't try for absolute files, but you should feel free to argue that we should do
137
+ def get_components_roots(spec_graph: Graph, subject: URIRef, run_config: dict):
138
+ where_did_i_load_this_spec_from = spec_graph.value(subject=subject,
139
+ predicate=MUST.specSourceFile)
140
+ roots = []
141
+ if (where_did_i_load_this_spec_from == None):
142
+ log.error(f"{where_did_i_load_this_spec_from=} was None for test_spec={subject}, we didn't set the test specifications specSourceFile when loading, spec_graph={spec_graph}")
143
+ else:
144
+ roots.append(Path(os.path.dirname(where_did_i_load_this_spec_from)))
145
+ if run_config and'spec_path' in run_config:
146
+ roots.append(Path(run_config['spec_path']))
147
+ if run_config and 'data_path' in run_config:
148
+ roots.append(run_config['data_path'])
149
+ roots.append(get_mustrd_root())
150
+
151
+ return roots
152
+
153
+
154
+ # From the list of component potential roots, return the first path that exists
155
+ def get_file_absolute_path(spec_component_details: SpecComponentDetails, relative_file_path: str):
156
+ if not relative_file_path:
157
+ raise ValueError("Cannot get absolute path of None")
158
+ absolute_file_paths = list(map(lambda root_path: Path(os.path.join(root_path, relative_file_path)), spec_component_details.root_paths))
159
+ for absolute_file_path in absolute_file_paths:
160
+ if (os.path.exists(absolute_file_path)):
161
+ return absolute_file_path
162
+ raise FileNotFoundError(f"Could not find file {relative_file_path=} in any of the {absolute_file_paths=}")
133
163
 
134
164
  def get_spec_component_type(spec_components: List[SpecComponent]) -> Type[SpecComponent]:
135
165
  # Get the type of the first object in the list
@@ -224,7 +254,7 @@ def _get_spec_component_folderdatasource_given(spec_component_details: SpecCompo
224
254
  file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
225
255
  predicate=MUST.fileName)
226
256
 
227
- path = Path(os.path.join(str(get_path('given_path',spec_component_details.run_config)), str(file_name)))
257
+ path = get_path('given_path', file_name,spec_component_details)
228
258
  try:
229
259
  spec_component.value = Graph().parse(data=get_spec_component_from_file(path))
230
260
  except ParserError as e:
@@ -240,7 +270,7 @@ def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComp
240
270
  file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
241
271
  predicate=MUST.fileName)
242
272
 
243
- path = Path(os.path.join(str(get_path('when_path',spec_component_details.run_config)), str(file_name)))
273
+ path = get_path('when_path', file_name,spec_component_details)
244
274
  spec_component.value = get_spec_component_from_file(path)
245
275
  spec_component.queryType = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
246
276
  predicate=MUST.queryType)
@@ -253,7 +283,7 @@ def _get_spec_component_folderdatasource_then(spec_component_details: SpecCompon
253
283
 
254
284
  file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
255
285
  predicate=MUST.fileName)
256
- path = Path(os.path.join(str(get_path('then_path',spec_component_details.run_config)), str(file_name)))
286
+ path = get_path('then_path', file_name,spec_component_details)
257
287
 
258
288
  return load_dataset_from_file(path, spec_component)
259
289
 
@@ -264,27 +294,9 @@ def _get_spec_component_filedatasource(spec_component_details: SpecComponentDeta
264
294
  return load_spec_component(spec_component_details, spec_component)
265
295
 
266
296
  def load_spec_component(spec_component_details, spec_component):
267
- where_did_i_load_this_spec_from = spec_component_details.spec_graph.value(subject=spec_component_details.subject,
268
- predicate=MUST.specSourceFile)
269
- if (where_did_i_load_this_spec_from == None):
270
- log.error(f"{where_did_i_load_this_spec_from=} was None for test_spec={spec_component_details.subject}, we didn't set the test specifications specSourceFile when loading, spec_graph={spec_component_details.spec_graph}")
271
297
  file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
272
298
  predicate=MUST.file)))
273
-
274
- test_spec_file_path = os.path.dirname(where_did_i_load_this_spec_from)
275
-
276
- # first we try local relative to the test_spec_file_path, then we try relative to the path under test
277
- # we intentionally don't try for absolute files, but you should feel free to argue that we should do.
278
- paths = [
279
- Path(test_spec_file_path, file_path),
280
- Path(os.path.join(spec_component_details.run_config['spec_path'], file_path))
281
- ]
282
-
283
- for path in paths:
284
- if (os.path.exists(path)):
285
- return load_dataset_from_file(path, spec_component)
286
-
287
- raise FileNotFoundError(f"Could not find file {file_path=} in any of the {paths=}")
299
+ return load_dataset_from_file(get_file_absolute_path(spec_component_details, file_path), spec_component)
288
300
 
289
301
 
290
302
  def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
@@ -320,12 +332,8 @@ def _get_spec_component_filedatasource_when(spec_component_details: SpecComponen
320
332
  spec_component = init_spec_component(spec_component_details.predicate)
321
333
 
322
334
  file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
323
- predicate=MUST.file)))
324
- if str(file_path).startswith("/"): # absolute path
325
- path = file_path
326
- else: #relative path
327
- path = Path(os.path.join(spec_component_details.run_config['spec_path'], file_path))
328
- spec_component.value = get_spec_component_from_file(path)
335
+ predicate=MUST.file)))
336
+ spec_component.value = get_spec_component_from_file(get_file_absolute_path(spec_component_details, file_path))
329
337
 
330
338
  spec_component.queryType = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
331
339
  predicate=MUST.queryType)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "mustrd"
3
- version = "0.1.8"
3
+ version = "0.2.0"
4
4
  description = "A Spec By Example framework for RDF and SPARQL, Inspired by Cucumber."
5
5
  authors = ["John Placek <john.placek@semanticpartners.com>",
6
6
  "Juliane Piñeiro-Winkler <juliane.pineiro-winkler@semanticpartners.com>",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes