mustrd 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mustrd/README.adoc +210 -210
- mustrd/TestResult.py +136 -136
- mustrd/logger_setup.py +48 -48
- mustrd/model/catalog-v001.xml +5 -5
- mustrd/model/mustrdShapes.ttl +253 -253
- mustrd/model/mustrdTestShapes.ttl +24 -24
- mustrd/model/ontology.ttl +494 -494
- mustrd/model/test-resources/resources.ttl +60 -60
- mustrd/model/triplestoreOntology.ttl +174 -174
- mustrd/model/triplestoreshapes.ttl +41 -41
- mustrd/mustrd.py +787 -787
- mustrd/mustrdAnzo.py +236 -220
- mustrd/mustrdGraphDb.py +125 -125
- mustrd/mustrdRdfLib.py +56 -56
- mustrd/mustrdTestPlugin.py +327 -328
- mustrd/namespace.py +125 -125
- mustrd/run.py +106 -106
- mustrd/spec_component.py +690 -690
- mustrd/steprunner.py +166 -166
- mustrd/templates/md_ResultList_leaf_template.jinja +18 -18
- mustrd/templates/md_ResultList_template.jinja +8 -8
- mustrd/templates/md_stats_template.jinja +2 -2
- mustrd/test/test_mustrd.py +4 -4
- mustrd/utils.py +38 -38
- {mustrd-0.2.0.dist-info → mustrd-0.2.1.dist-info}/LICENSE +21 -21
- {mustrd-0.2.0.dist-info → mustrd-0.2.1.dist-info}/METADATA +4 -2
- mustrd-0.2.1.dist-info/RECORD +31 -0
- mustrd/mustrdQueryProcessor.py +0 -136
- mustrd-0.2.0.dist-info/RECORD +0 -32
- {mustrd-0.2.0.dist-info → mustrd-0.2.1.dist-info}/WHEEL +0 -0
- {mustrd-0.2.0.dist-info → mustrd-0.2.1.dist-info}/entry_points.txt +0 -0
mustrd/spec_component.py
CHANGED
@@ -1,690 +1,690 @@
|
|
1
|
-
"""
|
2
|
-
MIT License
|
3
|
-
|
4
|
-
Copyright (c) 2023 Semantic Partners Ltd
|
5
|
-
|
6
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
-
of this software and associated documentation files (the "Software"), to deal
|
8
|
-
in the Software without restriction, including without limitation the rights
|
9
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
-
copies of the Software, and to permit persons to whom the Software is
|
11
|
-
furnished to do so, subject to the following conditions:
|
12
|
-
|
13
|
-
The above copyright notice and this permission notice shall be included in all
|
14
|
-
copies or substantial portions of the Software.
|
15
|
-
|
16
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
22
|
-
SOFTWARE.
|
23
|
-
"""
|
24
|
-
|
25
|
-
import os
|
26
|
-
from dataclasses import dataclass, field
|
27
|
-
from pathlib import Path
|
28
|
-
from typing import Tuple, List, Type
|
29
|
-
|
30
|
-
import pandas
|
31
|
-
import requests
|
32
|
-
from rdflib import RDF, Graph, URIRef, Variable, Literal, XSD, util
|
33
|
-
from rdflib.exceptions import ParserError
|
34
|
-
from rdflib.term import Node
|
35
|
-
import logging
|
36
|
-
|
37
|
-
from . import logger_setup
|
38
|
-
from .mustrdAnzo import get_queries_for_layer, get_queries_from_templated_step, get_spec_component_from_graphmart, get_query_from_querybuilder, get_query_from_step
|
39
|
-
from .namespace import MUST, TRIPLESTORE
|
40
|
-
from multimethods import MultiMethod, Default
|
41
|
-
from .utils import get_mustrd_root
|
42
|
-
|
43
|
-
log = logger_setup.setup_logger(__name__)
|
44
|
-
|
45
|
-
|
46
|
-
@dataclass
class SpecComponent:
    """Common base type for the given / when / then parts of a test specification."""
|
49
|
-
|
50
|
-
|
51
|
-
@dataclass
class GivenSpec(SpecComponent):
    """The *given* part of a specification."""

    # Defaults to None; the loaders in this module assign the parsed graph afterwards.
    value: Graph = None
|
54
|
-
|
55
|
-
|
56
|
-
@dataclass
class WhenSpec(SpecComponent):
    """The *when* part of a specification: the query under test."""

    # Query text (file content, inline text, or fetched from a remote source).
    value: str = None
    # Value of MUST.queryType read from the spec graph.
    queryType: URIRef = None
    # Optional variable bindings applied to the query.
    bindings: dict = None
|
61
|
-
|
62
|
-
@dataclass
class AnzoWhenSpec(WhenSpec):
    """A *when* component with the extra fields used for Anzo templated steps."""

    # Filled from the "param_query" entry returned by the Anzo helpers.
    paramQuery: str = None
    # Filled from the "query_template" entry returned by the Anzo helpers.
    queryTemplate: str = None
|
66
|
-
|
67
|
-
@dataclass
class ThenSpec(SpecComponent):
    """The *then* part of a specification: the expected result as an RDF graph."""

    # A default_factory gives every instance its own empty Graph. The former
    # `Graph()` default was evaluated once, so all ThenSpec instances created
    # without an explicit value shared (and could mutate) the same graph.
    value: Graph = field(default_factory=Graph)
    # Whether the expected result is order-sensitive.
    ordered: bool = False
|
71
|
-
|
72
|
-
|
73
|
-
@dataclass
class TableThenSpec(ThenSpec):
    """A *then* component whose expected result is tabular rather than a graph."""

    # Overrides ThenSpec.value; each instance starts with its own empty DataFrame.
    value: pandas.DataFrame = field(default_factory=pandas.DataFrame)
|
76
|
-
|
77
|
-
|
78
|
-
@dataclass
class SpecComponentDetails:
    """Everything a get_spec_component handler needs to load one component."""

    # Test specification the component belongs to.
    subject: URIRef
    # MUST.given, MUST.when or MUST.then.
    predicate: URIRef
    # Graph holding the whole specification.
    spec_graph: Graph
    # Triple store configuration; handlers read its "type" entry.
    mustrd_triple_store: dict
    # Node describing this particular component inside spec_graph.
    spec_component_node: Node
    # rdf:type of spec_component_node, used for handler dispatch.
    data_source_type: Node
    # Run configuration (may contain spec_path, data_path, given_path, ...).
    run_config: dict
    # Candidate root folders, in priority order, for resolving relative files.
    root_paths: list
|
88
|
-
|
89
|
-
def get_path(path_type: str, file_name, spec_component_details: SpecComponentDetails) -> Path:
    """Resolve *file_name* to an existing absolute path.

    When the run configuration has an entry for *path_type* (for example
    'given_path'), the file name is looked up below that folder; otherwise it
    is used as is. Resolution against the root paths is delegated to
    get_file_absolute_path, which raises when nothing matches.
    """
    run_config = spec_component_details.run_config
    relative_path = os.path.join(run_config[path_type], file_name) if path_type in run_config else file_name
    return get_file_absolute_path(spec_component_details, relative_path)
|
95
|
-
|
96
|
-
|
97
|
-
def parse_spec_component(subject: URIRef,
                         predicate: URIRef,
                         spec_graph: Graph,
                         run_config: dict,
                         mustrd_triple_store: dict) -> GivenSpec | WhenSpec | ThenSpec | TableThenSpec:
    """Load and combine every component attached to *subject* via *predicate*.

    Each component node may carry several rdf types; one component is loaded
    per (node, type) pair through the get_spec_component multimethod, and the
    results are merged by combine_specs.

    :param subject: the test specification
    :param predicate: MUST.given, MUST.when or MUST.then
    :param spec_graph: graph holding the specification
    :param run_config: run configuration used to resolve file locations
    :param mustrd_triple_store: triple store configuration
    :return: whatever combine_specs produces for the loaded components
    """
    spec_components = []
    for spec_component_node in get_spec_component_nodes(subject, predicate, spec_graph):
        for data_source_type in get_data_source_types(subject, predicate, spec_graph, spec_component_node):
            spec_component_details = SpecComponentDetails(
                subject=subject,
                predicate=predicate,
                spec_graph=spec_graph,
                mustrd_triple_store=mustrd_triple_store,
                spec_component_node=spec_component_node,
                data_source_type=data_source_type,
                run_config=run_config,
                root_paths=get_components_roots(spec_graph, subject, run_config))
            spec_component = get_spec_component(spec_component_details)
            # Some handlers (e.g. the Anzo layer source) return several components at once.
            if isinstance(spec_component, list):
                spec_components.extend(spec_component)
            else:
                spec_components.append(spec_component)

    # Merge multiple graphs into one; combine_specs raises for unsupported combinations.
    return combine_specs(spec_components)
|
129
|
-
|
130
|
-
# Here we retrieve all the possible root paths for a specification component.
|
131
|
-
# This defines the order of priority between root paths which is:
|
132
|
-
# 1) Path where the spec is located
|
133
|
-
# 2) spec_path defined in mustrd test configuration files or cmd line argument
|
134
|
-
# 3) data_path defined in mustrd test configuration files or cmd line argument
|
135
|
-
# 4) Mustrd source folder: In case of default resources packaged with mustrd source (will be in venv when mustrd is called as library)
|
136
|
-
# We intentionally don't try absolute paths here, but feel free to argue that we should.
|
137
|
-
def get_components_roots(spec_graph: Graph, subject: URIRef, run_config: dict):
    """Return the candidate root folders for a spec's files, highest priority first.

    Order: the folder of the file the spec was loaded from (MUST.specSourceFile,
    when present), run_config['spec_path'], run_config['data_path'], and finally
    the folder returned by get_mustrd_root().

    :param spec_graph: graph holding the specification
    :param subject: the test specification
    :param run_config: run configuration; may be empty or None
    :return: list of root folders
    """
    where_did_i_load_this_spec_from = spec_graph.value(subject=subject,
                                                       predicate=MUST.specSourceFile)
    roots = []
    if where_did_i_load_this_spec_from is None:
        # Not fatal: the remaining roots may still resolve the file.
        log.error(f"{where_did_i_load_this_spec_from=} was None for test_spec={subject}, we didn't set the test specifications specSourceFile when loading, spec_graph={spec_graph}")
    else:
        roots.append(Path(os.path.dirname(where_did_i_load_this_spec_from)))
    if run_config and 'spec_path' in run_config:
        roots.append(Path(run_config['spec_path']))
    if run_config and 'data_path' in run_config:
        # Wrapped in Path for consistency with the spec_path root above.
        roots.append(Path(run_config['data_path']))
    roots.append(get_mustrd_root())

    return roots
|
152
|
-
|
153
|
-
|
154
|
-
# From the list of component potential roots, return the first path that exists
|
155
|
-
def get_file_absolute_path(spec_component_details: SpecComponentDetails, relative_file_path: str):
    """Return the first existing path obtained by joining each root path with *relative_file_path*.

    :raises ValueError: when *relative_file_path* is empty or None
    :raises FileNotFoundError: when the file exists under none of the root paths
    """
    if not relative_file_path:
        raise ValueError("Cannot get absolute path of None")

    # Candidates keep the priority order of root_paths.
    absolute_file_paths = [Path(os.path.join(root_path, relative_file_path))
                           for root_path in spec_component_details.root_paths]
    first_existing = next((candidate for candidate in absolute_file_paths if os.path.exists(candidate)), None)
    if first_existing is None:
        raise FileNotFoundError(f"Could not find file {relative_file_path=} in any of the {absolute_file_paths=}")
    return first_existing
|
163
|
-
|
164
|
-
def get_spec_component_type(spec_components: List[SpecComponent]) -> Type[SpecComponent]:
    """Return the single concrete type shared by all *spec_components*.

    :raises ValueError: when the list is empty or its elements have different types
    """
    # An empty list used to fail with a bare IndexError on [0]; report it explicitly.
    if not spec_components:
        raise ValueError("Cannot determine the type of an empty list of spec components")

    spec_type = type(spec_components[0])
    # Exact type comparison on purpose: subclasses (e.g. TableThenSpec vs ThenSpec)
    # are dispatched to different handlers and must not be mixed.
    if any(type(spec_component) is not spec_type for spec_component in spec_components[1:]):
        raise ValueError("All spec components must be of the same type")

    return spec_type
|
175
|
-
|
176
|
-
|
177
|
-
def combine_specs_dispatch(spec_components: List[SpecComponent]) -> Type[SpecComponent]:
    """Dispatch key for combine_specs: the common type of the components."""
    return get_spec_component_type(spec_components)
|
180
|
-
|
181
|
-
|
182
|
-
# Multimethod that merges a list of loaded components into one; the handler is
# chosen by the type returned from combine_specs_dispatch.
combine_specs = MultiMethod("combine_specs", combine_specs_dispatch)
|
183
|
-
|
184
|
-
|
185
|
-
@combine_specs.method(GivenSpec)
def _combine_given_specs(spec_components: List[GivenSpec]) -> GivenSpec:
    """Merge several *given* components into one whose graph contains all their triples.

    A single component is returned unchanged.
    """
    if len(spec_components) == 1:
        return spec_components[0]

    merged = Graph()
    for component in spec_components:
        merged += component.value
    return GivenSpec(value=merged)
|
196
|
-
|
197
|
-
|
198
|
-
@combine_specs.method(WhenSpec)
def _combine_when_specs(spec_components: List[WhenSpec]) -> List[WhenSpec]:
    """Return the *when* components unchanged.

    Unlike the given/then handlers nothing is merged here: the list itself is
    handed back, so the annotation now says List[WhenSpec] instead of WhenSpec.
    """
    return spec_components
|
201
|
-
|
202
|
-
|
203
|
-
@combine_specs.method(ThenSpec)
def _combine_then_specs(spec_components: List[ThenSpec]) -> ThenSpec:
    """Merge several graph-based *then* components into one.

    A single component is returned unchanged. When merging, the result is a
    fresh ThenSpec, so its `ordered` flag is the default (False).
    """
    if len(spec_components) == 1:
        return spec_components[0]

    merged = Graph()
    for component in spec_components:
        merged += component.value
    combined = ThenSpec()
    combined.value = merged
    return combined
|
214
|
-
|
215
|
-
|
216
|
-
@combine_specs.method(TableThenSpec)
def _combine_table_then_specs(spec_components: List[TableThenSpec]) -> TableThenSpec:
    """Return the only tabular *then* component; several tables cannot be combined.

    :raises ValueError: when the list does not contain exactly one component
    """
    if len(spec_components) == 1:
        return spec_components[0]
    raise ValueError("Parsing of multiple components of MUST.then for tables not implemented")
|
221
|
-
|
222
|
-
|
223
|
-
@combine_specs.method(Default)
def _combine_specs_default(spec_components: List[SpecComponent]):
    """Fallback for component types without a dedicated combine handler; always raises."""
    message = f"Parsing of multiple components of this type not implemented {spec_components}"
    raise ValueError(message)
|
226
|
-
|
227
|
-
def get_data_source_types(subject: URIRef, predicate: URIRef, spec_graph: Graph, source_node: Node) -> List[Node]:
    """Return every rdf:type declared on *source_node*.

    *subject* and *predicate* are only used in the error message.

    :raises ValueError: when the node has no rdf:type at all
    """
    data_source_types = list(spec_graph.objects(subject=source_node, predicate=RDF.type))
    if not data_source_types:
        raise ValueError(f"Node has no rdf type {subject} {predicate}")
    return data_source_types
|
235
|
-
|
236
|
-
# https://github.com/Semantic-partners/mustrd/issues/99
|
237
|
-
def get_spec_component_dispatch(spec_component_details: SpecComponentDetails) -> Tuple[Node, URIRef]:
    """Dispatch key for get_spec_component: (data source type, predicate)."""
    details = spec_component_details
    return (details.data_source_type, details.predicate)
|
239
|
-
|
240
|
-
|
241
|
-
# Multimethod that loads one spec component; handlers below are registered per
# (data source type, predicate) pair as returned by get_spec_component_dispatch.
get_spec_component = MultiMethod("get_spec_component", get_spec_component_dispatch)
|
242
|
-
|
243
|
-
|
244
|
-
@get_spec_component.method((MUST.InheritedDataset, MUST.given))
def _get_spec_component_inheritedstate_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
    """Return a blank component for an inherited dataset; no data is loaded here."""
    return init_spec_component(spec_component_details.predicate)
|
248
|
-
|
249
|
-
|
250
|
-
@get_spec_component.method((MUST.FolderDataset, MUST.given))
def _get_spec_component_folderdatasource_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
    """Load a *given* graph from the file named by MUST.fileName.

    The file is looked up below run_config['given_path'] when configured.

    :raises ValueError: when the file cannot be parsed as RDF
    """
    spec_component = init_spec_component(spec_component_details.predicate)

    file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
                                                        predicate=MUST.fileName)

    path = get_path('given_path', file_name, spec_component_details)
    try:
        spec_component.value = Graph().parse(data=get_spec_component_from_file(path))
    except ParserError as e:
        log.error(f"Problem parsing {path}, error of type {type(e)}")
        # Chain the parser error so the original traceback is not lost.
        raise ValueError(f"Problem parsing {path}, error of type {type(e)}") from e
    return spec_component
|
264
|
-
|
265
|
-
|
266
|
-
@get_spec_component.method((MUST.FolderSparqlSource, MUST.when))
def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComponentDetails) -> WhenSpec:
    """Load a *when* query from the file named by MUST.fileName.

    The file is looked up below run_config['when_path'] when configured. The
    return annotation previously said GivenSpec although the handler is only
    registered for MUST.when.
    """
    spec_component = init_spec_component(spec_component_details.predicate)
    graph = spec_component_details.spec_graph
    node = spec_component_details.spec_component_node

    file_name = graph.value(subject=node, predicate=MUST.fileName)

    path = get_path('when_path', file_name, spec_component_details)
    spec_component.value = get_spec_component_from_file(path)
    spec_component.queryType = graph.value(subject=node, predicate=MUST.queryType)
    return spec_component
|
278
|
-
|
279
|
-
|
280
|
-
@get_spec_component.method((MUST.FolderDataset, MUST.then))
def _get_spec_component_folderdatasource_then(spec_component_details: SpecComponentDetails) -> ThenSpec:
    """Load a *then* dataset from the file named by MUST.fileName.

    The file is looked up below run_config['then_path'] when configured;
    the actual loading is done by load_dataset_from_file.
    """
    details = spec_component_details
    blank_component = init_spec_component(details.predicate)
    file_name = details.spec_graph.value(subject=details.spec_component_node,
                                         predicate=MUST.fileName)
    resolved = get_path('then_path', file_name, details)
    return load_dataset_from_file(resolved, blank_component)
|
289
|
-
|
290
|
-
@get_spec_component.method((MUST.FileDataset, MUST.given))
@get_spec_component.method((MUST.FileDataset, MUST.then))
def _get_spec_component_filedatasource(spec_component_details: SpecComponentDetails) -> GivenSpec | ThenSpec:
    """Load a *given* or *then* dataset from the file referenced by MUST.file.

    The handler is registered for both predicates, so the return annotation
    covers both component kinds (it previously only mentioned GivenSpec).
    """
    spec_component = init_spec_component(spec_component_details.predicate)
    return load_spec_component(spec_component_details, spec_component)
|
295
|
-
|
296
|
-
def load_spec_component(spec_component_details, spec_component):
    """Load the dataset referenced by MUST.file into *spec_component*.

    The MUST.file value is resolved against the component's root paths and the
    result is passed to load_dataset_from_file.
    """
    file_node = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
                                                        predicate=MUST.file)
    # NOTE(review): a missing MUST.file becomes the literal path "None" here — confirm that is intended.
    file_path = Path(str(file_node))
    absolute_path = get_file_absolute_path(spec_component_details, file_path)
    return load_dataset_from_file(absolute_path, spec_component)
|
300
|
-
|
301
|
-
|
302
|
-
def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
    """Load *path* either as a table or as an RDF graph.

    ``.csv``, ``.xlsx`` and ``.xls`` files are read with pandas and returned in
    a new TableThenSpec (the passed-in *spec_component* is not used in that
    case). Any other file is parsed as RDF, with the serialization guessed from
    the file name, and stored in ``spec_component.value``.

    :param path: file to load
    :param spec_component: component that receives the parsed graph
    :return: a TableThenSpec for tabular files, otherwise *spec_component*
    :raises ValueError: when *path* is a directory, the format cannot be
        determined, or the content cannot be parsed
    """
    if path.is_dir():
        raise ValueError(f"Path {path} is a directory, expected a file")

    # https://github.com/Semantic-partners/mustrd/issues/94
    if path.suffix in {".csv", ".xlsx", ".xls"}:
        df = pandas.read_csv(path) if path.suffix == ".csv" else pandas.read_excel(path)
        then_spec = TableThenSpec()
        then_spec.value = df
        return then_spec

    try:
        file_format = util.guess_format(str(path))
    except AttributeError:
        raise ValueError(f"Unsupported file format: {path.suffix}")

    # An unrecognised format used to fall through and return None silently;
    # fail with the same message as the branch above instead.
    if file_format is None:
        raise ValueError(f"Unsupported file format: {path.suffix}")

    g = Graph()
    try:
        g.parse(data=get_spec_component_from_file(path), format=file_format)
    except ParserError as e:
        log.error(f"Problem parsing {path}, error of type {type(e)}")
        raise ValueError(f"Problem parsing {path}, error of type {type(e)}") from e
    spec_component.value = g
    return spec_component
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
@get_spec_component.method((MUST.FileSparqlSource, MUST.when))
def _get_spec_component_filedatasource_when(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Load a *when* query from the file referenced by MUST.file."""
    details = spec_component_details
    graph, node = details.spec_graph, details.spec_component_node
    when_component = init_spec_component(details.predicate)

    file_path = Path(str(graph.value(subject=node, predicate=MUST.file)))
    absolute_path = get_file_absolute_path(details, file_path)
    when_component.value = get_spec_component_from_file(absolute_path)

    when_component.queryType = graph.value(subject=node, predicate=MUST.queryType)

    return when_component
|
342
|
-
|
343
|
-
|
344
|
-
# @get_spec_component.method((MUST.FileDataset, MUST.then))
|
345
|
-
# def _get_spec_component_filedatasource_then(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
346
|
-
# spec_component = init_spec_component(spec_component_details.predicate)
|
347
|
-
|
348
|
-
# file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
349
|
-
# predicate=MUST.file)))
|
350
|
-
# if str(file_path).startswith("/"): # absolute path
|
351
|
-
# path = file_path
|
352
|
-
# else: #relative path
|
353
|
-
# path = Path(os.path.join(spec_component_details.run_config['spec_path'], file_path))
|
354
|
-
# return get_then_from_file(path, spec_component)
|
355
|
-
|
356
|
-
|
357
|
-
@get_spec_component.method((MUST.TextSparqlSource, MUST.when))
def _get_spec_component_TextSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Build a *when* component from query text embedded in the spec (MUST.queryText)."""
    details = spec_component_details
    graph, node = details.spec_graph, details.spec_component_node
    when_component = init_spec_component(details.predicate)

    # The query is stored directly in the specification as a text literal.
    query_text = graph.value(subject=node, predicate=MUST.queryText)
    when_component.value = str(query_text)

    when_component.bindings = get_when_bindings(details.subject, graph)
    when_component.queryType = graph.value(subject=node, predicate=MUST.queryType)
    return when_component
|
370
|
-
|
371
|
-
|
372
|
-
# https://github.com/Semantic-partners/mustrd/issues/98
|
373
|
-
@get_spec_component.method((MUST.HttpDataset, MUST.given))
@get_spec_component.method((MUST.HttpDataset, MUST.when))
@get_spec_component.method((MUST.HttpDataset, MUST.then))
def _get_spec_component_HttpDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Fetch a component over HTTP GET from the URL given by MUST.dataSourceUrl.

    The raw response body is stored as the component value.
    """
    details = spec_component_details
    graph, node = details.spec_graph, details.spec_component_node
    component = init_spec_component(details.predicate)

    url = str(graph.value(subject=node, predicate=MUST.dataSourceUrl))
    # NOTE(review): no timeout and no status check on this request — consider adding both.
    response = requests.get(url)
    component.value = response.content
    component.queryType = graph.value(subject=node, predicate=MUST.queryType)
    return component
|
386
|
-
|
387
|
-
|
388
|
-
@get_spec_component.method((MUST.TableDataset, MUST.then))
def _get_spec_component_TableDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Build a tabular *then* component from the table described in the spec graph."""
    details = spec_component_details
    lookup_args = (details.subject, details.predicate, details.spec_graph)

    table_then = TableThenSpec()
    # Expected rows are read from the ttl table definition.
    table_then.value = get_spec_from_table(*lookup_args)
    table_then.ordered = is_then_select_ordered(*lookup_args)
    return table_then
|
397
|
-
|
398
|
-
|
399
|
-
@get_spec_component.method((MUST.EmptyTable, MUST.then))
def _get_spec_component_EmptyTable(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Return a tabular *then* component left at its defaults (an empty table)."""
    return TableThenSpec()
|
403
|
-
|
404
|
-
|
405
|
-
@get_spec_component.method((MUST.EmptyGraph, MUST.then))
def _get_spec_component_EmptyGraph(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Return a freshly initialised component for the predicate; nothing is loaded into it."""
    return init_spec_component(spec_component_details.predicate)
|
410
|
-
|
411
|
-
|
412
|
-
@get_spec_component.method((MUST.StatementsDataset, MUST.given))
@get_spec_component.method((MUST.StatementsDataset, MUST.then))
def _get_spec_component_StatementsDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Build a graph component from reified statements embedded in the spec."""
    details = spec_component_details
    component = init_spec_component(details.predicate)

    # get_spec_from_statements returns serialized data, which is parsed back into a graph.
    serialized = get_spec_from_statements(details.subject, details.predicate, details.spec_graph)
    component.value = Graph().parse(data=serialized)
    return component
|
421
|
-
|
422
|
-
|
423
|
-
@get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.given))
@get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.then))
def _get_spec_component_AnzoGraphmartDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Load a *given* or *then* dataset from an Anzo graphmart (optionally one layer).

    :raises ValueError: when the configured triple store is not Anzo
    """
    details = spec_component_details
    component = init_spec_component(details.predicate)

    if details.mustrd_triple_store["type"] != TRIPLESTORE.Anzo:
        raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoGraphmartDataset}")

    graph, node = details.spec_graph, details.spec_component_node
    graphmart = graph.value(subject=node, predicate=MUST.graphmart)
    layer = graph.value(subject=node, predicate=MUST.layer)
    component.value = get_spec_component_from_graphmart(
        triple_store=details.mustrd_triple_store,
        graphmart=graphmart,
        layer=layer)

    return component
|
442
|
-
|
443
|
-
|
444
|
-
@get_spec_component.method((MUST.AnzoQueryBuilderSparqlSource, MUST.when))
def _get_spec_component_AnzoQueryBuilderSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Load a *when* query from the Anzo query builder (MUST.queryFolder / MUST.queryName).

    :raises ValueError: when the configured triple store is not Anzo
    """
    details = spec_component_details
    component = init_spec_component(details.predicate)

    # Anzo-specific source: refuse to run against any other triple store.
    if details.mustrd_triple_store["type"] != TRIPLESTORE.Anzo:
        raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoQueryBuilderSparqlSource}")

    graph, node = details.spec_graph, details.spec_component_node
    query_folder = graph.value(subject=node, predicate=MUST.queryFolder)
    query_name = graph.value(subject=node, predicate=MUST.queryName)
    component.value = get_query_from_querybuilder(triple_store=details.mustrd_triple_store,
                                                  folder_name=query_folder,
                                                  query_name=query_name)

    component.queryType = graph.value(subject=node, predicate=MUST.queryType)
    return component
|
464
|
-
|
465
|
-
|
466
|
-
@get_spec_component.method((MUST.AnzoGraphmartStepSparqlSource, MUST.when))
def _get_spec_component_AnzoGraphmartStepSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Load a *when* query from an Anzo graphmart step (MUST.anzoQueryStep).

    :raises ValueError: when the configured triple store is not Anzo
    """
    details = spec_component_details
    component = init_spec_component(details.predicate)

    # Anzo-specific source: refuse to run against any other triple store.
    if details.mustrd_triple_store["type"] != TRIPLESTORE.Anzo:
        raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoGraphmartStepSparqlSource}")

    graph, node = details.spec_graph, details.spec_component_node
    query_step_uri = graph.value(subject=node, predicate=MUST.anzoQueryStep)
    component.value = get_query_from_step(triple_store=details.mustrd_triple_store,
                                          query_step_uri=query_step_uri)

    component.queryType = graph.value(subject=node, predicate=MUST.queryType)
    return component
|
483
|
-
|
484
|
-
@get_spec_component.method((MUST.AnzoGraphmartQueryDrivenTemplatedStepSparqlSource, MUST.when))
def _get_spec_component_AnzoGraphmartQueryDrivenTemplatedStepSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Load the parameter query and query template of an Anzo query-driven templated step.

    :raises ValueError: when the configured triple store is not Anzo
    """
    details = spec_component_details
    store_type = details.mustrd_triple_store["type"]
    # Passing the store type lets init_spec_component pick AnzoWhenSpec.
    component = init_spec_component(details.predicate, store_type)

    if details.mustrd_triple_store["type"] != TRIPLESTORE.Anzo:
        raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoGraphmartQueryDrivenTemplatedStepSparqlSource}")

    graph, node = details.spec_graph, details.spec_component_node
    query_step_uri = graph.value(subject=node, predicate=MUST.anzoQueryStep)
    queries = get_queries_from_templated_step(triple_store=details.mustrd_triple_store,
                                              query_step_uri=query_step_uri)
    component.paramQuery = queries["param_query"]
    component.queryTemplate = queries["query_template"]

    component.queryType = graph.value(subject=node, predicate=MUST.queryType)
    return component
|
503
|
-
|
504
|
-
@get_spec_component.method((MUST.AnzoGraphmartLayerSparqlSource, MUST.when))
def _get_spec_component_AnzoGraphmartLayerSparqlSource(spec_component_details: SpecComponentDetails) -> list:
    """Load the ordered *when* components (plain and query-driven) of an Anzo graphmart layer.

    :return: one component per query returned for the layer
    :raises ValueError: when the configured triple store is not Anzo
    """
    details = spec_component_details
    # Anzo-specific source: refuse to run against any other triple store.
    if details.mustrd_triple_store["type"] != TRIPLESTORE.Anzo:
        raise ValueError("This test specification is specific to Anzo and can only be run against that platform.")

    graph, node = details.spec_graph, details.spec_component_node
    graphmart_layer_uri = graph.value(subject=node, predicate=MUST.anzoGraphmartLayer)
    queries = get_queries_for_layer(triple_store=details.mustrd_triple_store,
                                    graphmart_layer_uri=graphmart_layer_uri)

    layer_components = []
    for query in queries:
        component = init_spec_component(details.predicate, details.mustrd_triple_store["type"])
        component.value = query.get("query")
        component.paramQuery = query.get("param_query")
        component.queryTemplate = query.get("query_template")
        if component.value:
            component.queryType = graph.value(subject=node, predicate=MUST.queryType)
        else:
            # No plain query text: the step is treated as a query-driven update.
            component.queryType = MUST.AnzoQueryDrivenUpdateSparql
        layer_components.append(component)
    return layer_components
|
528
|
-
|
529
|
-
@get_spec_component.method(Default)
def _get_spec_component_default(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Fallback for (data source type, predicate) pairs without a handler; always raises."""
    source_type = spec_component_details.data_source_type
    component_predicate = spec_component_details.predicate
    raise ValueError(f"Invalid combination of data source type ({source_type}) and "
                     f"spec component ({component_predicate})")
|
534
|
-
|
535
|
-
|
536
|
-
def init_spec_component(predicate: URIRef, triple_store_type: URIRef = None ) -> GivenSpec | WhenSpec | ThenSpec | TableThenSpec:
    """Create the empty component object matching *predicate*.

    MUST.given gives a GivenSpec, MUST.then a ThenSpec, and MUST.when a WhenSpec
    (an AnzoWhenSpec when *triple_store_type* is TRIPLESTORE.Anzo). Any other
    predicate gives a plain SpecComponent.
    """
    if predicate == MUST.given:
        return GivenSpec()
    if predicate == MUST.when:
        return AnzoWhenSpec() if triple_store_type == TRIPLESTORE.Anzo else WhenSpec()
    if predicate == MUST.then:
        return ThenSpec()
    return SpecComponent()
|
549
|
-
|
550
|
-
|
551
|
-
def get_spec_component_nodes(subject: URIRef, predicate: URIRef, spec_graph: Graph) -> List[Node]:
    """Return every node attached to *subject* through *predicate*.

    :raises ValueError: when the specification has no such node
    """
    spec_component_nodes = list(spec_graph.objects(subject=subject, predicate=predicate))
    # It shouldn't even be possible to get this far, as a missing node indicates an
    # invalid spec file. The former `is None` test could never be true for a list,
    # so the check is now on emptiness.
    if not spec_component_nodes:
        raise ValueError(f"specComponent Node empty for {subject} {predicate}")
    return spec_component_nodes
|
559
|
-
|
560
|
-
|
561
|
-
def get_spec_component_from_file(path: Path) -> str:
    """Return the text content of the file at *path*.

    :raises ValueError: when *path* is a directory
    :raises FileNotFoundError: when *path* does not exist (propagated from read_text)
    """
    if path.is_dir():
        raise ValueError(f"Path {path} is a directory, expected a file")
    return path.read_text()
|
573
|
-
|
574
|
-
|
575
|
-
def get_spec_from_statements(subject: URIRef,
                             predicate: URIRef,
                             spec_graph: Graph) -> Graph:
    """Extract the reified statements of a StatementsDataset component.

    A CONSTRUCT query rebuilds the plain triples from the rdf:Statement nodes
    attached to *subject* via *predicate*; the resulting graph is returned
    serialized as Turtle.
    NOTE(review): the annotation says Graph, but the serialized form is what is
    returned — confirm and adjust the annotation.
    """
    statements_query = f"""
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

CONSTRUCT {{ ?s ?p ?o }}
{{
<{subject}> <{predicate}> [
a <{MUST.StatementsDataset}> ;
<{MUST.hasStatement}> [
a rdf:Statement ;
rdf:subject ?s ;
rdf:predicate ?p ;
rdf:object ?o ;
] ;
]

}}
"""
    constructed = spec_graph.query(statements_query).graph
    return constructed.serialize(format="ttl")
|
597
|
-
|
598
|
-
|
599
|
-
def get_spec_from_table(subject: URIRef,
                        predicate: URIRef,
                        spec_graph: Graph) -> pandas.DataFrame:
    """Build the expected-result table declared by a ``MUST.TableDataset``.

    Queries *spec_graph* for every row/variable/binding of the table at
    ``subject predicate`` and pivots them into a DataFrame with one column per
    variable plus a ``<variable>_datatype`` column. Rows are sorted by their
    optional ``sh:order`` value, the index is reset to a numeric one and empty
    cells are replaced by ``''``.
    """
    # query the spec to get the expected result to convert to dataframe for comparison
    then_query = f"""
    prefix sh: <http://www.w3.org/ns/shacl#>
    SELECT ?row ?variable ?binding ?order
    WHERE {{
         <{subject}> <{predicate}> [
                a <{MUST.TableDataset}> ;
                <{MUST.hasRow}> ?row ].
                ?row <{MUST.hasBinding}> [
                    <{MUST.variable}> ?variable ;
                    <{MUST.boundValue}> ?binding ; ] .
                OPTIONAL {{ ?row sh:order ?order . }}
                .}}
    ORDER BY ?order"""

    expected_results = spec_graph.query(then_query)

    # Collect row ids and column names in first-seen order. dict keys keep
    # insertion order, so the frame layout is the same on every run (plain
    # sets made the row/column order depend on string hashing).
    index = {}
    columns = {}
    for row in expected_results:
        index[str(row.row)] = None
        columns[row.variable.value] = None
        columns[row.variable.value + "_datatype"] = None
    # add an additional column for the sort order (if any) of the results
    columns["order"] = None

    # create an empty dataframe to populate with the results
    df = pandas.DataFrame(index=list(index), columns=list(columns))

    # fill the dataframe with the results data
    for row in expected_results:
        row_id = str(row.row)
        variable = row.variable.value
        df.loc[row_id, variable] = str(row.binding)
        df.loc[row_id, "order"] = row.order
        if isinstance(row.binding, Literal):
            # An untyped literal is reported as xsd:string.
            literal_type = str(row.binding.datatype) if row.binding.datatype else str(XSD.string)
            df.loc[row_id, variable + "_datatype"] = literal_type
        else:
            df.loc[row_id, variable + "_datatype"] = str(XSD.anyURI)

    # use the sort order to sort the results
    df.sort_values(by="order", inplace=True)
    # drop the order column, replace the row-id index with a numeric one and
    # replace empty values with empty strings
    df.drop(columns="order", inplace=True)
    df.reset_index(drop=True, inplace=True)
    df.fillna('', inplace=True)
    return df
|
647
|
-
|
648
|
-
|
649
|
-
def get_when_bindings(subject: URIRef,
                      spec_graph: Graph) -> dict:
    """Collect the variable bindings declared on a ``MUST.TextSparqlSource`` when-clause.

    Returns a dict mapping ``Variable(<variable name>)`` to the bound value for
    every binding found under ``subject MUST.when``; the dict is empty when the
    query matches nothing.
    """
    when_bindings_query = f"""SELECT ?variable ?binding {{ <{subject}> <{MUST.when}> [ a <{MUST.TextSparqlSource}> ; <{MUST.hasBinding}> [ <{MUST.variable}> ?variable ; <{MUST.boundValue}> ?binding ; ] ; ] ;}}"""
    found = spec_graph.query(when_bindings_query)

    # An empty result simply produces an empty mapping.
    return {Variable(entry.variable.value): entry.binding for entry in found}
|
661
|
-
|
662
|
-
|
663
|
-
def is_then_select_ordered(subject: URIRef, predicate: URIRef, spec_graph: Graph) -> bool:
    """Tell whether every binding of the expected table sits in an ordered row.

    The ASK query counts all bindings of the ``MUST.TableDataset`` at
    ``subject predicate`` and, separately, the bindings whose row carries an
    ``sh:order``; the answer is true when both counts are equal.
    """
    # The sh: prefix is declared explicitly (as get_spec_from_table does) so the
    # query does not depend on the namespace bindings of the graph it runs on.
    ask_select_ordered = f"""
    prefix sh: <http://www.w3.org/ns/shacl#>
    ASK {{
    {{SELECT (count(?binding) as ?totalBindings) {{
        <{subject}> <{predicate}> [
            a <{MUST.TableDataset}> ;
            <{MUST.hasRow}> [ <{MUST.hasBinding}> [
                                <{MUST.variable}> ?variable ;
                                <{MUST.boundValue}> ?binding ;
                                        ] ;
                        ]
        ]
    }} }}
    {{SELECT (count(?binding) as ?orderedBindings) {{
        <{subject}> <{predicate}> [
            a <{MUST.TableDataset}> ;
            <{MUST.hasRow}> [ sh:order ?order ;
                        <{MUST.hasBinding}> [
                                <{MUST.variable}> ?variable ;
                                <{MUST.boundValue}> ?binding ;
                                        ] ;
                        ]
        ]
    }} }}
    FILTER(?totalBindings = ?orderedBindings)
    }}"""
    is_ordered = spec_graph.query(ask_select_ordered)
    return is_ordered.askAnswer
|
1
|
+
"""
|
2
|
+
MIT License
|
3
|
+
|
4
|
+
Copyright (c) 2023 Semantic Partners Ltd
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
8
|
+
in the Software without restriction, including without limitation the rights
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
11
|
+
furnished to do so, subject to the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be included in all
|
14
|
+
copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
22
|
+
SOFTWARE.
|
23
|
+
"""
|
24
|
+
|
25
|
+
import os
|
26
|
+
from dataclasses import dataclass, field
|
27
|
+
from pathlib import Path
|
28
|
+
from typing import Tuple, List, Type
|
29
|
+
|
30
|
+
import pandas
|
31
|
+
import requests
|
32
|
+
from rdflib import RDF, Graph, URIRef, Variable, Literal, XSD, util
|
33
|
+
from rdflib.exceptions import ParserError
|
34
|
+
from rdflib.term import Node
|
35
|
+
import logging
|
36
|
+
|
37
|
+
from . import logger_setup
|
38
|
+
from .mustrdAnzo import get_queries_for_layer, get_queries_from_templated_step, get_spec_component_from_graphmart, get_query_from_querybuilder, get_query_from_step
|
39
|
+
from .namespace import MUST, TRIPLESTORE
|
40
|
+
from multimethods import MultiMethod, Default
|
41
|
+
from .utils import get_mustrd_root
|
42
|
+
|
43
|
+
log = logger_setup.setup_logger(__name__)
|
44
|
+
|
45
|
+
|
46
|
+
@dataclass
class SpecComponent:
    """Common base class of the given / when / then spec components; it has no fields."""
|
49
|
+
|
50
|
+
|
51
|
+
@dataclass
class GivenSpec(SpecComponent):
    """Spec component built for the ``MUST.given`` predicate."""

    # Graph holding the given data; None until a loader fills it in.
    value: Graph = None
|
54
|
+
|
55
|
+
|
56
|
+
@dataclass
class WhenSpec(SpecComponent):
    """Spec component built for the ``MUST.when`` predicate."""

    # Query text of the when-clause.
    value: str = None
    # Object of MUST.queryType on the spec component node.
    queryType: URIRef = None
    # Optional Variable -> value mapping (see get_when_bindings).
    bindings: dict = None
|
61
|
+
|
62
|
+
@dataclass
class AnzoWhenSpec(WhenSpec):
    """When-component created when the triple store type is ``TRIPLESTORE.Anzo``."""

    # "param_query" entry returned for a templated step.
    paramQuery: str = None
    # "query_template" entry returned for a templated step.
    queryTemplate: str = None
|
66
|
+
|
67
|
+
@dataclass
class ThenSpec(SpecComponent):
    """Spec component built for the ``MUST.then`` predicate."""

    # Expected result graph. A default_factory gives every instance its own
    # Graph; a plain ``Graph()`` default is created once and shared by all
    # instances, so triples added through one spec would leak into the others.
    value: Graph = field(default_factory=Graph)
    # Whether the expected result carries an explicit order.
    ordered: bool = False
|
71
|
+
|
72
|
+
|
73
|
+
@dataclass
class TableThenSpec(ThenSpec):
    """Then-component whose expected result is a table instead of a graph."""

    # Expected rows; each instance starts with its own empty DataFrame.
    value: pandas.DataFrame = field(default_factory=pandas.DataFrame)
|
76
|
+
|
77
|
+
|
78
|
+
@dataclass
class SpecComponentDetails:
    """Everything a ``get_spec_component`` implementation needs to build one component."""

    # Test specification the component belongs to.
    subject: URIRef
    # MUST.given, MUST.when or MUST.then.
    predicate: URIRef
    # Graph containing the specification.
    spec_graph: Graph
    # Triple store configuration; its "type" entry is compared with TRIPLESTORE.Anzo.
    mustrd_triple_store: dict
    # Object node of ``subject predicate`` describing the data source.
    spec_component_node: Node
    # One rdf:type of spec_component_node; used as dispatch key.
    data_source_type: Node
    # Run configuration (may hold e.g. 'spec_path', 'data_path', '*_path' entries).
    run_config: dict
    # Candidate root folders, in priority order, for resolving relative files.
    root_paths: list
|
88
|
+
|
89
|
+
def get_path(path_type: str, file_name, spec_component_details: SpecComponentDetails) -> Path:
    """Resolve *file_name* against the component's root paths.

    When the run configuration has an entry named *path_type*, that folder is
    joined in front of *file_name* before the lookup; otherwise *file_name* is
    looked up as is.
    """
    run_config = spec_component_details.run_config
    candidate = (
        os.path.join(run_config[path_type], file_name)
        if path_type in run_config
        else file_name
    )
    return get_file_absolute_path(spec_component_details, candidate)
|
95
|
+
|
96
|
+
|
97
|
+
def parse_spec_component(subject: URIRef,
                         predicate: URIRef,
                         spec_graph: Graph,
                         run_config: dict,
                         mustrd_triple_store: dict) -> GivenSpec | WhenSpec | ThenSpec | TableThenSpec:
    """Build the spec component(s) found at ``subject predicate`` and combine them.

    Every component node is expanded once per rdf:type it declares; each
    (node, type) pair is handed to ``get_spec_component`` and the collected
    results are merged by ``combine_specs``.
    """
    collected = []
    for node in get_spec_component_nodes(subject, predicate, spec_graph):
        for source_type in get_data_source_types(subject, predicate, spec_graph, node):
            details = SpecComponentDetails(
                subject=subject,
                predicate=predicate,
                spec_graph=spec_graph,
                mustrd_triple_store=mustrd_triple_store,
                spec_component_node=node,
                data_source_type=source_type,
                run_config=run_config,
                root_paths=get_components_roots(spec_graph, subject, run_config))
            produced = get_spec_component(details)
            # Some implementations return several components at once.
            if type(produced) is list:
                collected.extend(produced)
            else:
                collected.append(produced)

    # merge multiple graphs into one, give error if spec config is a TableThen
    return combine_specs(collected)
|
129
|
+
|
130
|
+
# Here we retrieve all the possible root paths for a specification component.
|
131
|
+
# This defines the order of priority between root paths which is:
|
132
|
+
# 1) Path where the spec is located
|
133
|
+
# 2) spec_path defined in mustrd test configuration files or cmd line argument
|
134
|
+
# 3) data_path defined in mustrd test configuration files or cmd line argument
|
135
|
+
# 4) Mustrd source folder: In case of default resources packaged with mustrd source (will be in venv when mustrd is called as library)
|
136
|
+
# We intentionally don't try absolute file paths, but feel free to argue that we should.
|
137
|
+
def get_components_roots(spec_graph: Graph, subject: URIRef, run_config: dict):
    """Return the candidate root folders for a spec component, in priority order.

    Order: folder of the file the spec was loaded from (``MUST.specSourceFile``),
    then ``run_config['spec_path']``, then ``run_config['data_path']``, then the
    value of ``get_mustrd_root()``. A missing spec source file is logged and
    skipped; missing config entries are skipped silently.
    """
    where_did_i_load_this_spec_from = spec_graph.value(subject=subject,
                                                       predicate=MUST.specSourceFile)
    roots = []
    if where_did_i_load_this_spec_from is None:
        log.error(f"{where_did_i_load_this_spec_from=} was None for test_spec={subject}, we didn't set the test specifications specSourceFile when loading, spec_graph={spec_graph}")
    else:
        roots.append(Path(os.path.dirname(where_did_i_load_this_spec_from)))
    if run_config and 'spec_path' in run_config:
        roots.append(Path(run_config['spec_path']))
    if run_config and 'data_path' in run_config:
        # Wrapped in Path like the other configured roots, so the list is uniform.
        roots.append(Path(run_config['data_path']))
    roots.append(get_mustrd_root())

    return roots
|
152
|
+
|
153
|
+
|
154
|
+
# From the list of component potential roots, return the first path that exists
|
155
|
+
def get_file_absolute_path(spec_component_details: SpecComponentDetails, relative_file_path: str):
    """Return the first existing path made of a root path joined with *relative_file_path*.

    Raises:
        ValueError: if *relative_file_path* is falsy.
        FileNotFoundError: if the file exists under none of the root paths.
    """
    if not relative_file_path:
        raise ValueError("Cannot get absolute path of None")

    absolute_file_paths = [
        Path(os.path.join(root_path, relative_file_path))
        for root_path in spec_component_details.root_paths
    ]
    first_existing = next(
        (candidate for candidate in absolute_file_paths if os.path.exists(candidate)),
        None,
    )
    if first_existing is None:
        raise FileNotFoundError(f"Could not find file {relative_file_path=} in any of the {absolute_file_paths=}")
    return first_existing
|
163
|
+
|
164
|
+
def get_spec_component_type(spec_components: List[SpecComponent]) -> Type[SpecComponent]:
    """Return the exact type shared by all *spec_components*.

    Raises:
        ValueError: if any element's type differs from the first element's.
    """
    first, *others = spec_components
    spec_type = type(first)
    if not all(type(other) == spec_type for other in others):
        raise ValueError("All spec components must be of the same type")
    return spec_type
|
175
|
+
|
176
|
+
|
177
|
+
def combine_specs_dispatch(spec_components: List[SpecComponent]) -> Type[SpecComponent]:
    """Dispatch function of ``combine_specs``: the common type of the components."""
    return get_spec_component_type(spec_components)
|
180
|
+
|
181
|
+
|
182
|
+
combine_specs = MultiMethod("combine_specs", combine_specs_dispatch)
|
183
|
+
|
184
|
+
|
185
|
+
@combine_specs.method(GivenSpec)
def _combine_given_specs(spec_components: List[GivenSpec]) -> GivenSpec:
    """Merge several given-components into one by adding up their graphs.

    A single component is returned untouched.
    """
    if len(spec_components) == 1:
        return spec_components[0]

    merged = Graph()
    for component in spec_components:
        merged += component.value
    return GivenSpec(value=merged)
|
196
|
+
|
197
|
+
|
198
|
+
@combine_specs.method(WhenSpec)
def _combine_when_specs(spec_components: List[WhenSpec]) -> List[WhenSpec]:
    """Return the when-components unchanged.

    When-components are not merged: the list itself is handed back, so the
    return annotation is a list rather than a single ``WhenSpec``.
    """
    return spec_components
|
201
|
+
|
202
|
+
|
203
|
+
@combine_specs.method(ThenSpec)
def _combine_then_specs(spec_components: List[ThenSpec]) -> ThenSpec:
    """Merge several then-components into one by adding up their graphs.

    A single component is returned untouched; a merged component keeps the
    default ``ordered`` value.
    """
    if len(spec_components) == 1:
        return spec_components[0]

    merged = Graph()
    for component in spec_components:
        merged += component.value
    return ThenSpec(value=merged)
|
214
|
+
|
215
|
+
|
216
|
+
@combine_specs.method(TableThenSpec)
def _combine_table_then_specs(spec_components: List[TableThenSpec]) -> TableThenSpec:
    """Return the single table then-component; more (or fewer) than one is rejected."""
    if len(spec_components) == 1:
        return spec_components[0]
    raise ValueError("Parsing of multiple components of MUST.then for tables not implemented")
|
221
|
+
|
222
|
+
|
223
|
+
@combine_specs.method(Default)
def _combine_specs_default(spec_components: List[SpecComponent]):
    """Fallback of ``combine_specs``: always raises ``ValueError``."""
    message = f"Parsing of multiple components of this type not implemented {spec_components}"
    raise ValueError(message)
|
226
|
+
|
227
|
+
def get_data_source_types(subject: URIRef, predicate: URIRef, spec_graph: Graph, source_node: Node) -> List[Node]:
    """Return all rdf:type values of *source_node*.

    *subject* and *predicate* are only used in the error message.

    Raises:
        ValueError: if the node has no rdf:type at all.
    """
    data_source_types = list(spec_graph.objects(subject=source_node, predicate=RDF.type))
    if not data_source_types:
        raise ValueError(f"Node has no rdf type {subject} {predicate}")
    return data_source_types
|
235
|
+
|
236
|
+
# https://github.com/Semantic-partners/mustrd/issues/99
|
237
|
+
def get_spec_component_dispatch(spec_component_details: SpecComponentDetails) -> Tuple[Node, URIRef]:
    """Dispatch function of ``get_spec_component``: (data source type, predicate)."""
    dispatch_key = (spec_component_details.data_source_type, spec_component_details.predicate)
    return dispatch_key
|
239
|
+
|
240
|
+
|
241
|
+
get_spec_component = MultiMethod("get_spec_component", get_spec_component_dispatch)
|
242
|
+
|
243
|
+
|
244
|
+
@get_spec_component.method((MUST.InheritedDataset, MUST.given))
def _get_spec_component_inheritedstate_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
    """Return a fresh, unfilled component for an inherited dataset."""
    return init_spec_component(spec_component_details.predicate)
|
248
|
+
|
249
|
+
|
250
|
+
@get_spec_component.method((MUST.FolderDataset, MUST.given))
def _get_spec_component_folderdatasource_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
    """Load the given-graph from the file named by ``MUST.fileName``.

    The file is resolved through ``get_path`` with the ``'given_path'`` setting.

    Raises:
        ValueError: if rdflib reports a ``ParserError`` for the file.
    """
    details = spec_component_details
    given_spec = init_spec_component(details.predicate)

    file_name = details.spec_graph.value(subject=details.spec_component_node,
                                         predicate=MUST.fileName)
    path = get_path('given_path', file_name, details)

    try:
        file_text = get_spec_component_from_file(path)
        given_spec.value = Graph().parse(data=file_text)
    except ParserError as parse_problem:
        log.error(f"Problem parsing {path}, error of type {type(parse_problem)}")
        raise ValueError(f"Problem parsing {path}, error of type {type(parse_problem)}")
    return given_spec
|
264
|
+
|
265
|
+
|
266
|
+
@get_spec_component.method((MUST.FolderSparqlSource, MUST.when))
def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComponentDetails) -> WhenSpec:
    """Load the when-query text from the file named by ``MUST.fileName``.

    The file is resolved through ``get_path`` with the ``'when_path'`` setting;
    the component's ``queryType`` is taken from ``MUST.queryType``. This method
    is registered for ``MUST.when`` only, so it yields a when-component (the
    return annotation used to say ``GivenSpec``).
    """
    spec_component = init_spec_component(spec_component_details.predicate)

    file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
                                                        predicate=MUST.fileName)

    path = get_path('when_path', file_name, spec_component_details)
    spec_component.value = get_spec_component_from_file(path)
    spec_component.queryType = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
                                                                       predicate=MUST.queryType)
    return spec_component
|
278
|
+
|
279
|
+
|
280
|
+
@get_spec_component.method((MUST.FolderDataset, MUST.then))
def _get_spec_component_folderdatasource_then(spec_component_details: SpecComponentDetails) -> ThenSpec:
    """Load the expected result from the file named by ``MUST.fileName``.

    The file is resolved through ``get_path`` with the ``'then_path'`` setting
    and read by ``load_dataset_from_file``.
    """
    details = spec_component_details
    then_spec = init_spec_component(details.predicate)
    declared_name = details.spec_graph.value(subject=details.spec_component_node,
                                             predicate=MUST.fileName)
    resolved = get_path('then_path', declared_name, details)
    return load_dataset_from_file(resolved, then_spec)
|
289
|
+
|
290
|
+
@get_spec_component.method((MUST.FileDataset, MUST.given))
@get_spec_component.method((MUST.FileDataset, MUST.then))
def _get_spec_component_filedatasource(spec_component_details: SpecComponentDetails) -> GivenSpec | ThenSpec | TableThenSpec:
    """Load a given- or then-component from the file named by ``MUST.file``.

    The component type follows the predicate; ``load_dataset_from_file`` may
    substitute a ``TableThenSpec`` for tabular files, hence the widened return
    annotation (it used to claim ``GivenSpec`` only).
    """
    spec_component = init_spec_component(spec_component_details.predicate)
    return load_spec_component(spec_component_details, spec_component)
|
295
|
+
|
296
|
+
def load_spec_component(spec_component_details, spec_component):
    """Fill *spec_component* from the file referenced by ``MUST.file`` on the component node."""
    declared_file = spec_component_details.spec_graph.value(
        subject=spec_component_details.spec_component_node,
        predicate=MUST.file)
    file_path = Path(str(declared_file))
    resolved = get_file_absolute_path(spec_component_details, file_path)
    return load_dataset_from_file(resolved, spec_component)
|
300
|
+
|
301
|
+
|
302
|
+
def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
    """Load a dataset file into a spec component.

    ``.csv`` / ``.xlsx`` / ``.xls`` files are read with pandas into a new
    ``TableThenSpec`` (the passed *spec_component* is not used in that case).
    Any other file is parsed as RDF, in the format rdflib guesses from the
    file name, and stored in ``spec_component.value``.

    Raises:
        ValueError: if *path* is a directory, if no RDF format can be guessed
            for the file, or if rdflib fails to parse it.
    """
    if path.is_dir():
        raise ValueError(f"Path {path} is a directory, expected a file")

    # https://github.com/Semantic-partners/mustrd/issues/94
    if path.suffix in {".csv", ".xlsx", ".xls"}:
        df = pandas.read_csv(path) if path.suffix == ".csv" else pandas.read_excel(path)
        then_spec = TableThenSpec()
        then_spec.value = df
        return then_spec

    try:
        file_format = util.guess_format(str(path))
    except AttributeError:
        raise ValueError(f"Unsupported file format: {path.suffix}")
    # guess_format signals an unknown extension by returning None; without this
    # check the function fell through and silently returned None.
    if file_format is None:
        raise ValueError(f"Unsupported file format: {path.suffix}")

    g = Graph()
    try:
        g.parse(data=get_spec_component_from_file(path), format=file_format)
    except ParserError as e:
        log.error(f"Problem parsing {path}, error of type {type(e)}")
        raise ValueError(f"Problem parsing {path}, error of type {type(e)}") from e
    spec_component.value = g
    return spec_component
|
327
|
+
|
328
|
+
|
329
|
+
|
330
|
+
@get_spec_component.method((MUST.FileSparqlSource, MUST.when))
def _get_spec_component_filedatasource_when(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Load the when-query text from the file referenced by ``MUST.file``.

    ``queryType`` is taken from ``MUST.queryType`` on the component node.
    """
    details = spec_component_details
    graph = details.spec_graph
    node = details.spec_component_node

    when_spec = init_spec_component(details.predicate)

    file_path = Path(str(graph.value(subject=node, predicate=MUST.file)))
    resolved = get_file_absolute_path(details, file_path)
    when_spec.value = get_spec_component_from_file(resolved)

    when_spec.queryType = graph.value(subject=node, predicate=MUST.queryType)
    return when_spec
|
342
|
+
|
343
|
+
|
344
|
+
# @get_spec_component.method((MUST.FileDataset, MUST.then))
|
345
|
+
# def _get_spec_component_filedatasource_then(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
346
|
+
# spec_component = init_spec_component(spec_component_details.predicate)
|
347
|
+
|
348
|
+
# file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
349
|
+
# predicate=MUST.file)))
|
350
|
+
# if str(file_path).startswith("/"): # absolute path
|
351
|
+
# path = file_path
|
352
|
+
# else: #relative path
|
353
|
+
# path = Path(os.path.join(spec_component_details.run_config['spec_path'], file_path))
|
354
|
+
# return get_then_from_file(path, spec_component)
|
355
|
+
|
356
|
+
|
357
|
+
@get_spec_component.method((MUST.TextSparqlSource, MUST.when))
def _get_spec_component_TextSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Build a when-component whose query text is inlined in the spec (``MUST.queryText``).

    Also attaches the variable bindings found by ``get_when_bindings`` and the
    ``MUST.queryType`` of the component node.
    """
    details = spec_component_details
    graph = details.spec_graph
    node = details.spec_component_node

    when_spec = init_spec_component(details.predicate)
    # The query is stored directly in the spec file as a text literal.
    when_spec.value = str(graph.value(subject=node, predicate=MUST.queryText))
    when_spec.bindings = get_when_bindings(details.subject, graph)
    when_spec.queryType = graph.value(subject=node, predicate=MUST.queryType)
    return when_spec
|
370
|
+
|
371
|
+
|
372
|
+
# https://github.com/Semantic-partners/mustrd/issues/98
|
373
|
+
@get_spec_component.method((MUST.HttpDataset, MUST.given))
@get_spec_component.method((MUST.HttpDataset, MUST.when))
@get_spec_component.method((MUST.HttpDataset, MUST.then))
def _get_spec_component_HttpDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Fetch the component content with an HTTP GET on ``MUST.dataSourceUrl``.

    The raw response body (bytes) becomes the component's ``value`` and
    ``queryType`` is taken from ``MUST.queryType`` on the component node.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    spec_component = init_spec_component(spec_component_details.predicate)

    # Get specComponent with http GET protocol
    url = str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
                                                      predicate=MUST.dataSourceUrl))
    response = requests.get(url)
    # Fail loudly on 4xx/5xx instead of treating an error page as spec content.
    response.raise_for_status()
    spec_component.value = response.content
    spec_component.queryType = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
                                                                       predicate=MUST.queryType)
    return spec_component
|
386
|
+
|
387
|
+
|
388
|
+
@get_spec_component.method((MUST.TableDataset, MUST.then))
def _get_spec_component_TableDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Build a table then-component from the table declared inline in the spec.

    ``value`` comes from ``get_spec_from_table`` and ``ordered`` from
    ``is_then_select_ordered``.
    """
    details = spec_component_details
    lookup = (details.subject, details.predicate, details.spec_graph)

    table_then = TableThenSpec()
    # get specComponent from ttl table
    table_then.value = get_spec_from_table(*lookup)
    table_then.ordered = is_then_select_ordered(*lookup)
    return table_then
|
397
|
+
|
398
|
+
|
399
|
+
@get_spec_component.method((MUST.EmptyTable, MUST.then))
def _get_spec_component_EmptyTable(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Return a default-constructed ``TableThenSpec`` for an empty expected table."""
    return TableThenSpec()
|
403
|
+
|
404
|
+
|
405
|
+
@get_spec_component.method((MUST.EmptyGraph, MUST.then))
def _get_spec_component_EmptyGraph(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Return a default-constructed then-component for an empty expected graph."""
    return init_spec_component(spec_component_details.predicate)
|
410
|
+
|
411
|
+
|
412
|
+
@get_spec_component.method((MUST.StatementsDataset, MUST.given))
@get_spec_component.method((MUST.StatementsDataset, MUST.then))
def _get_spec_component_StatementsDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Build a given/then component from reified statements declared in the spec."""
    details = spec_component_details
    component = init_spec_component(details.predicate)

    serialized = get_spec_from_statements(details.subject, details.predicate, details.spec_graph)
    component.value = Graph().parse(data=serialized)
    return component
|
421
|
+
|
422
|
+
|
423
|
+
@get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.given))
@get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.then))
def _get_spec_component_AnzoGraphmartDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Fetch a given/then component from an Anzo graphmart layer.

    Raises:
        ValueError: if the configured triple store is not Anzo.
    """
    details = spec_component_details
    component = init_spec_component(details.predicate)

    if not (details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo):
        raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoGraphmartDataset}")

    # Get GIVEN or THEN from anzo graphmart
    node = details.spec_component_node
    graphmart = details.spec_graph.value(subject=node, predicate=MUST.graphmart)
    layer = details.spec_graph.value(subject=node, predicate=MUST.layer)
    component.value = get_spec_component_from_graphmart(
        triple_store=details.mustrd_triple_store,
        graphmart=graphmart,
        layer=layer)
    return component
|
442
|
+
|
443
|
+
|
444
|
+
@get_spec_component.method((MUST.AnzoQueryBuilderSparqlSource, MUST.when))
def _get_spec_component_AnzoQueryBuilderSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Fetch the when-query from the Anzo query builder (``MUST.queryFolder`` / ``MUST.queryName``).

    Raises:
        ValueError: if the configured triple store is not Anzo.
    """
    details = spec_component_details
    graph = details.spec_graph
    node = details.spec_component_node
    when_spec = init_spec_component(details.predicate)

    # If anzo specific function is called but no anzo defined
    if not (details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo):
        raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoQueryBuilderSparqlSource}")

    # Get WHEN specComponent from query builder
    query_folder = graph.value(subject=node, predicate=MUST.queryFolder)
    query_name = graph.value(subject=node, predicate=MUST.queryName)
    when_spec.value = get_query_from_querybuilder(triple_store=details.mustrd_triple_store,
                                                  folder_name=query_folder,
                                                  query_name=query_name)

    when_spec.queryType = graph.value(subject=node, predicate=MUST.queryType)
    return when_spec
|
464
|
+
|
465
|
+
|
466
|
+
@get_spec_component.method((MUST.AnzoGraphmartStepSparqlSource, MUST.when))
def _get_spec_component_AnzoGraphmartStepSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Fetch the when-query of an Anzo graphmart step (``MUST.anzoQueryStep``).

    Raises:
        ValueError: if the configured triple store is not Anzo.
    """
    details = spec_component_details
    graph = details.spec_graph
    node = details.spec_component_node
    when_spec = init_spec_component(details.predicate)

    # If anzo specific function is called but no anzo defined
    if not (details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo):
        raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoGraphmartStepSparqlSource}")

    step_uri = graph.value(subject=node, predicate=MUST.anzoQueryStep)
    when_spec.value = get_query_from_step(triple_store=details.mustrd_triple_store,
                                          query_step_uri=step_uri)

    when_spec.queryType = graph.value(subject=node, predicate=MUST.queryType)
    return when_spec
|
483
|
+
|
484
|
+
@get_spec_component.method((MUST.AnzoGraphmartQueryDrivenTemplatedStepSparqlSource, MUST.when))
def _get_spec_component_AnzoGraphmartQueryDrivenTemplatedStepSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Fetch parameter query and query template of an Anzo query-driven templated step.

    Raises:
        ValueError: if the configured triple store is not Anzo.
    """
    details = spec_component_details
    graph = details.spec_graph
    node = details.spec_component_node
    store = details.mustrd_triple_store
    when_spec = init_spec_component(details.predicate, store["type"])

    # If anzo specific function is called but no anzo defined
    if not (store["type"] == TRIPLESTORE.Anzo):
        raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoGraphmartQueryDrivenTemplatedStepSparqlSource}")

    step_uri = graph.value(subject=node, predicate=MUST.anzoQueryStep)
    step_queries = get_queries_from_templated_step(triple_store=store,
                                                   query_step_uri=step_uri)
    when_spec.paramQuery = step_queries["param_query"]
    when_spec.queryTemplate = step_queries["query_template"]

    when_spec.queryType = graph.value(subject=node, predicate=MUST.queryType)
    return when_spec
|
503
|
+
|
504
|
+
@get_spec_component.method((MUST.AnzoGraphmartLayerSparqlSource, MUST.when))
def _get_spec_component_AnzoGraphmartLayerSparqlSource(spec_component_details: SpecComponentDetails) -> list:
    """Build one when-component per query of an Anzo graphmart layer.

    Entries with a ``"query"`` get the spec's ``MUST.queryType``; entries
    without one are typed ``MUST.AnzoQueryDrivenUpdateSparql``.

    Raises:
        ValueError: if the configured triple store is not Anzo.
    """
    details = spec_component_details
    store = details.mustrd_triple_store

    # If anzo specific function is called but no anzo defined
    if not (store["type"] == TRIPLESTORE.Anzo):
        raise ValueError("This test specification is specific to Anzo and can only be run against that platform.")

    # Get the ordered WHEN specComponents which is the transform and query driven template queries for the Layer
    layer_uri = details.spec_graph.value(subject=details.spec_component_node,
                                         predicate=MUST.anzoGraphmartLayer)
    layer_queries = get_queries_for_layer(triple_store=store,
                                          graphmart_layer_uri=layer_uri)

    when_specs = []
    for layer_query in layer_queries:
        when_spec = init_spec_component(details.predicate, store["type"])
        when_spec.value = layer_query.get("query")
        when_spec.paramQuery = layer_query.get("param_query")
        when_spec.queryTemplate = layer_query.get("query_template")
        if when_spec.value:
            when_spec.queryType = details.spec_graph.value(subject=details.spec_component_node,
                                                           predicate=MUST.queryType)
        else:
            when_spec.queryType = MUST.AnzoQueryDrivenUpdateSparql
        when_specs.append(when_spec)
    return when_specs
|
528
|
+
|
529
|
+
@get_spec_component.method(Default)
def _get_spec_component_default(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Fallback of ``get_spec_component``: reject unsupported (type, predicate) pairs."""
    source_type = spec_component_details.data_source_type
    component = spec_component_details.predicate
    raise ValueError(f"Invalid combination of data source type ({source_type}) and spec component ({component})")
|
534
|
+
|
535
|
+
|
536
|
+
def init_spec_component(predicate: URIRef, triple_store_type: URIRef = None) -> GivenSpec | WhenSpec | ThenSpec | TableThenSpec:
    """Create an empty spec component object matching *predicate*.

    Args:
        predicate: ``MUST.given``, ``MUST.when`` or ``MUST.then``; any other
            value yields a plain ``SpecComponent``.
        triple_store_type: only consulted for ``MUST.when``; when it equals
            ``TRIPLESTORE.Anzo`` an ``AnzoWhenSpec`` is created instead of a
            ``WhenSpec``.

    Returns:
        A freshly constructed, unpopulated spec component.
    """
    if predicate == MUST.given:
        return GivenSpec()
    if predicate == MUST.when:
        # Anzo has its own WHEN component type
        return AnzoWhenSpec() if triple_store_type == TRIPLESTORE.Anzo else WhenSpec()
    if predicate == MUST.then:
        return ThenSpec()
    return SpecComponent()
|
549
|
+
|
550
|
+
|
551
|
+
def get_spec_component_nodes(subject: URIRef, predicate: URIRef, spec_graph: Graph) -> List[Node]:
    """Return every object node of ``(subject, predicate, ?)`` in *spec_graph*.

    Args:
        subject: the node whose components are looked up.
        predicate: the predicate linking the subject to its component nodes.
        spec_graph: graph queried via ``objects(subject=..., predicate=...)``.

    Returns:
        A non-empty list of the matching object nodes.

    Raises:
        ValueError: if the graph holds no object for the subject/predicate pair.
    """
    spec_component_nodes = list(spec_graph.objects(subject=subject, predicate=predicate))
    # It shouldn't even be possible to get this far, as a missing node indicates an invalid RDF file.
    # The list is never None, so the check has to be on emptiness for the error to be reachable.
    if not spec_component_nodes:
        raise ValueError(f"specComponent Node empty for {subject} {predicate}")
    return spec_component_nodes
|
559
|
+
|
560
|
+
|
561
|
+
def get_spec_component_from_file(path: Path) -> str:
    """Return the text content of the file at *path*.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale encoding of the machine running the tests.

    Args:
        path: location of the file to read.

    Returns:
        The complete file content as a string.

    Raises:
        ValueError: if *path* points at a directory.
        FileNotFoundError: if *path* does not exist (propagated from ``read_text``).
    """
    if path.is_dir():
        raise ValueError(f"Path {path} is a directory, expected a file")

    return path.read_text(encoding="utf-8")
|
573
|
+
|
574
|
+
|
575
|
+
def get_spec_from_statements(subject: URIRef,
                             predicate: URIRef,
                             spec_graph: Graph) -> str:
    """Rebuild the triples described by reified statements and serialize them.

    A CONSTRUCT query collects every ``rdf:Statement`` attached through
    ``MUST.hasStatement`` to a ``MUST.StatementsDataset`` that *subject* points
    at via *predicate*, and emits the ``rdf:subject`` / ``rdf:predicate`` /
    ``rdf:object`` of each as a plain triple.

    Args:
        subject: the node owning the dataset.
        predicate: the predicate linking *subject* to the dataset.
        spec_graph: the graph the query is run against.

    Returns:
        The constructed graph serialized with ``format="ttl"``. This is the
        serialized form, not a ``Graph`` object (a ``str`` assuming rdflib 6 or
        newer -- confirm against the pinned rdflib version).
    """
    statements_query = f"""
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

    CONSTRUCT {{ ?s ?p ?o }}
    {{
            <{subject}> <{predicate}> [
                a <{MUST.StatementsDataset}> ;
                <{MUST.hasStatement}> [
                    a rdf:Statement ;
                    rdf:subject ?s ;
                    rdf:predicate ?p ;
                    rdf:object ?o ;
                ] ;
            ]

    }}
    """
    results = spec_graph.query(statements_query).graph
    return results.serialize(format="ttl")
|
597
|
+
|
598
|
+
|
599
|
+
def get_spec_from_table(subject: URIRef,
                        predicate: URIRef,
                        spec_graph: Graph) -> pandas.DataFrame:
    """Turn a ``MUST.TableDataset`` in the spec into a dataframe of expected results.

    Every ``MUST.hasRow`` node becomes one dataframe row. Every bound variable
    contributes two columns: ``<variable>`` holding ``str(binding)`` and
    ``<variable>_datatype`` holding the datatype IRI as a string. For a literal
    that is its datatype, or ``xsd:string`` when it has none; for anything else
    it is ``xsd:anyURI``.

    Rows are sorted by their optional ``sh:order`` value. The helper ``order``
    column is then dropped, the index is replaced by a numeric one and missing
    cells are filled with empty strings.

    Args:
        subject: the node owning the table dataset.
        predicate: the predicate linking *subject* to the dataset.
        spec_graph: the graph the query is run against.

    Returns:
        The expected results as a dataframe.
    """
    # query the spec to get the expected result to convert to dataframe for comparison
    then_query = f"""
    prefix sh: <http://www.w3.org/ns/shacl#>
    SELECT ?row ?variable ?binding ?order
    WHERE {{
         <{subject}> <{predicate}> [
                a <{MUST.TableDataset}> ;
                <{MUST.hasRow}> ?row ].
                ?row <{MUST.hasBinding}> [
                        <{MUST.variable}> ?variable ;
                        <{MUST.boundValue}> ?binding ; ] .
                OPTIONAL {{ ?row sh:order ?order . }}
    .}}
    ORDER BY ?order"""

    # materialize once; the rows are walked several times below
    expected_results = list(spec_graph.query(then_query))

    # dict.fromkeys de-duplicates while keeping first-seen order, so the frame layout
    # does not depend on set hash order
    index = list(dict.fromkeys(str(row.row) for row in expected_results))

    # one value column and one datatype column per variable, plus a helper column
    # for the sort order (if any) of the results
    column_names = []
    for row in expected_results:
        variable_name = row.variable.value
        column_names.append(variable_name)
        column_names.append(variable_name + "_datatype")
    column_names.append("order")
    columns = list(dict.fromkeys(column_names))

    # create an empty dataframe to populate with the results
    df = pandas.DataFrame(index=index, columns=columns)

    # fill the dataframe with the results data
    for row in expected_results:
        row_id = str(row.row)
        variable_name = row.variable.value
        df.loc[row_id, variable_name] = str(row.binding)
        df.loc[row_id, "order"] = row.order
        if isinstance(row.binding, Literal):
            # an untyped literal is reported as xsd:string
            literal_type = str(getattr(row.binding, "datatype", None) or XSD.string)
            df.loc[row_id, variable_name + "_datatype"] = literal_type
        else:
            df.loc[row_id, variable_name + "_datatype"] = str(XSD.anyURI)

    # use the sort order to sort the results
    df.sort_values(by="order", inplace=True)
    # drop the order column, replace the row-id index with a numeric one
    # and replace missing values with empty strings
    df.drop(columns="order", inplace=True)
    df.reset_index(drop=True, inplace=True)
    df.fillna('', inplace=True)
    return df
|
647
|
+
|
648
|
+
|
649
|
+
def get_when_bindings(subject: URIRef,
                      spec_graph: Graph) -> dict:
    """Collect the variable bindings declared on a spec's ``MUST.TextSparqlSource`` WHEN.

    Args:
        subject: the spec node whose ``MUST.when`` component is inspected.
        spec_graph: the graph the query is run against.

    Returns:
        A dict mapping ``Variable(<variable name>)`` to the bound value, or an
        empty dict when the query finds no bindings.
    """
    query_text = (
        f"""SELECT ?variable ?binding {{ <{subject}> <{MUST.when}> """
        f"""[ a <{MUST.TextSparqlSource}> ; <{MUST.hasBinding}> """
        f"""[ <{MUST.variable}> ?variable ; <{MUST.boundValue}> ?binding ; ] ; ] ;}}"""
    )
    found = spec_graph.query(query_text)

    if not found.bindings:
        return {}
    return {Variable(entry.variable.value): entry.binding for entry in found}
|
661
|
+
|
662
|
+
|
663
|
+
def is_then_select_ordered(subject: URIRef, predicate: URIRef, spec_graph: Graph) -> bool:
    """Tell whether the bindings of the expected table all sit on rows with an ``sh:order``.

    The ASK query counts the bindings of the ``MUST.TableDataset`` twice: once
    over all rows and once only over rows that have an ``sh:order`` value. It
    answers true when both counts are equal.

    The ``sh:`` prefix is declared inside the query, so the result does not
    depend on which namespace prefixes happen to be bound on *spec_graph*.

    Args:
        subject: the node owning the table dataset.
        predicate: the predicate linking *subject* to the dataset.
        spec_graph: the graph the query is run against.

    Returns:
        The answer of the ASK query.
    """
    ask_select_ordered = f"""
    prefix sh: <http://www.w3.org/ns/shacl#>
    ASK {{
    {{SELECT (count(?binding) as ?totalBindings) {{
    <{subject}> <{predicate}> [
                a <{MUST.TableDataset}> ;
                <{MUST.hasRow}> [ <{MUST.hasBinding}> [
                                    <{MUST.variable}> ?variable ;
                                    <{MUST.boundValue}> ?binding ;
                            ] ;
              ]
            ]
}} }}
    {{SELECT (count(?binding) as ?orderedBindings) {{
    <{subject}> <{predicate}> [
                a <{MUST.TableDataset}> ;
       <{MUST.hasRow}> [ sh:order ?order ;
                    <{MUST.hasBinding}> [
                    <{MUST.variable}> ?variable ;
                                    <{MUST.boundValue}> ?binding ;
                            ] ;
              ]
            ]
}} }}
    FILTER(?totalBindings = ?orderedBindings)
}}"""
    is_ordered = spec_graph.query(ask_select_ordered)
    return is_ordered.askAnswer
|