mustrd 0.3.0.0__py3-none-any.whl → 0.3.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mustrd/README.md +2 -0
- mustrd/logger_setup.py +2 -0
- mustrd/model/mustrdShapes.ttl +16 -6
- mustrd/model/ontology.ttl +1 -2
- mustrd/mustrd.py +442 -227
- mustrd/mustrdRdfLib.py +8 -1
- mustrd/namespace.py +10 -1
- mustrd/spec_component.py +224 -45
- mustrd/steprunner.py +62 -14
- mustrd-0.3.1a1.dist-info/METADATA +97 -0
- {mustrd-0.3.0.0.dist-info → mustrd-0.3.1a1.dist-info}/RECORD +14 -14
- mustrd-0.3.0.0.dist-info/METADATA +0 -96
- {mustrd-0.3.0.0.dist-info → mustrd-0.3.1a1.dist-info}/LICENSE +0 -0
- {mustrd-0.3.0.0.dist-info → mustrd-0.3.1a1.dist-info}/WHEEL +0 -0
- {mustrd-0.3.0.0.dist-info → mustrd-0.3.1a1.dist-info}/entry_points.txt +0 -0
mustrd/mustrdRdfLib.py
CHANGED
@@ -25,6 +25,7 @@ SOFTWARE.
|
|
25
25
|
from pyparsing import ParseException
|
26
26
|
from rdflib import Graph
|
27
27
|
from requests import RequestException
|
28
|
+
import logging
|
28
29
|
|
29
30
|
|
30
31
|
def execute_select(triple_store: dict, given: Graph, when: str, bindings: dict = None) -> str:
|
@@ -38,7 +39,13 @@ def execute_select(triple_store: dict, given: Graph, when: str, bindings: dict =
|
|
38
39
|
|
39
40
|
def execute_construct(triple_store: dict, given: Graph, when: str, bindings: dict = None) -> Graph:
|
40
41
|
try:
|
41
|
-
|
42
|
+
logger = logging.getLogger(__name__)
|
43
|
+
logger.debug(f"Executing CONSTRUCT query: {when} with bindings: {bindings}")
|
44
|
+
|
45
|
+
|
46
|
+
result_graph = given.query(when, initBindings=bindings).graph
|
47
|
+
logger.debug(f"CONSTRUCT query executed successfully, resulting graph has {len(result_graph)} triples.")
|
48
|
+
return result_graph
|
42
49
|
except ParseException:
|
43
50
|
raise
|
44
51
|
except Exception as e:
|
mustrd/namespace.py
CHANGED
@@ -38,13 +38,15 @@ class MUST(DefinedNamespace):
|
|
38
38
|
AnzoQueryDrivenUpdateSparql: URIRef
|
39
39
|
AskSparql: URIRef
|
40
40
|
DescribeSparql: URIRef
|
41
|
-
|
41
|
+
SpadeEdnGroupSource: URIRef
|
42
|
+
|
42
43
|
# Specification properties
|
43
44
|
given: URIRef
|
44
45
|
when: URIRef
|
45
46
|
then: URIRef
|
46
47
|
dataSource: URIRef
|
47
48
|
file: URIRef
|
49
|
+
fileurl: URIRef
|
48
50
|
fileName: URIRef
|
49
51
|
queryFolder: URIRef
|
50
52
|
queryName: URIRef
|
@@ -124,3 +126,10 @@ class MUSTRDTEST(DefinedNamespace):
|
|
124
126
|
triplestoreSpecPath: URIRef
|
125
127
|
hasPytestPath: URIRef
|
126
128
|
filterOnTripleStore: URIRef
|
129
|
+
|
130
|
+
from rdflib import Namespace
|
131
|
+
|
132
|
+
MUST = Namespace("https://mustrd.com/model/")
|
133
|
+
|
134
|
+
# Add SpadeEdnGroupSource to the namespace
|
135
|
+
MUST.SpadeEdnGroupSource = MUST["SpadeEdnGroupSource"]
|
mustrd/spec_component.py
CHANGED
@@ -33,6 +33,7 @@ from rdflib import RDF, Graph, URIRef, Variable, Literal, XSD, util, Conjunctive
|
|
33
33
|
from rdflib.exceptions import ParserError
|
34
34
|
from rdflib.term import Node
|
35
35
|
from rdflib.plugins.stores.memory import Memory
|
36
|
+
import edn_format
|
36
37
|
|
37
38
|
from . import logger_setup
|
38
39
|
from .mustrdAnzo import get_queries_for_layer, get_queries_from_templated_step
|
@@ -40,6 +41,7 @@ from .mustrdAnzo import get_query_from_querybuilder
|
|
40
41
|
from .namespace import MUST, TRIPLESTORE
|
41
42
|
from multimethods import MultiMethod, Default
|
42
43
|
from .utils import get_mustrd_root
|
44
|
+
from urllib.parse import urlparse
|
43
45
|
|
44
46
|
log = logger_setup.setup_logger(__name__)
|
45
47
|
|
@@ -68,6 +70,12 @@ class AnzoWhenSpec(WhenSpec):
|
|
68
70
|
spec_component_details: any = None
|
69
71
|
|
70
72
|
|
73
|
+
@dataclass
|
74
|
+
class SpadeEdnGroupSourceWhenSpec(WhenSpec):
|
75
|
+
file: str = None
|
76
|
+
groupId: str = None
|
77
|
+
|
78
|
+
|
71
79
|
@dataclass
|
72
80
|
class ThenSpec(SpecComponent):
|
73
81
|
value: Graph = Graph()
|
@@ -254,13 +262,13 @@ get_spec_component = MultiMethod("get_spec_component", get_spec_component_dispat
|
|
254
262
|
|
255
263
|
@get_spec_component.method((MUST.InheritedDataset, MUST.given))
|
256
264
|
def _get_spec_component_inheritedstate_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
|
257
|
-
spec_component =
|
265
|
+
spec_component = GivenSpec()
|
258
266
|
return spec_component
|
259
267
|
|
260
268
|
|
261
269
|
@get_spec_component.method((MUST.FolderDataset, MUST.given))
|
262
270
|
def _get_spec_component_folderdatasource_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
|
263
|
-
spec_component =
|
271
|
+
spec_component = GivenSpec()
|
264
272
|
|
265
273
|
file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
266
274
|
predicate=MUST.fileName)
|
@@ -276,7 +284,7 @@ def _get_spec_component_folderdatasource_given(spec_component_details: SpecCompo
|
|
276
284
|
|
277
285
|
@get_spec_component.method((MUST.FolderSparqlSource, MUST.when))
|
278
286
|
def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComponentDetails) -> GivenSpec:
|
279
|
-
spec_component =
|
287
|
+
spec_component = WhenSpec()
|
280
288
|
|
281
289
|
file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
282
290
|
predicate=MUST.fileName)
|
@@ -291,7 +299,7 @@ def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComp
|
|
291
299
|
|
292
300
|
@get_spec_component.method((MUST.FolderDataset, MUST.then))
|
293
301
|
def _get_spec_component_folderdatasource_then(spec_component_details: SpecComponentDetails) -> ThenSpec:
|
294
|
-
spec_component =
|
302
|
+
spec_component = ThenSpec()
|
295
303
|
|
296
304
|
file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
297
305
|
predicate=MUST.fileName)
|
@@ -301,17 +309,44 @@ def _get_spec_component_folderdatasource_then(spec_component_details: SpecCompon
|
|
301
309
|
|
302
310
|
|
303
311
|
@get_spec_component.method((MUST.FileDataset, MUST.given))
|
304
|
-
@get_spec_component.method((MUST.FileDataset, MUST.then))
|
305
312
|
def _get_spec_component_filedatasource(spec_component_details: SpecComponentDetails) -> GivenSpec:
|
306
|
-
spec_component =
|
313
|
+
spec_component = GivenSpec()
|
314
|
+
return load_spec_component(spec_component_details, spec_component)
|
315
|
+
|
316
|
+
@get_spec_component.method((MUST.FileDataset, MUST.then))
|
317
|
+
def _get_spec_component_filedatasource(spec_component_details: SpecComponentDetails) -> ThenSpec:
|
318
|
+
spec_component = ThenSpec()
|
307
319
|
return load_spec_component(spec_component_details, spec_component)
|
308
320
|
|
309
321
|
|
310
322
|
def load_spec_component(spec_component_details, spec_component):
|
311
|
-
file_path =
|
312
|
-
|
323
|
+
file_path = get_file_or_fileurl(spec_component_details)
|
324
|
+
file_path = Path(str(file_path))
|
313
325
|
return load_dataset_from_file(get_file_absolute_path(spec_component_details, file_path), spec_component)
|
314
326
|
|
327
|
+
def get_file_or_fileurl(spec_component_details):
|
328
|
+
file_path = spec_component_details.spec_graph.value(
|
329
|
+
subject=spec_component_details.spec_component_node,
|
330
|
+
predicate=MUST.file
|
331
|
+
)
|
332
|
+
if file_path is None:
|
333
|
+
file_path = spec_component_details.spec_graph.value(
|
334
|
+
subject=spec_component_details.spec_component_node,
|
335
|
+
predicate=MUST.fileurl
|
336
|
+
)
|
337
|
+
if file_path is not None and str(file_path).startswith("file://"):
|
338
|
+
# Remove the 'file://' scheme to get the local path
|
339
|
+
# we do it this quick and dirty way because the urlparse library assumes absolute paths, and strips our leading ./
|
340
|
+
# need to confirm this approach is windows safe.
|
341
|
+
|
342
|
+
new_path = str(file_path)[7:]
|
343
|
+
log.debug(f"converted {file_path=} to {new_path=}")
|
344
|
+
file_path = new_path
|
345
|
+
if file_path is None:
|
346
|
+
# shacl validation will catch this, but we want to raise a more specific error
|
347
|
+
raise ValueError("Neither MUST.file nor MUST.fileurl found for the spec component node")
|
348
|
+
return file_path
|
349
|
+
|
315
350
|
|
316
351
|
def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
|
317
352
|
if path.is_dir():
|
@@ -342,10 +377,10 @@ def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
|
|
342
377
|
|
343
378
|
@get_spec_component.method((MUST.FileSparqlSource, MUST.when))
|
344
379
|
def _get_spec_component_filedatasource_when(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
345
|
-
spec_component =
|
346
|
-
|
347
|
-
file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
348
|
-
|
380
|
+
spec_component = WhenSpec()
|
381
|
+
file_path = get_file_or_fileurl(spec_component_details)
|
382
|
+
# file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
383
|
+
# predicate=MUST.file)))
|
349
384
|
spec_component.value = get_spec_component_from_file(get_file_absolute_path(spec_component_details, file_path))
|
350
385
|
|
351
386
|
spec_component.queryType = spec_component_details.spec_graph.value(
|
@@ -357,7 +392,7 @@ def _get_spec_component_filedatasource_when(spec_component_details: SpecComponen
|
|
357
392
|
|
358
393
|
@get_spec_component.method((MUST.TextSparqlSource, MUST.when))
|
359
394
|
def _get_spec_component_TextSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
360
|
-
spec_component =
|
395
|
+
spec_component = WhenSpec()
|
361
396
|
|
362
397
|
# Get specComponent directly from config file (in text string)
|
363
398
|
spec_component.value = str(
|
@@ -371,22 +406,35 @@ def _get_spec_component_TextSparqlSource(spec_component_details: SpecComponentDe
|
|
371
406
|
return spec_component
|
372
407
|
|
373
408
|
|
374
|
-
|
375
|
-
@get_spec_component.method((MUST.HttpDataset, MUST.given))
|
376
|
-
@get_spec_component.method((MUST.HttpDataset, MUST.when))
|
377
|
-
@get_spec_component.method((MUST.HttpDataset, MUST.then))
|
378
|
-
def _get_spec_component_HttpDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
379
|
-
spec_component = init_spec_component(spec_component_details.predicate)
|
380
|
-
|
409
|
+
def _get_spec_component_HttpDataset_shared(spec_component_details: SpecComponentDetails, spec_component):
|
381
410
|
# Get specComponent with http GET protocol
|
382
|
-
|
383
|
-
spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
384
|
-
predicate=MUST.dataSourceUrl)).content)
|
385
|
-
spec_component.queryType = spec_component_details.spec_graph.value(
|
411
|
+
url = spec_component_details.spec_graph.value(
|
386
412
|
subject=spec_component_details.spec_component_node,
|
387
|
-
predicate=MUST.
|
413
|
+
predicate=MUST.dataSourceUrl
|
414
|
+
)
|
415
|
+
if not url:
|
416
|
+
raise ValueError("MUST.dataSourceUrl is missing for HttpDataset")
|
417
|
+
response = requests.get(str(url))
|
418
|
+
response.raise_for_status()
|
419
|
+
spec_component.value = response.content
|
420
|
+
if hasattr(spec_component, "queryType"):
|
421
|
+
spec_component.queryType = spec_component_details.spec_graph.value(
|
422
|
+
subject=spec_component_details.spec_component_node,
|
423
|
+
predicate=MUST.queryType)
|
388
424
|
return spec_component
|
389
425
|
|
426
|
+
@get_spec_component.method((MUST.HttpDataset, MUST.given))
|
427
|
+
def _get_spec_component_HttpDataset_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
|
428
|
+
return _get_spec_component_HttpDataset_shared(spec_component_details, GivenSpec())
|
429
|
+
|
430
|
+
@get_spec_component.method((MUST.HttpDataset, MUST.when))
|
431
|
+
def _get_spec_component_HttpDataset_when(spec_component_details: SpecComponentDetails) -> WhenSpec:
|
432
|
+
return _get_spec_component_HttpDataset_shared(spec_component_details, WhenSpec())
|
433
|
+
|
434
|
+
@get_spec_component.method((MUST.HttpDataset, MUST.then))
|
435
|
+
def _get_spec_component_HttpDataset_then(spec_component_details: SpecComponentDetails) -> ThenSpec:
|
436
|
+
return _get_spec_component_HttpDataset_shared(spec_component_details, ThenSpec())
|
437
|
+
|
390
438
|
|
391
439
|
@get_spec_component.method((MUST.TableDataset, MUST.then))
|
392
440
|
def _get_spec_component_TableDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
@@ -407,7 +455,7 @@ def _get_spec_component_EmptyTable(spec_component_details: SpecComponentDetails)
|
|
407
455
|
|
408
456
|
@get_spec_component.method((MUST.EmptyGraph, MUST.then))
|
409
457
|
def _get_spec_component_EmptyGraph(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
410
|
-
spec_component =
|
458
|
+
spec_component = ThenSpec()
|
411
459
|
|
412
460
|
return spec_component
|
413
461
|
|
@@ -415,7 +463,11 @@ def _get_spec_component_EmptyGraph(spec_component_details: SpecComponentDetails)
|
|
415
463
|
@get_spec_component.method((MUST.StatementsDataset, MUST.given))
|
416
464
|
@get_spec_component.method((MUST.StatementsDataset, MUST.then))
|
417
465
|
def _get_spec_component_StatementsDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
418
|
-
|
466
|
+
# Choose GivenSpec or ThenSpec based on the predicate in spec_component_details
|
467
|
+
if spec_component_details.predicate == MUST.given:
|
468
|
+
spec_component = GivenSpec()
|
469
|
+
else:
|
470
|
+
spec_component = ThenSpec()
|
419
471
|
store = Memory()
|
420
472
|
g = URIRef("http://localhost:7200/test-graph")
|
421
473
|
spec_component.value = ConjunctiveGraph(store=store)
|
@@ -430,7 +482,11 @@ def _get_spec_component_StatementsDataset(spec_component_details: SpecComponentD
|
|
430
482
|
@get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.given))
|
431
483
|
@get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.then))
|
432
484
|
def _get_spec_component_AnzoGraphmartDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
433
|
-
|
485
|
+
# Choose GivenSpec or ThenSpec based on the predicate in spec_component_details
|
486
|
+
if spec_component_details.predicate == MUST.given:
|
487
|
+
spec_component = GivenSpec()
|
488
|
+
else:
|
489
|
+
spec_component = ThenSpec()
|
434
490
|
|
435
491
|
if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
|
436
492
|
# Get GIVEN or THEN from anzo graphmart
|
@@ -443,7 +499,7 @@ def _get_spec_component_AnzoGraphmartDataset(spec_component_details: SpecCompone
|
|
443
499
|
|
444
500
|
@get_spec_component.method((MUST.AnzoQueryBuilderSparqlSource, MUST.when))
|
445
501
|
def _get_spec_component_AnzoQueryBuilderSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
446
|
-
spec_component =
|
502
|
+
spec_component = WhenSpec()
|
447
503
|
|
448
504
|
# Get WHEN specComponent from query builder
|
449
505
|
if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
|
@@ -488,7 +544,7 @@ def _get_spec_component_AnzoGraphmartStepSparqlSource(spec_component_details: Sp
|
|
488
544
|
|
489
545
|
@get_spec_component.method((MUST.AnzoGraphmartQueryDrivenTemplatedStepSparqlSource, MUST.when))
|
490
546
|
def _get_spec_component_AnzoGraphmartQueryDrivenTemplatedStepSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent: # noqa
|
491
|
-
spec_component =
|
547
|
+
spec_component = WhenSpec(
|
492
548
|
spec_component_details.predicate, spec_component_details.mustrd_triple_store["type"])
|
493
549
|
|
494
550
|
# Get WHEN specComponent from query builder
|
@@ -524,8 +580,8 @@ def _get_spec_component_AnzoGraphmartLayerSparqlSource(spec_component_details: S
|
|
524
580
|
else:
|
525
581
|
raise ValueError("This test specification is specific to Anzo and can only be run against that platform.")
|
526
582
|
for query in queries:
|
527
|
-
spec_component =
|
528
|
-
|
583
|
+
spec_component = WhenSpec(
|
584
|
+
spec_component_details.predicate, spec_component_details.mustrd_triple_store["type"])
|
529
585
|
spec_component.value = query.get("query")
|
530
586
|
spec_component.paramQuery = query.get("param_query")
|
531
587
|
spec_component.queryTemplate = query.get("query_template")
|
@@ -542,23 +598,65 @@ def _get_spec_component_AnzoGraphmartLayerSparqlSource(spec_component_details: S
|
|
542
598
|
|
543
599
|
@get_spec_component.method(Default)
|
544
600
|
def _get_spec_component_default(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
601
|
+
valid_combinations = [key for key in get_spec_component.methods.keys() if key != Default]
|
602
|
+
|
603
|
+
if (spec_component_details.data_source_type, spec_component_details.predicate) not in valid_combinations:
|
604
|
+
valid_types = ', '.join([f"({data_source_type}, {predicate})" for data_source_type, predicate in valid_combinations])
|
605
|
+
raise ValueError(
|
606
|
+
f"Invalid combination of data source type ({spec_component_details.data_source_type}) and "
|
607
|
+
f"spec component ({spec_component_details.predicate}). Valid combinations are: {valid_types}"
|
608
|
+
)
|
545
609
|
raise ValueError(
|
546
610
|
f"Invalid combination of data source type ({spec_component_details.data_source_type}) and "
|
547
611
|
f"spec component ({spec_component_details.predicate})")
|
548
612
|
|
549
613
|
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
614
|
+
@get_spec_component.method((MUST.SpadeEdnGroupSource, MUST.when))
|
615
|
+
def _get_spec_component_spadeednsource_when(spec_component_details: SpecComponentDetails) -> SpadeEdnGroupSourceWhenSpec:
|
616
|
+
from edn_format import Keyword
|
617
|
+
|
618
|
+
spec_component = SpadeEdnGroupSourceWhenSpec()
|
619
|
+
spec_component.file = spec_component_details.spec_graph.value(
|
620
|
+
subject=spec_component_details.spec_component_node,
|
621
|
+
predicate=MUST.fileName
|
622
|
+
)
|
623
|
+
spec_component.groupId = spec_component_details.spec_graph.value(
|
624
|
+
subject=spec_component_details.spec_component_node,
|
625
|
+
predicate=MUST.groupId
|
626
|
+
)
|
627
|
+
spec_component.queryType = spec_component_details.spec_graph.value(
|
628
|
+
subject=spec_component_details.spec_component_node,
|
629
|
+
predicate=MUST.queryType
|
630
|
+
)
|
631
|
+
|
632
|
+
# Initialize `value` by parsing the `file` attribute if available
|
633
|
+
if spec_component.file:
|
634
|
+
try:
|
635
|
+
with open(spec_component.file, "r") as edn_file:
|
636
|
+
edn_content = edn_file.read()
|
637
|
+
parsed_edn = edn_format.loads(edn_content)
|
638
|
+
|
639
|
+
# Extract group data based on group ID
|
640
|
+
step_groups = parsed_edn.get(Keyword("step-groups"), [])
|
641
|
+
group_data = next((item for item in step_groups if item.get(Keyword("group-id")) == spec_component.groupId), None)
|
642
|
+
|
643
|
+
if not group_data:
|
644
|
+
raise ValueError(f"Group ID {spec_component.groupId} not found in EDN file {spec_component.file}")
|
645
|
+
|
646
|
+
# Create a list of WhenSpec objects
|
647
|
+
when_specs = []
|
648
|
+
for step in group_data.get(Keyword("steps"), []):
|
649
|
+
step_type = step.get(Keyword("type"))
|
650
|
+
step_file = step.get(Keyword("filepath"))
|
651
|
+
|
652
|
+
if step_type == Keyword("sparql-file"):
|
653
|
+
when_specs.append(WhenSpec(value=step_file, queryType=MUST.InsertSparql))
|
654
|
+
|
655
|
+
spec_component.value = when_specs
|
656
|
+
except Exception as e:
|
657
|
+
log.error(f"Failed to parse EDN file {spec_component.file}: {e}")
|
658
|
+
spec_component.value = None
|
659
|
+
|
562
660
|
return spec_component
|
563
661
|
|
564
662
|
|
@@ -634,7 +732,7 @@ def get_spec_from_table(subject: URIRef,
|
|
634
732
|
columns.add(row.variable.value + "_datatype")
|
635
733
|
# add an additional column for the sort order (if any) of the results
|
636
734
|
columns.add("order")
|
637
|
-
# create an empty dataframe to populate with the results
|
735
|
+
# create an empty dataframe to populate with the results data
|
638
736
|
df = pandas.DataFrame(index=list(index), columns=list(columns))
|
639
737
|
# fill the dataframe with the results data
|
640
738
|
for row in expected_results:
|
@@ -700,3 +798,84 @@ def is_then_select_ordered(subject: URIRef, predicate: URIRef, spec_graph: Graph
|
|
700
798
|
}}"""
|
701
799
|
is_ordered = spec_graph.query(ask_select_ordered)
|
702
800
|
return is_ordered.askAnswer
|
801
|
+
|
802
|
+
|
803
|
+
@get_spec_component.method((MUST.SpadeEdnGroupSource, MUST.when))
|
804
|
+
def _get_spec_component_spade_edn_group_source_when(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
805
|
+
spec_component = SpadeEdnGroupSourceWhenSpec()
|
806
|
+
|
807
|
+
# Retrieve the file path for the EDN file
|
808
|
+
file_path = get_file_or_fileurl(spec_component_details)
|
809
|
+
absolute_file_path = get_file_absolute_path(spec_component_details, file_path)
|
810
|
+
|
811
|
+
# Parse the EDN file
|
812
|
+
try:
|
813
|
+
edn_content = Path(absolute_file_path).read_text()
|
814
|
+
edn_data = edn_format.loads(edn_content)
|
815
|
+
except FileNotFoundError:
|
816
|
+
raise ValueError(f"EDN file not found: {absolute_file_path}")
|
817
|
+
except edn_format.EDNDecodeError as e:
|
818
|
+
raise ValueError(f"Failed to parse EDN file {absolute_file_path}: {e}")
|
819
|
+
|
820
|
+
# Retrieve and normalize the group ID
|
821
|
+
group_id = spec_component_details.spec_graph.value(
|
822
|
+
subject=spec_component_details.spec_component_node,
|
823
|
+
predicate=MUST.groupId
|
824
|
+
)
|
825
|
+
|
826
|
+
if not group_id:
|
827
|
+
raise ValueError("groupId is missing for SpadeEdnGroupSource")
|
828
|
+
|
829
|
+
if str(group_id).startswith(':'):
|
830
|
+
group_id = str(group_id).lstrip(':')
|
831
|
+
from edn_format import Keyword
|
832
|
+
group_id = Keyword(group_id)
|
833
|
+
else:
|
834
|
+
group_id = str(group_id)
|
835
|
+
|
836
|
+
# Extract the relevant group data
|
837
|
+
step_groups = edn_data.get(Keyword("step-groups"), [])
|
838
|
+
group_data = next((item for item in step_groups if item.get(Keyword("group-id")) == group_id), None)
|
839
|
+
|
840
|
+
if not group_data:
|
841
|
+
raise ValueError(f"Group ID {group_id} not found in EDN file {absolute_file_path}")
|
842
|
+
|
843
|
+
# Create a list of WhenSpec objects
|
844
|
+
when_specs = []
|
845
|
+
for step in group_data.get(Keyword("steps"), []):
|
846
|
+
step_type = step.get(Keyword("type"))
|
847
|
+
step_file = step.get(Keyword("filepath"))
|
848
|
+
|
849
|
+
if step_type == Keyword("sparql-file"):
|
850
|
+
try:
|
851
|
+
with open(step_file, 'r') as sparql_file:
|
852
|
+
sparql_query = sparql_file.read()
|
853
|
+
|
854
|
+
# Assume the individuals are ConstructSparql queries
|
855
|
+
# won't be true for ASK, but good for now.
|
856
|
+
when_spec = WhenSpec(
|
857
|
+
value=sparql_query,
|
858
|
+
queryType=MUST.UpdateSparql,
|
859
|
+
bindings=None
|
860
|
+
)
|
861
|
+
when_specs.append(when_spec)
|
862
|
+
except FileNotFoundError:
|
863
|
+
raise ValueError(f"SPARQL file not found: {step_file}")
|
864
|
+
|
865
|
+
spec_component.file = str(absolute_file_path)
|
866
|
+
spec_component.groupId = group_id
|
867
|
+
spec_component.value = when_specs
|
868
|
+
spec_component.queryType = MUST.SpadeEdnGroupSource # Correct query type
|
869
|
+
|
870
|
+
return spec_component
|
871
|
+
|
872
|
+
|
873
|
+
def parse_sparql_query(query_string: str):
|
874
|
+
"""
|
875
|
+
Parses a SPARQL query string and returns a query object.
|
876
|
+
"""
|
877
|
+
try:
|
878
|
+
from rdflib.plugins.sparql.parser import parseQuery
|
879
|
+
return parseQuery(query_string)
|
880
|
+
except Exception as e:
|
881
|
+
raise ValueError(f"Failed to parse SPARQL query: {e}")
|
mustrd/steprunner.py
CHANGED
@@ -23,6 +23,7 @@ SOFTWARE.
|
|
23
23
|
"""
|
24
24
|
|
25
25
|
import json
|
26
|
+
import os
|
26
27
|
|
27
28
|
from multimethods import MultiMethod, Default
|
28
29
|
from .namespace import MUST, TRIPLESTORE
|
@@ -38,8 +39,9 @@ from .mustrdGraphDb import upload_given as upload_given_graphdb
|
|
38
39
|
from .mustrdGraphDb import execute_update as execute_update_graphdb
|
39
40
|
from .mustrdGraphDb import execute_construct as execute_construct_graphdb
|
40
41
|
from .mustrdGraphDb import execute_select as execute_select_graphdb
|
41
|
-
from .spec_component import AnzoWhenSpec, WhenSpec
|
42
|
+
from .spec_component import AnzoWhenSpec, WhenSpec, SpadeEdnGroupSourceWhenSpec
|
42
43
|
import logging
|
44
|
+
from edn_format import loads, Keyword
|
43
45
|
|
44
46
|
log = logging.getLogger(__name__)
|
45
47
|
|
@@ -71,14 +73,14 @@ def _upload_given_anzo(triple_store: dict, given: Graph):
|
|
71
73
|
def dispatch_run_when(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
|
72
74
|
ts = triple_store['type']
|
73
75
|
query_type = when.queryType
|
74
|
-
log.info(f"dispatch_run_when
|
76
|
+
log.info(f"dispatch_run_when: spec_uri={spec_uri}, ({ts},{query_type})")
|
75
77
|
return ts, query_type
|
76
78
|
|
77
79
|
|
78
|
-
|
80
|
+
run_when_impl = MultiMethod('run_when', dispatch_run_when)
|
79
81
|
|
80
82
|
|
81
|
-
@
|
83
|
+
@run_when_impl.method((TRIPLESTORE.Anzo, MUST.UpdateSparql))
|
82
84
|
def _anzo_run_when_update(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
|
83
85
|
log.debug(f"_anzo_run_when_update {spec_uri} {triple_store} {when} {type(when)}")
|
84
86
|
if when.value is None:
|
@@ -92,47 +94,47 @@ def _anzo_run_when_update(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSp
|
|
92
94
|
return execute_update_anzo(triple_store, query, when.bindings)
|
93
95
|
|
94
96
|
|
95
|
-
@
|
97
|
+
@run_when_impl.method((TRIPLESTORE.Anzo, MUST.ConstructSparql))
|
96
98
|
def _anzo_run_when_construct(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
|
97
99
|
return execute_construct_anzo(triple_store, when.value, when.bindings)
|
98
100
|
|
99
101
|
|
100
|
-
@
|
102
|
+
@run_when_impl.method((TRIPLESTORE.Anzo, MUST.SelectSparql))
|
101
103
|
def _anzo_run_when_select(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
|
102
104
|
return execute_select_anzo(triple_store, when.value, when.bindings)
|
103
105
|
|
104
106
|
|
105
|
-
@
|
107
|
+
@run_when_impl.method((TRIPLESTORE.GraphDb, MUST.UpdateSparql))
|
106
108
|
def _graphdb_run_when_update(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
|
107
109
|
return execute_update_graphdb(triple_store, when.value, when.bindings)
|
108
110
|
|
109
111
|
|
110
|
-
@
|
112
|
+
@run_when_impl.method((TRIPLESTORE.GraphDb, MUST.ConstructSparql))
|
111
113
|
def _graphdb_run_when_construct(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
|
112
114
|
return execute_construct_graphdb(triple_store, when.value, when.bindings)
|
113
115
|
|
114
116
|
|
115
|
-
@
|
117
|
+
@run_when_impl.method((TRIPLESTORE.GraphDb, MUST.SelectSparql))
|
116
118
|
def _graphdb_run_when_select(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
|
117
119
|
return execute_select_graphdb(triple_store, when.value, when.bindings)
|
118
120
|
|
119
121
|
|
120
|
-
@
|
122
|
+
@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.UpdateSparql))
|
121
123
|
def _rdflib_run_when_update(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
|
122
124
|
return execute_update_rdflib(triple_store, triple_store["given"], when.value, when.bindings)
|
123
125
|
|
124
126
|
|
125
|
-
@
|
127
|
+
@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.ConstructSparql))
|
126
128
|
def _rdflib_run_when_construct(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
|
127
129
|
return execute_construct_rdflib(triple_store, triple_store["given"], when.value, when.bindings)
|
128
130
|
|
129
131
|
|
130
|
-
@
|
132
|
+
@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.SelectSparql))
|
131
133
|
def _rdflib_run_when_select(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
|
132
134
|
return execute_select_rdflib(triple_store, triple_store["given"], when.value, when.bindings)
|
133
135
|
|
134
136
|
|
135
|
-
@
|
137
|
+
@run_when_impl.method((TRIPLESTORE.Anzo, MUST.AnzoQueryDrivenUpdateSparql))
|
136
138
|
def _multi_run_when_anzo_query_driven_update(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
|
137
139
|
# run the parameters query to obtain the values for the template step and put them into a dictionary
|
138
140
|
query_parameters = json.loads(execute_select_anzo(triple_store, when.paramQuery, None))
|
@@ -159,7 +161,50 @@ def _multi_run_when_anzo_query_driven_update(spec_uri: URIRef, triple_store: dic
|
|
159
161
|
return result
|
160
162
|
|
161
163
|
|
162
|
-
@
|
164
|
+
@run_when_impl.method((TRIPLESTORE.Anzo, MUST.SpadeEdnGroupSource))
|
165
|
+
def _spade_edn_group_source(spec_uri: URIRef, triple_store: dict, when: SpadeEdnGroupSourceWhenSpec):
|
166
|
+
log.info(f"Running SpadeEdnGroupSource for {spec_uri} using {triple_store}")
|
167
|
+
|
168
|
+
results = []
|
169
|
+
|
170
|
+
# Iterate over the list of WhenSpec objects in `when.value`
|
171
|
+
for step_when_spec in when.value:
|
172
|
+
try:
|
173
|
+
log.info(f"Dispatching run_when for step: {step_when_spec}")
|
174
|
+
query_result = run_when_impl(spec_uri, triple_store, step_when_spec)
|
175
|
+
log.info(f"Executed SPARQL query: {query_result}")
|
176
|
+
results.append(query_result)
|
177
|
+
except Exception as e:
|
178
|
+
log.error(f"Failed to execute SPARQL query: {e}")
|
179
|
+
|
180
|
+
log.debug(f"Final results: {results}")
|
181
|
+
return results
|
182
|
+
|
183
|
+
|
184
|
+
@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.SpadeEdnGroupSource))
|
185
|
+
def _spade_edn_group_source(spec_uri: URIRef, triple_store: dict, when: SpadeEdnGroupSourceWhenSpec):
|
186
|
+
log.info(f"Running SpadeEdnGroupSource for {spec_uri} using {triple_store}")
|
187
|
+
|
188
|
+
merged_graph = Graph()
|
189
|
+
|
190
|
+
# Iterate over the list of WhenSpec objects in `when.value`
|
191
|
+
for step_when_spec in when.value:
|
192
|
+
try:
|
193
|
+
if step_when_spec.queryType == MUST.UpdateSparql:
|
194
|
+
log.info(f"Dispatching run_when for UpdateSparql step: {step_when_spec}")
|
195
|
+
query_result = run_when_impl(spec_uri, triple_store, step_when_spec)
|
196
|
+
log.info(f"Executed SPARQL query: {query_result}")
|
197
|
+
merged_graph += query_result # Merge the resulting graph
|
198
|
+
else:
|
199
|
+
log.warning(f"Unsupported queryType: {step_when_spec.queryType}")
|
200
|
+
except Exception as e:
|
201
|
+
log.error(f"Failed to execute SPARQL query: {e}")
|
202
|
+
|
203
|
+
log.debug(f"Final merged graph has {len(merged_graph)} triples.")
|
204
|
+
return merged_graph
|
205
|
+
|
206
|
+
|
207
|
+
@run_when_impl.method(Default)
|
163
208
|
def _multi_run_when_default(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
|
164
209
|
log.error(f"run_when not implemented for {spec_uri} {triple_store} {when}")
|
165
210
|
if when.queryType == MUST.AskSparql:
|
@@ -174,3 +219,6 @@ def _multi_run_when_default(spec_uri: URIRef, triple_store: dict, when: WhenSpec
|
|
174
219
|
log.warning(f"Skipping {spec_uri}, {when.queryType} is not a valid SPARQL query type.")
|
175
220
|
msg = f"{when.queryType} is not a valid SPARQL query type."
|
176
221
|
raise NotImplementedError(msg)
|
222
|
+
|
223
|
+
log.debug(f"run_when registry: {run_when_impl} {dir(run_when_impl)}")
|
224
|
+
|