mustrd 0.2.7a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mustrd/README.md +2 -0
- mustrd/anzo_utils.py +8 -5
- mustrd/logger_setup.py +3 -0
- mustrd/model/mustrdShapes.ttl +25 -6
- mustrd/model/ontology.ttl +6 -2
- mustrd/mustrd.py +508 -235
- mustrd/mustrdAnzo.py +3 -2
- mustrd/mustrdRdfLib.py +8 -1
- mustrd/mustrdTestPlugin.py +299 -128
- mustrd/namespace.py +10 -1
- mustrd/spec_component.py +238 -58
- mustrd/steprunner.py +78 -20
- mustrd-0.3.1a0.dist-info/METADATA +96 -0
- {mustrd-0.2.7a0.dist-info → mustrd-0.3.1a0.dist-info}/RECORD +17 -17
- mustrd-0.2.7a0.dist-info/METADATA +0 -96
- {mustrd-0.2.7a0.dist-info → mustrd-0.3.1a0.dist-info}/LICENSE +0 -0
- {mustrd-0.2.7a0.dist-info → mustrd-0.3.1a0.dist-info}/WHEEL +0 -0
- {mustrd-0.2.7a0.dist-info → mustrd-0.3.1a0.dist-info}/entry_points.txt +0 -0
mustrd/spec_component.py
CHANGED
@@ -33,13 +33,15 @@ from rdflib import RDF, Graph, URIRef, Variable, Literal, XSD, util, Conjunctive
|
|
33
33
|
from rdflib.exceptions import ParserError
|
34
34
|
from rdflib.term import Node
|
35
35
|
from rdflib.plugins.stores.memory import Memory
|
36
|
+
import edn_format
|
36
37
|
|
37
38
|
from . import logger_setup
|
38
|
-
from .mustrdAnzo import get_queries_for_layer, get_queries_from_templated_step
|
39
|
-
from .mustrdAnzo import get_query_from_querybuilder
|
39
|
+
from .mustrdAnzo import get_queries_for_layer, get_queries_from_templated_step
|
40
|
+
from .mustrdAnzo import get_query_from_querybuilder
|
40
41
|
from .namespace import MUST, TRIPLESTORE
|
41
42
|
from multimethods import MultiMethod, Default
|
42
43
|
from .utils import get_mustrd_root
|
44
|
+
from urllib.parse import urlparse
|
43
45
|
|
44
46
|
log = logger_setup.setup_logger(__name__)
|
45
47
|
|
@@ -65,6 +67,13 @@ class WhenSpec(SpecComponent):
|
|
65
67
|
class AnzoWhenSpec(WhenSpec):
|
66
68
|
paramQuery: str = None
|
67
69
|
queryTemplate: str = None
|
70
|
+
spec_component_details: any = None
|
71
|
+
|
72
|
+
|
73
|
+
@dataclass
class SpadeEdnGroupSourceWhenSpec(WhenSpec):
    # WHEN component used for the MUST.SpadeEdnGroupSource data source type.
    # Both fields default to None and are filled in by the loader that builds the component.
    file: str = None      # location of the EDN file the step group is read from
    groupId: str = None   # identifier of the step group selected from that file
|
68
77
|
|
69
78
|
|
70
79
|
@dataclass
|
@@ -108,6 +117,7 @@ def parse_spec_component(subject: URIRef,
|
|
108
117
|
for spec_component_node in spec_component_nodes:
|
109
118
|
data_source_types = get_data_source_types(subject, predicate, spec_graph, spec_component_node)
|
110
119
|
for data_source_type in data_source_types:
|
120
|
+
log.debug(f"parse_spec_component {spec_component_node} {data_source_type} {mustrd_triple_store=}")
|
111
121
|
spec_component_details = SpecComponentDetails(
|
112
122
|
subject=subject,
|
113
123
|
predicate=predicate,
|
@@ -117,6 +127,9 @@ def parse_spec_component(subject: URIRef,
|
|
117
127
|
data_source_type=data_source_type,
|
118
128
|
run_config=run_config,
|
119
129
|
root_paths=get_components_roots(spec_graph, subject, run_config))
|
130
|
+
|
131
|
+
# get_spec_component potentially talks to anzo for EVERY spec, massively slowing things down
|
132
|
+
# can we defer it to run time?
|
120
133
|
spec_component = get_spec_component(spec_component_details)
|
121
134
|
if isinstance(spec_component, list):
|
122
135
|
spec_components += spec_component
|
@@ -249,13 +262,13 @@ get_spec_component = MultiMethod("get_spec_component", get_spec_component_dispat
|
|
249
262
|
|
250
263
|
@get_spec_component.method((MUST.InheritedDataset, MUST.given))
def _get_spec_component_inheritedstate_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
    """Return a default-constructed GivenSpec for an inherited dataset.

    Nothing is read from spec_component_details here.
    """
    return GivenSpec()
|
254
267
|
|
255
268
|
|
256
269
|
@get_spec_component.method((MUST.FolderDataset, MUST.given))
|
257
270
|
def _get_spec_component_folderdatasource_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
|
258
|
-
spec_component =
|
271
|
+
spec_component = GivenSpec()
|
259
272
|
|
260
273
|
file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
261
274
|
predicate=MUST.fileName)
|
@@ -271,7 +284,7 @@ def _get_spec_component_folderdatasource_given(spec_component_details: SpecCompo
|
|
271
284
|
|
272
285
|
@get_spec_component.method((MUST.FolderSparqlSource, MUST.when))
|
273
286
|
def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComponentDetails) -> GivenSpec:
|
274
|
-
spec_component =
|
287
|
+
spec_component = WhenSpec()
|
275
288
|
|
276
289
|
file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
277
290
|
predicate=MUST.fileName)
|
@@ -286,7 +299,7 @@ def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComp
|
|
286
299
|
|
287
300
|
@get_spec_component.method((MUST.FolderDataset, MUST.then))
|
288
301
|
def _get_spec_component_folderdatasource_then(spec_component_details: SpecComponentDetails) -> ThenSpec:
|
289
|
-
spec_component =
|
302
|
+
spec_component = ThenSpec()
|
290
303
|
|
291
304
|
file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
292
305
|
predicate=MUST.fileName)
|
@@ -296,17 +309,44 @@ def _get_spec_component_folderdatasource_then(spec_component_details: SpecCompon
|
|
296
309
|
|
297
310
|
|
298
311
|
@get_spec_component.method((MUST.FileDataset, MUST.given))
def _get_spec_component_filedatasource(spec_component_details: SpecComponentDetails) -> GivenSpec:
    """Load a file-backed GIVEN component.

    A fresh GivenSpec is handed to load_spec_component, which resolves the
    file location and fills the component in.
    """
    given = GivenSpec()
    return load_spec_component(spec_component_details, given)
|
315
|
+
|
316
|
+
@get_spec_component.method((MUST.FileDataset, MUST.then))
def _get_spec_component_filedatasource(spec_component_details: SpecComponentDetails) -> ThenSpec:
    """Load a file-backed THEN component.

    A fresh ThenSpec is handed to load_spec_component, which resolves the
    file location and fills the component in.
    """
    then = ThenSpec()
    return load_spec_component(spec_component_details, then)
|
303
320
|
|
304
321
|
|
305
322
|
def load_spec_component(spec_component_details, spec_component):
    """Populate spec_component from the file referenced by the spec node.

    The location comes from get_file_or_fileurl, is wrapped in a Path, made
    absolute with get_file_absolute_path and then passed to
    load_dataset_from_file together with spec_component.
    """
    location = Path(str(get_file_or_fileurl(spec_component_details)))
    absolute_location = get_file_absolute_path(spec_component_details, location)
    return load_dataset_from_file(absolute_location, spec_component)
|
309
326
|
|
327
|
+
def get_file_or_fileurl(spec_component_details):
    """Return the file location declared on the spec component node.

    MUST.file is preferred. When it is absent, MUST.fileurl is used instead,
    and a leading "file://" scheme is cut off so the remainder can be used as
    a local path.

    Raises:
        ValueError: when the node has neither MUST.file nor MUST.fileurl.
    """
    graph = spec_component_details.spec_graph
    node = spec_component_details.spec_component_node

    file_path = graph.value(subject=node, predicate=MUST.file)
    if file_path is not None:
        return file_path

    file_path = graph.value(subject=node, predicate=MUST.fileurl)
    if file_path is None:
        # shacl validation will catch this, but we want to raise a more specific error
        raise ValueError("Neither MUST.file nor MUST.fileurl found for the spec component node")

    if str(file_path).startswith("file://"):
        # Plain slicing instead of urlparse: urlparse assumes absolute paths and
        # would strip a leading "./". Windows safety still needs to be confirmed.
        new_path = str(file_path)[7:]
        log.debug(f"converted {file_path=} to {new_path=}")
        file_path = new_path
    return file_path
|
349
|
+
|
310
350
|
|
311
351
|
def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
|
312
352
|
if path.is_dir():
|
@@ -337,10 +377,10 @@ def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
|
|
337
377
|
|
338
378
|
@get_spec_component.method((MUST.FileSparqlSource, MUST.when))
|
339
379
|
def _get_spec_component_filedatasource_when(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
340
|
-
spec_component =
|
341
|
-
|
342
|
-
file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
343
|
-
|
380
|
+
spec_component = WhenSpec()
|
381
|
+
file_path = get_file_or_fileurl(spec_component_details)
|
382
|
+
# file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
383
|
+
# predicate=MUST.file)))
|
344
384
|
spec_component.value = get_spec_component_from_file(get_file_absolute_path(spec_component_details, file_path))
|
345
385
|
|
346
386
|
spec_component.queryType = spec_component_details.spec_graph.value(
|
@@ -352,7 +392,7 @@ def _get_spec_component_filedatasource_when(spec_component_details: SpecComponen
|
|
352
392
|
|
353
393
|
@get_spec_component.method((MUST.TextSparqlSource, MUST.when))
|
354
394
|
def _get_spec_component_TextSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
355
|
-
spec_component =
|
395
|
+
spec_component = WhenSpec()
|
356
396
|
|
357
397
|
# Get specComponent directly from config file (in text string)
|
358
398
|
spec_component.value = str(
|
@@ -366,22 +406,35 @@ def _get_spec_component_TextSparqlSource(spec_component_details: SpecComponentDe
|
|
366
406
|
return spec_component
|
367
407
|
|
368
408
|
|
369
|
-
|
370
|
-
@get_spec_component.method((MUST.HttpDataset, MUST.given))
|
371
|
-
@get_spec_component.method((MUST.HttpDataset, MUST.when))
|
372
|
-
@get_spec_component.method((MUST.HttpDataset, MUST.then))
|
373
|
-
def _get_spec_component_HttpDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
374
|
-
spec_component = init_spec_component(spec_component_details.predicate)
|
375
|
-
|
409
|
+
def _get_spec_component_HttpDataset_shared(spec_component_details: SpecComponentDetails, spec_component):
    """Fill spec_component with the body fetched from MUST.dataSourceUrl via HTTP GET.

    Args:
        spec_component_details: carries the spec graph and the component node to read from.
        spec_component: the component instance to populate; it is mutated and returned.

    Returns:
        The same spec_component, with value set to the raw response bytes and,
        when the component has a queryType attribute, queryType read from MUST.queryType.

    Raises:
        ValueError: when the node has no MUST.dataSourceUrl.
        requests.RequestException: on connection failure, timeout or a non-2xx status.
    """
    # Without a timeout requests waits indefinitely, so one unresponsive
    # server would stall loading of every spec.
    http_timeout_seconds = 30

    url = spec_component_details.spec_graph.value(
        subject=spec_component_details.spec_component_node,
        predicate=MUST.dataSourceUrl
    )
    if not url:
        raise ValueError("MUST.dataSourceUrl is missing for HttpDataset")

    response = requests.get(str(url), timeout=http_timeout_seconds)
    response.raise_for_status()
    spec_component.value = response.content

    # Only components that declare queryType (checked with hasattr) get it assigned.
    if hasattr(spec_component, "queryType"):
        spec_component.queryType = spec_component_details.spec_graph.value(
            subject=spec_component_details.spec_component_node,
            predicate=MUST.queryType)
    return spec_component
|
384
425
|
|
426
|
+
@get_spec_component.method((MUST.HttpDataset, MUST.given))
def _get_spec_component_HttpDataset_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
    """Build a GIVEN component through the shared HttpDataset loader."""
    given = GivenSpec()
    return _get_spec_component_HttpDataset_shared(spec_component_details, given)
|
429
|
+
|
430
|
+
@get_spec_component.method((MUST.HttpDataset, MUST.when))
def _get_spec_component_HttpDataset_when(spec_component_details: SpecComponentDetails) -> WhenSpec:
    """Build a WHEN component through the shared HttpDataset loader."""
    when = WhenSpec()
    return _get_spec_component_HttpDataset_shared(spec_component_details, when)
|
433
|
+
|
434
|
+
@get_spec_component.method((MUST.HttpDataset, MUST.then))
def _get_spec_component_HttpDataset_then(spec_component_details: SpecComponentDetails) -> ThenSpec:
    """Build a THEN component through the shared HttpDataset loader."""
    then = ThenSpec()
    return _get_spec_component_HttpDataset_shared(spec_component_details, then)
|
437
|
+
|
385
438
|
|
386
439
|
@get_spec_component.method((MUST.TableDataset, MUST.then))
|
387
440
|
def _get_spec_component_TableDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
@@ -402,7 +455,7 @@ def _get_spec_component_EmptyTable(spec_component_details: SpecComponentDetails)
|
|
402
455
|
|
403
456
|
@get_spec_component.method((MUST.EmptyGraph, MUST.then))
def _get_spec_component_EmptyGraph(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Return a default-constructed ThenSpec for an expected empty graph.

    Nothing is read from spec_component_details here.
    """
    return ThenSpec()
|
408
461
|
|
@@ -410,7 +463,11 @@ def _get_spec_component_EmptyGraph(spec_component_details: SpecComponentDetails)
|
|
410
463
|
@get_spec_component.method((MUST.StatementsDataset, MUST.given))
|
411
464
|
@get_spec_component.method((MUST.StatementsDataset, MUST.then))
|
412
465
|
def _get_spec_component_StatementsDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
413
|
-
|
466
|
+
# Choose GivenSpec or ThenSpec based on the predicate in spec_component_details
|
467
|
+
if spec_component_details.predicate == MUST.given:
|
468
|
+
spec_component = GivenSpec()
|
469
|
+
else:
|
470
|
+
spec_component = ThenSpec()
|
414
471
|
store = Memory()
|
415
472
|
g = URIRef("http://localhost:7200/test-graph")
|
416
473
|
spec_component.value = ConjunctiveGraph(store=store)
|
@@ -425,18 +482,15 @@ def _get_spec_component_StatementsDataset(spec_component_details: SpecComponentD
|
|
425
482
|
@get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.given))
|
426
483
|
@get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.then))
|
427
484
|
def _get_spec_component_AnzoGraphmartDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
428
|
-
|
485
|
+
# Choose GivenSpec or ThenSpec based on the predicate in spec_component_details
|
486
|
+
if spec_component_details.predicate == MUST.given:
|
487
|
+
spec_component = GivenSpec()
|
488
|
+
else:
|
489
|
+
spec_component = ThenSpec()
|
429
490
|
|
430
491
|
if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
|
431
492
|
# Get GIVEN or THEN from anzo graphmart
|
432
|
-
|
433
|
-
predicate=MUST.graphmart)
|
434
|
-
layer = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
435
|
-
predicate=MUST.layer)
|
436
|
-
spec_component.value = get_spec_component_from_graphmart(
|
437
|
-
triple_store=spec_component_details.mustrd_triple_store,
|
438
|
-
graphmart=graphmart,
|
439
|
-
layer=layer)
|
493
|
+
spec_component.spec_component_details = spec_component_details
|
440
494
|
else:
|
441
495
|
raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoGraphmartDataset}")
|
442
496
|
|
@@ -445,7 +499,7 @@ def _get_spec_component_AnzoGraphmartDataset(spec_component_details: SpecCompone
|
|
445
499
|
|
446
500
|
@get_spec_component.method((MUST.AnzoQueryBuilderSparqlSource, MUST.when))
|
447
501
|
def _get_spec_component_AnzoQueryBuilderSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
448
|
-
spec_component =
|
502
|
+
spec_component = WhenSpec()
|
449
503
|
|
450
504
|
# Get WHEN specComponent from query builder
|
451
505
|
if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
|
@@ -468,14 +522,16 @@ def _get_spec_component_AnzoQueryBuilderSparqlSource(spec_component_details: Spe
|
|
468
522
|
|
469
523
|
@get_spec_component.method((MUST.AnzoGraphmartStepSparqlSource, MUST.when))
|
470
524
|
def _get_spec_component_AnzoGraphmartStepSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
|
471
|
-
spec_component =
|
525
|
+
spec_component = AnzoWhenSpec()
|
472
526
|
|
473
527
|
# Get WHEN specComponent from query builder
|
474
528
|
if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
|
475
529
|
query_step_uri = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
|
476
530
|
predicate=MUST.anzoQueryStep)
|
477
|
-
spec_component.
|
478
|
-
|
531
|
+
spec_component.spec_component_details = spec_component_details
|
532
|
+
spec_component.query_step_uri = query_step_uri
|
533
|
+
# spec_component.value = get_query_from_step(triple_store=spec_component_details.mustrd_triple_store,
|
534
|
+
# query_step_uri=query_step_uri)
|
479
535
|
# If anzo specific function is called but no anzo defined
|
480
536
|
else:
|
481
537
|
raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoGraphmartStepSparqlSource}")
|
@@ -488,7 +544,7 @@ def _get_spec_component_AnzoGraphmartStepSparqlSource(spec_component_details: Sp
|
|
488
544
|
|
489
545
|
@get_spec_component.method((MUST.AnzoGraphmartQueryDrivenTemplatedStepSparqlSource, MUST.when))
|
490
546
|
def _get_spec_component_AnzoGraphmartQueryDrivenTemplatedStepSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent: # noqa
|
491
|
-
spec_component =
|
547
|
+
spec_component = WhenSpec(
|
492
548
|
spec_component_details.predicate, spec_component_details.mustrd_triple_store["type"])
|
493
549
|
|
494
550
|
# Get WHEN specComponent from query builder
|
@@ -524,11 +580,12 @@ def _get_spec_component_AnzoGraphmartLayerSparqlSource(spec_component_details: S
|
|
524
580
|
else:
|
525
581
|
raise ValueError("This test specification is specific to Anzo and can only be run against that platform.")
|
526
582
|
for query in queries:
|
527
|
-
spec_component =
|
528
|
-
|
583
|
+
spec_component = WhenSpec(
|
584
|
+
spec_component_details.predicate, spec_component_details.mustrd_triple_store["type"])
|
529
585
|
spec_component.value = query.get("query")
|
530
586
|
spec_component.paramQuery = query.get("param_query")
|
531
587
|
spec_component.queryTemplate = query.get("query_template")
|
588
|
+
spec_component.spec_component_details = spec_component_details
|
532
589
|
if spec_component.value:
|
533
590
|
spec_component.queryType = spec_component_details.spec_graph.value(
|
534
591
|
subject=spec_component_details.spec_component_node,
|
@@ -541,23 +598,65 @@ def _get_spec_component_AnzoGraphmartLayerSparqlSource(spec_component_details: S
|
|
541
598
|
|
542
599
|
@get_spec_component.method(Default)
def _get_spec_component_default(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Fallback for (data source type, predicate) pairs with no registered loader.

    Always raises ValueError. When the pair is not among the registered
    dispatch keys, the message also lists every registered combination.
    """
    registered = [key for key in get_spec_component.methods.keys() if key != Default]
    requested = (spec_component_details.data_source_type, spec_component_details.predicate)

    if requested in registered:
        raise ValueError(
            f"Invalid combination of data source type ({spec_component_details.data_source_type}) and "
            f"spec component ({spec_component_details.predicate})")

    valid_types = ', '.join(f"({source}, {component})" for source, component in registered)
    raise ValueError(
        f"Invalid combination of data source type ({spec_component_details.data_source_type}) and "
        f"spec component ({spec_component_details.predicate}). Valid combinations are: {valid_types}"
    )
|
547
612
|
|
548
613
|
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
614
|
+
@get_spec_component.method((MUST.SpadeEdnGroupSource, MUST.when))
def _get_spec_component_spadeednsource_when(spec_component_details: SpecComponentDetails) -> SpadeEdnGroupSourceWhenSpec:
    """Build a SpadeEdnGroupSourceWhenSpec from MUST.fileName / MUST.groupId / MUST.queryType.

    When a file name is present the EDN file is parsed and value becomes a list
    with one WhenSpec per "sparql-file" step of the matching group. Any failure
    while reading or parsing is logged and leaves value set to None.

    NOTE(review): the same dispatch key is registered again further down this
    module; presumably the later registration replaces this one - confirm.
    """
    from edn_format import Keyword

    graph = spec_component_details.spec_graph
    node = spec_component_details.spec_component_node

    spec_component = SpadeEdnGroupSourceWhenSpec()
    spec_component.file = graph.value(subject=node, predicate=MUST.fileName)
    spec_component.groupId = graph.value(subject=node, predicate=MUST.groupId)
    spec_component.queryType = graph.value(subject=node, predicate=MUST.queryType)

    # Without a file there is nothing to parse; value keeps its default.
    if not spec_component.file:
        return spec_component

    try:
        with open(spec_component.file, "r") as edn_file:
            parsed_edn = edn_format.loads(edn_file.read())

        # Pick the first step group whose :group-id equals the configured group id
        group_data = None
        for candidate in parsed_edn.get(Keyword("step-groups"), []):
            if candidate.get(Keyword("group-id")) == spec_component.groupId:
                group_data = candidate
                break

        if not group_data:
            raise ValueError(f"Group ID {spec_component.groupId} not found in EDN file {spec_component.file}")

        # One WhenSpec per sparql-file step; value holds the step's file path
        spec_component.value = [
            WhenSpec(value=step.get(Keyword("filepath")), queryType=MUST.InsertSparql)
            for step in group_data.get(Keyword("steps"), [])
            if step.get(Keyword("type")) == Keyword("sparql-file")
        ]
    except Exception as e:
        log.error(f"Failed to parse EDN file {spec_component.file}: {e}")
        spec_component.value = None

    return spec_component
|
562
661
|
|
563
662
|
|
@@ -633,7 +732,7 @@ def get_spec_from_table(subject: URIRef,
|
|
633
732
|
columns.add(row.variable.value + "_datatype")
|
634
733
|
# add an additional column for the sort order (if any) of the results
|
635
734
|
columns.add("order")
|
636
|
-
# create an empty dataframe to populate with the results
|
735
|
+
# create an empty dataframe to populate with the results data
|
637
736
|
df = pandas.DataFrame(index=list(index), columns=list(columns))
|
638
737
|
# fill the dataframe with the results data
|
639
738
|
for row in expected_results:
|
@@ -699,3 +798,84 @@ def is_then_select_ordered(subject: URIRef, predicate: URIRef, spec_graph: Graph
|
|
699
798
|
}}"""
|
700
799
|
is_ordered = spec_graph.query(ask_select_ordered)
|
701
800
|
return is_ordered.askAnswer
|
801
|
+
|
802
|
+
|
803
|
+
@get_spec_component.method((MUST.SpadeEdnGroupSource, MUST.when))
def _get_spec_component_spade_edn_group_source_when(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Load one step group of a Spade EDN file as a list of WHEN steps.

    The EDN file location comes from get_file_or_fileurl and is made absolute
    with get_file_absolute_path. The group named by MUST.groupId is looked up
    under :step-groups, and every step of type :sparql-file is read from disk
    and wrapped in a WhenSpec with queryType MUST.UpdateSparql.

    Returns:
        A SpadeEdnGroupSourceWhenSpec whose value is the list of WhenSpec steps,
        file is the absolute EDN path, groupId is the normalized group id and
        queryType is MUST.SpadeEdnGroupSource.

    Raises:
        ValueError: when the EDN file is missing or cannot be decoded, groupId
            is absent, the group is not in the file, or a step's SPARQL file
            is missing.
    """
    # Imported up front because Keyword is needed on every path below. It used to be
    # imported only inside the ':'-prefix branch, so a groupId without a leading ':'
    # failed with UnboundLocalError at the first Keyword(...) call.
    from edn_format import Keyword

    spec_component = SpadeEdnGroupSourceWhenSpec()

    # Retrieve the file path for the EDN file
    file_path = get_file_or_fileurl(spec_component_details)
    absolute_file_path = get_file_absolute_path(spec_component_details, file_path)

    # Parse the EDN file
    try:
        edn_content = Path(absolute_file_path).read_text()
        edn_data = edn_format.loads(edn_content)
    except FileNotFoundError as e:
        raise ValueError(f"EDN file not found: {absolute_file_path}") from e
    except edn_format.EDNDecodeError as e:
        raise ValueError(f"Failed to parse EDN file {absolute_file_path}: {e}") from e

    # Retrieve and normalize the group ID
    group_id = spec_component_details.spec_graph.value(
        subject=spec_component_details.spec_component_node,
        predicate=MUST.groupId
    )

    if not group_id:
        raise ValueError("groupId is missing for SpadeEdnGroupSource")

    if str(group_id).startswith(':'):
        # ":name" in the spec is compared against an EDN keyword
        group_id = Keyword(str(group_id).lstrip(':'))
    else:
        group_id = str(group_id)

    # Extract the relevant group data
    step_groups = edn_data.get(Keyword("step-groups"), [])
    group_data = next((item for item in step_groups if item.get(Keyword("group-id")) == group_id), None)

    if not group_data:
        raise ValueError(f"Group ID {group_id} not found in EDN file {absolute_file_path}")

    # Create a list of WhenSpec objects
    when_specs = []
    for step in group_data.get(Keyword("steps"), []):
        step_type = step.get(Keyword("type"))
        step_file = step.get(Keyword("filepath"))

        if step_type == Keyword("sparql-file"):
            # NOTE(review): step_file is opened as given, i.e. relative to the current
            # working directory rather than to the EDN file - confirm this is intended.
            try:
                with open(step_file, 'r') as sparql_file:
                    sparql_query = sparql_file.read()
            except FileNotFoundError as e:
                raise ValueError(f"SPARQL file not found: {step_file}") from e

            # Every step is treated as an UpdateSparql query;
            # that won't be true for ASK, but good for now.
            when_specs.append(WhenSpec(
                value=sparql_query,
                queryType=MUST.UpdateSparql,
                bindings=None
            ))

    spec_component.file = str(absolute_file_path)
    spec_component.groupId = group_id
    spec_component.value = when_specs
    spec_component.queryType = MUST.SpadeEdnGroupSource  # Correct query type

    return spec_component
|
871
|
+
|
872
|
+
|
873
|
+
def parse_sparql_query(query_string: str):
    """
    Parses a SPARQL query string and returns a query object.

    The parsing is delegated to rdflib's parseQuery.

    Raises:
        ValueError: for any failure while importing the parser or parsing the
            query; the original exception is attached as __cause__.
    """
    try:
        from rdflib.plugins.sparql.parser import parseQuery
        return parseQuery(query_string)
    except Exception as e:
        # Chain explicitly so the underlying parser error stays inspectable.
        raise ValueError(f"Failed to parse SPARQL query: {e}") from e
|
mustrd/steprunner.py
CHANGED
@@ -23,15 +23,15 @@ SOFTWARE.
|
|
23
23
|
"""
|
24
24
|
|
25
25
|
import json
|
26
|
+
import os
|
26
27
|
|
27
|
-
from . import logger_setup
|
28
28
|
from multimethods import MultiMethod, Default
|
29
29
|
from .namespace import MUST, TRIPLESTORE
|
30
30
|
from rdflib import Graph, URIRef
|
31
31
|
from .mustrdRdfLib import execute_select as execute_select_rdflib
|
32
32
|
from .mustrdRdfLib import execute_construct as execute_construct_rdflib
|
33
33
|
from .mustrdRdfLib import execute_update as execute_update_rdflib
|
34
|
-
from .mustrdAnzo import upload_given as upload_given_anzo
|
34
|
+
from .mustrdAnzo import get_query_from_step, upload_given as upload_given_anzo
|
35
35
|
from .mustrdAnzo import execute_update as execute_update_anzo
|
36
36
|
from .mustrdAnzo import execute_construct as execute_construct_anzo
|
37
37
|
from .mustrdAnzo import execute_select as execute_select_anzo
|
@@ -39,9 +39,11 @@ from .mustrdGraphDb import upload_given as upload_given_graphdb
|
|
39
39
|
from .mustrdGraphDb import execute_update as execute_update_graphdb
|
40
40
|
from .mustrdGraphDb import execute_construct as execute_construct_graphdb
|
41
41
|
from .mustrdGraphDb import execute_select as execute_select_graphdb
|
42
|
-
from .spec_component import AnzoWhenSpec, WhenSpec
|
42
|
+
from .spec_component import AnzoWhenSpec, WhenSpec, SpadeEdnGroupSourceWhenSpec
|
43
|
+
import logging
|
44
|
+
from edn_format import loads, Keyword
|
43
45
|
|
44
|
-
log =
|
46
|
+
log = logging.getLogger(__name__)
|
45
47
|
|
46
48
|
|
47
49
|
def dispatch_upload_given(triple_store: dict, given: Graph):
|
@@ -71,59 +73,68 @@ def _upload_given_anzo(triple_store: dict, given: Graph):
|
|
71
73
|
def dispatch_run_when(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
    """Compute the dispatch key for run_when: (triple store type, query type).

    The key is logged at INFO level together with spec_uri before being returned.
    """
    ts, query_type = triple_store['type'], when.queryType
    log.info(f"dispatch_run_when: spec_uri={spec_uri}, ({ts},{query_type})")
    return ts, query_type
|
76
78
|
|
77
79
|
|
78
|
-
|
80
|
+
run_when_impl = MultiMethod('run_when', dispatch_run_when)
|
79
81
|
|
80
82
|
|
81
|
-
@
|
83
|
+
@run_when_impl.method((TRIPLESTORE.Anzo, MUST.UpdateSparql))
def _anzo_run_when_update(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
    """Run a SPARQL UPDATE WHEN step on Anzo.

    If the component already carries the query text in when.value it is used
    directly. Otherwise the text is fetched with get_query_from_step, using the
    triple store and query step URI stored on the component.
    """
    log.debug(f"_anzo_run_when_update {spec_uri} {triple_store} {when} {type(when)}")
    if when.value is not None:
        # we must already have the query
        query = when.value
    else:
        # fetch the query from the query step on anzo
        details = when.spec_component_details
        query = get_query_from_step(triple_store=details.mustrd_triple_store,
                                    query_step_uri=when.query_step_uri)
    log.debug(f"_anzo_run_when_update.query {query}")
    return execute_update_anzo(triple_store, query, when.bindings)
|
95
|
+
|
96
|
+
|
97
|
+
@run_when_impl.method((TRIPLESTORE.Anzo, MUST.ConstructSparql))
def _anzo_run_when_construct(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
    """Run a SPARQL CONSTRUCT WHEN step via execute_construct_anzo; spec_uri is unused."""
    sparql, bindings = when.value, when.bindings
    return execute_construct_anzo(triple_store, sparql, bindings)
|
89
100
|
|
90
101
|
|
91
|
-
@
|
102
|
+
@run_when_impl.method((TRIPLESTORE.Anzo, MUST.SelectSparql))
def _anzo_run_when_select(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
    """Run a SPARQL SELECT WHEN step via execute_select_anzo; spec_uri is unused."""
    sparql, bindings = when.value, when.bindings
    return execute_select_anzo(triple_store, sparql, bindings)
|
94
105
|
|
95
106
|
|
96
|
-
@
|
107
|
+
@run_when_impl.method((TRIPLESTORE.GraphDb, MUST.UpdateSparql))
def _graphdb_run_when_update(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
    """Run a SPARQL UPDATE WHEN step via execute_update_graphdb; spec_uri is unused."""
    sparql, bindings = when.value, when.bindings
    return execute_update_graphdb(triple_store, sparql, bindings)
|
99
110
|
|
100
111
|
|
101
|
-
@
|
112
|
+
@run_when_impl.method((TRIPLESTORE.GraphDb, MUST.ConstructSparql))
def _graphdb_run_when_construct(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
    """Run a SPARQL CONSTRUCT WHEN step via execute_construct_graphdb; spec_uri is unused."""
    sparql, bindings = when.value, when.bindings
    return execute_construct_graphdb(triple_store, sparql, bindings)
|
104
115
|
|
105
116
|
|
106
|
-
@
|
117
|
+
@run_when_impl.method((TRIPLESTORE.GraphDb, MUST.SelectSparql))
def _graphdb_run_when_select(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
    """Run a SPARQL SELECT WHEN step via execute_select_graphdb; spec_uri is unused."""
    sparql, bindings = when.value, when.bindings
    return execute_select_graphdb(triple_store, sparql, bindings)
|
109
120
|
|
110
121
|
|
111
|
-
@
|
122
|
+
@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.UpdateSparql))
def _rdflib_run_when_update(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
    """Run a SPARQL UPDATE WHEN step via execute_update_rdflib.

    The entry stored under triple_store["given"] is passed along as the second
    argument; spec_uri is unused.
    """
    given = triple_store["given"]
    sparql, bindings = when.value, when.bindings
    return execute_update_rdflib(triple_store, given, sparql, bindings)
|
114
125
|
|
115
126
|
|
116
|
-
@
|
127
|
+
@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.ConstructSparql))
def _rdflib_run_when_construct(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
    """Run a SPARQL CONSTRUCT WHEN step via execute_construct_rdflib.

    The entry stored under triple_store["given"] is passed along as the second
    argument; spec_uri is unused.
    """
    given = triple_store["given"]
    sparql, bindings = when.value, when.bindings
    return execute_construct_rdflib(triple_store, given, sparql, bindings)
|
119
130
|
|
120
131
|
|
121
|
-
@
|
132
|
+
@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.SelectSparql))
def _rdflib_run_when_select(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
    """Run a SPARQL SELECT WHEN step via execute_select_rdflib.

    The entry stored under triple_store["given"] is passed along as the second
    argument; spec_uri is unused.
    """
    given = triple_store["given"]
    sparql, bindings = when.value, when.bindings
    return execute_select_rdflib(triple_store, given, sparql, bindings)
|
124
135
|
|
125
136
|
|
126
|
-
@
|
137
|
+
@run_when_impl.method((TRIPLESTORE.Anzo, MUST.AnzoQueryDrivenUpdateSparql))
|
127
138
|
def _multi_run_when_anzo_query_driven_update(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
|
128
139
|
# run the parameters query to obtain the values for the template step and put them into a dictionary
|
129
140
|
query_parameters = json.loads(execute_select_anzo(triple_store, when.paramQuery, None))
|
@@ -150,8 +161,52 @@ def _multi_run_when_anzo_query_driven_update(spec_uri: URIRef, triple_store: dic
|
|
150
161
|
return result
|
151
162
|
|
152
163
|
|
153
|
-
@
|
164
|
+
@run_when_impl.method((TRIPLESTORE.Anzo, MUST.SpadeEdnGroupSource))
def _spade_edn_group_source(spec_uri: URIRef, triple_store: dict, when: SpadeEdnGroupSourceWhenSpec):
    """Run every step of a Spade EDN group on Anzo and collect the results.

    Each entry of when.value is dispatched through run_when_impl. A step that
    raises is logged at ERROR level and skipped, so the returned list only
    holds the results of the steps that completed.
    """
    log.info(f"Running SpadeEdnGroupSource for {spec_uri} using {triple_store}")

    results = []
    for step_when_spec in when.value:
        try:
            log.info(f"Dispatching run_when for step: {step_when_spec}")
            query_result = run_when_impl(spec_uri, triple_store, step_when_spec)
            log.info(f"Executed SPARQL query: {query_result}")
            results += [query_result]
        except Exception as e:
            # Best effort: report the failing step and carry on with the next one
            log.error(f"Failed to execute SPARQL query: {e}")

    log.debug(f"Final results: {results}")
    return results
|
182
|
+
|
183
|
+
|
184
|
+
@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.SpadeEdnGroupSource))
def _spade_edn_group_source(spec_uri: URIRef, triple_store: dict, when: SpadeEdnGroupSourceWhenSpec):
    """Run the UpdateSparql steps of a Spade EDN group on RdfLib and merge the results.

    Steps whose queryType is MUST.UpdateSparql are dispatched through
    run_when_impl and their result is added to one Graph. Other query types
    are only reported with a warning. A step that raises is logged at ERROR
    level and skipped.
    """
    log.info(f"Running SpadeEdnGroupSource for {spec_uri} using {triple_store}")

    merged_graph = Graph()
    for step_when_spec in when.value:
        try:
            if step_when_spec.queryType != MUST.UpdateSparql:
                log.warning(f"Unsupported queryType: {step_when_spec.queryType}")
                continue
            log.info(f"Dispatching run_when for UpdateSparql step: {step_when_spec}")
            query_result = run_when_impl(spec_uri, triple_store, step_when_spec)
            log.info(f"Executed SPARQL query: {query_result}")
            merged_graph += query_result  # Merge the resulting graph
        except Exception as e:
            # Best effort: report the failing step and carry on with the next one
            log.error(f"Failed to execute SPARQL query: {e}")

    log.debug(f"Final merged graph has {len(merged_graph)} triples.")
    return merged_graph
|
205
|
+
|
206
|
+
|
207
|
+
@run_when_impl.method(Default)
|
154
208
|
def _multi_run_when_default(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
|
209
|
+
log.error(f"run_when not implemented for {spec_uri} {triple_store} {when}")
|
155
210
|
if when.queryType == MUST.AskSparql:
|
156
211
|
log.warning(f"Skipping {spec_uri}, SPARQL ASK not implemented.")
|
157
212
|
msg = "SPARQL ASK not implemented."
|
@@ -164,3 +219,6 @@ def _multi_run_when_default(spec_uri: URIRef, triple_store: dict, when: WhenSpec
|
|
164
219
|
log.warning(f"Skipping {spec_uri}, {when.queryType} is not a valid SPARQL query type.")
|
165
220
|
msg = f"{when.queryType} is not a valid SPARQL query type."
|
166
221
|
raise NotImplementedError(msg)
|
222
|
+
|
223
|
+
log.debug(f"run_when registry: {run_when_impl} {dir(run_when_impl)}")
|
224
|
+
|