mustrd 0.3.0.0__py3-none-any.whl → 0.3.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mustrd/mustrdRdfLib.py CHANGED
@@ -25,6 +25,7 @@ SOFTWARE.
25
25
  from pyparsing import ParseException
26
26
  from rdflib import Graph
27
27
  from requests import RequestException
28
+ import logging
28
29
 
29
30
 
30
31
  def execute_select(triple_store: dict, given: Graph, when: str, bindings: dict = None) -> str:
@@ -38,7 +39,13 @@ def execute_select(triple_store: dict, given: Graph, when: str, bindings: dict =
38
39
 
39
40
  def execute_construct(triple_store: dict, given: Graph, when: str, bindings: dict = None) -> Graph:
40
41
  try:
41
- return given.query(when, initBindings=bindings).graph
42
+ logger = logging.getLogger(__name__)
43
+ logger.debug(f"Executing CONSTRUCT query: {when} with bindings: {bindings}")
44
+
45
+
46
+ result_graph = given.query(when, initBindings=bindings).graph
47
+ logger.debug(f"CONSTRUCT query executed successfully, resulting graph has {len(result_graph)} triples.")
48
+ return result_graph
42
49
  except ParseException:
43
50
  raise
44
51
  except Exception as e:
mustrd/namespace.py CHANGED
@@ -38,13 +38,15 @@ class MUST(DefinedNamespace):
38
38
  AnzoQueryDrivenUpdateSparql: URIRef
39
39
  AskSparql: URIRef
40
40
  DescribeSparql: URIRef
41
-
41
+ SpadeEdnGroupSource: URIRef
42
+
42
43
  # Specification properties
43
44
  given: URIRef
44
45
  when: URIRef
45
46
  then: URIRef
46
47
  dataSource: URIRef
47
48
  file: URIRef
49
+ fileurl: URIRef
48
50
  fileName: URIRef
49
51
  queryFolder: URIRef
50
52
  queryName: URIRef
@@ -124,3 +126,10 @@ class MUSTRDTEST(DefinedNamespace):
124
126
  triplestoreSpecPath: URIRef
125
127
  hasPytestPath: URIRef
126
128
  filterOnTripleStore: URIRef
129
+
130
+ from rdflib import Namespace
131
+
132
+ MUST = Namespace("https://mustrd.com/model/")
133
+
134
+ # Add SpadeEdnGroupSource to the namespace
135
+ MUST.SpadeEdnGroupSource = MUST["SpadeEdnGroupSource"]
mustrd/spec_component.py CHANGED
@@ -33,6 +33,7 @@ from rdflib import RDF, Graph, URIRef, Variable, Literal, XSD, util, Conjunctive
33
33
  from rdflib.exceptions import ParserError
34
34
  from rdflib.term import Node
35
35
  from rdflib.plugins.stores.memory import Memory
36
+ import edn_format
36
37
 
37
38
  from . import logger_setup
38
39
  from .mustrdAnzo import get_queries_for_layer, get_queries_from_templated_step
@@ -40,6 +41,7 @@ from .mustrdAnzo import get_query_from_querybuilder
40
41
  from .namespace import MUST, TRIPLESTORE
41
42
  from multimethods import MultiMethod, Default
42
43
  from .utils import get_mustrd_root
44
+ from urllib.parse import urlparse
43
45
 
44
46
  log = logger_setup.setup_logger(__name__)
45
47
 
@@ -68,6 +70,12 @@ class AnzoWhenSpec(WhenSpec):
68
70
  spec_component_details: any = None
69
71
 
70
72
 
73
@dataclass
class SpadeEdnGroupSourceWhenSpec(WhenSpec):
    """WHEN component describing one step group taken from an EDN file."""

    # Path of the EDN document the step group is read from.
    file: str = None
    # Identifier of the step group to pick out of that document.
    # NOTE(review): the handlers in this module also store non-str values
    # here (e.g. an EDN keyword) -- confirm the intended type.
    groupId: str = None
77
+
78
+
71
79
  @dataclass
72
80
  class ThenSpec(SpecComponent):
73
81
  value: Graph = Graph()
@@ -254,13 +262,13 @@ get_spec_component = MultiMethod("get_spec_component", get_spec_component_dispat
254
262
 
255
263
  @get_spec_component.method((MUST.InheritedDataset, MUST.given))
256
264
  def _get_spec_component_inheritedstate_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
257
- spec_component = init_spec_component(spec_component_details.predicate)
265
+ spec_component = GivenSpec()
258
266
  return spec_component
259
267
 
260
268
 
261
269
  @get_spec_component.method((MUST.FolderDataset, MUST.given))
262
270
  def _get_spec_component_folderdatasource_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
263
- spec_component = init_spec_component(spec_component_details.predicate)
271
+ spec_component = GivenSpec()
264
272
 
265
273
  file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
266
274
  predicate=MUST.fileName)
@@ -276,7 +284,7 @@ def _get_spec_component_folderdatasource_given(spec_component_details: SpecCompo
276
284
 
277
285
  @get_spec_component.method((MUST.FolderSparqlSource, MUST.when))
278
286
  def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComponentDetails) -> GivenSpec:
279
- spec_component = init_spec_component(spec_component_details.predicate)
287
+ spec_component = WhenSpec()
280
288
 
281
289
  file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
282
290
  predicate=MUST.fileName)
@@ -291,7 +299,7 @@ def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComp
291
299
 
292
300
  @get_spec_component.method((MUST.FolderDataset, MUST.then))
293
301
  def _get_spec_component_folderdatasource_then(spec_component_details: SpecComponentDetails) -> ThenSpec:
294
- spec_component = init_spec_component(spec_component_details.predicate)
302
+ spec_component = ThenSpec()
295
303
 
296
304
  file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
297
305
  predicate=MUST.fileName)
@@ -301,17 +309,44 @@ def _get_spec_component_folderdatasource_then(spec_component_details: SpecCompon
301
309
 
302
310
 
303
311
  @get_spec_component.method((MUST.FileDataset, MUST.given))
304
- @get_spec_component.method((MUST.FileDataset, MUST.then))
305
312
  def _get_spec_component_filedatasource(spec_component_details: SpecComponentDetails) -> GivenSpec:
306
- spec_component = init_spec_component(spec_component_details.predicate)
313
+ spec_component = GivenSpec()
314
+ return load_spec_component(spec_component_details, spec_component)
315
+
316
+ @get_spec_component.method((MUST.FileDataset, MUST.then))
317
+ def _get_spec_component_filedatasource(spec_component_details: SpecComponentDetails) -> ThenSpec:
318
+ spec_component = ThenSpec()
307
319
  return load_spec_component(spec_component_details, spec_component)
308
320
 
309
321
 
310
322
  def load_spec_component(spec_component_details, spec_component):
311
- file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
312
- predicate=MUST.file)))
323
+ file_path = get_file_or_fileurl(spec_component_details)
324
+ file_path = Path(str(file_path))
313
325
  return load_dataset_from_file(get_file_absolute_path(spec_component_details, file_path), spec_component)
314
326
 
327
def get_file_or_fileurl(spec_component_details):
    """Return the file location declared on a spec component node.

    ``MUST.file`` is consulted first; ``MUST.fileurl`` is the fallback.  When
    the chosen value starts with ``file://`` that scheme is removed so the
    remainder can be used as a local path.

    Args:
        spec_component_details: object exposing ``spec_graph`` (queried through
            ``.value(subject=..., predicate=...)``) and ``spec_component_node``.

    Returns:
        The value found in the graph, unchanged, or a plain ``str`` when a
        ``file://`` prefix had to be stripped.

    Raises:
        ValueError: neither ``MUST.file`` nor ``MUST.fileurl`` is present.
    """
    scheme = "file://"
    graph = spec_component_details.spec_graph
    node = spec_component_details.spec_component_node

    file_path = graph.value(subject=node, predicate=MUST.file)
    if file_path is None:
        file_path = graph.value(subject=node, predicate=MUST.fileurl)
    if file_path is None:
        # shacl validation will catch this, but we want to raise a more specific error
        raise ValueError("Neither MUST.file nor MUST.fileurl found for the spec component node")

    if str(file_path).startswith(scheme):
        # Strip the scheme by hand: urlparse assumes absolute paths and would
        # drop a leading "./".
        # NOTE(review): still needs confirming that this is Windows safe.
        new_path = str(file_path)[len(scheme):]
        log.debug(f"converted {file_path=} to {new_path=}")
        file_path = new_path
    return file_path
349
+
315
350
 
316
351
  def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
317
352
  if path.is_dir():
@@ -342,10 +377,10 @@ def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
342
377
 
343
378
  @get_spec_component.method((MUST.FileSparqlSource, MUST.when))
344
379
  def _get_spec_component_filedatasource_when(spec_component_details: SpecComponentDetails) -> SpecComponent:
345
- spec_component = init_spec_component(spec_component_details.predicate)
346
-
347
- file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
348
- predicate=MUST.file)))
380
+ spec_component = WhenSpec()
381
+ file_path = get_file_or_fileurl(spec_component_details)
382
+ # file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
383
+ # predicate=MUST.file)))
349
384
  spec_component.value = get_spec_component_from_file(get_file_absolute_path(spec_component_details, file_path))
350
385
 
351
386
  spec_component.queryType = spec_component_details.spec_graph.value(
@@ -357,7 +392,7 @@ def _get_spec_component_filedatasource_when(spec_component_details: SpecComponen
357
392
 
358
393
  @get_spec_component.method((MUST.TextSparqlSource, MUST.when))
359
394
  def _get_spec_component_TextSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
360
- spec_component = init_spec_component(spec_component_details.predicate)
395
+ spec_component = WhenSpec()
361
396
 
362
397
  # Get specComponent directly from config file (in text string)
363
398
  spec_component.value = str(
@@ -371,22 +406,35 @@ def _get_spec_component_TextSparqlSource(spec_component_details: SpecComponentDe
371
406
  return spec_component
372
407
 
373
408
 
374
- # https://github.com/Semantic-partners/mustrd/issues/98
375
- @get_spec_component.method((MUST.HttpDataset, MUST.given))
376
- @get_spec_component.method((MUST.HttpDataset, MUST.when))
377
- @get_spec_component.method((MUST.HttpDataset, MUST.then))
378
- def _get_spec_component_HttpDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
379
- spec_component = init_spec_component(spec_component_details.predicate)
380
-
409
+ def _get_spec_component_HttpDataset_shared(spec_component_details: SpecComponentDetails, spec_component):
381
410
  # Get specComponent with http GET protocol
382
- spec_component.value = requests.get(str(
383
- spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
384
- predicate=MUST.dataSourceUrl)).content)
385
- spec_component.queryType = spec_component_details.spec_graph.value(
411
+ url = spec_component_details.spec_graph.value(
386
412
  subject=spec_component_details.spec_component_node,
387
- predicate=MUST.queryType)
413
+ predicate=MUST.dataSourceUrl
414
+ )
415
+ if not url:
416
+ raise ValueError("MUST.dataSourceUrl is missing for HttpDataset")
417
+ response = requests.get(str(url))
418
+ response.raise_for_status()
419
+ spec_component.value = response.content
420
+ if hasattr(spec_component, "queryType"):
421
+ spec_component.queryType = spec_component_details.spec_graph.value(
422
+ subject=spec_component_details.spec_component_node,
423
+ predicate=MUST.queryType)
388
424
  return spec_component
389
425
 
426
@get_spec_component.method((MUST.HttpDataset, MUST.given))
def _get_spec_component_HttpDataset_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
    """Build the GIVEN component of an ``HttpDataset`` through the shared HTTP loader."""
    given_spec = GivenSpec()
    return _get_spec_component_HttpDataset_shared(spec_component_details, given_spec)
429
+
430
@get_spec_component.method((MUST.HttpDataset, MUST.when))
def _get_spec_component_HttpDataset_when(spec_component_details: SpecComponentDetails) -> WhenSpec:
    """Build the WHEN component of an ``HttpDataset`` through the shared HTTP loader."""
    when_spec = WhenSpec()
    return _get_spec_component_HttpDataset_shared(spec_component_details, when_spec)
433
+
434
@get_spec_component.method((MUST.HttpDataset, MUST.then))
def _get_spec_component_HttpDataset_then(spec_component_details: SpecComponentDetails) -> ThenSpec:
    """Build the THEN component of an ``HttpDataset`` through the shared HTTP loader."""
    then_spec = ThenSpec()
    return _get_spec_component_HttpDataset_shared(spec_component_details, then_spec)
437
+
390
438
 
391
439
  @get_spec_component.method((MUST.TableDataset, MUST.then))
392
440
  def _get_spec_component_TableDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
@@ -407,7 +455,7 @@ def _get_spec_component_EmptyTable(spec_component_details: SpecComponentDetails)
407
455
 
408
456
  @get_spec_component.method((MUST.EmptyGraph, MUST.then))
409
457
  def _get_spec_component_EmptyGraph(spec_component_details: SpecComponentDetails) -> SpecComponent:
410
- spec_component = init_spec_component(spec_component_details.predicate)
458
+ spec_component = ThenSpec()
411
459
 
412
460
  return spec_component
413
461
 
@@ -415,7 +463,11 @@ def _get_spec_component_EmptyGraph(spec_component_details: SpecComponentDetails)
415
463
  @get_spec_component.method((MUST.StatementsDataset, MUST.given))
416
464
  @get_spec_component.method((MUST.StatementsDataset, MUST.then))
417
465
  def _get_spec_component_StatementsDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
418
- spec_component = init_spec_component(spec_component_details.predicate)
466
+ # Choose GivenSpec or ThenSpec based on the predicate in spec_component_details
467
+ if spec_component_details.predicate == MUST.given:
468
+ spec_component = GivenSpec()
469
+ else:
470
+ spec_component = ThenSpec()
419
471
  store = Memory()
420
472
  g = URIRef("http://localhost:7200/test-graph")
421
473
  spec_component.value = ConjunctiveGraph(store=store)
@@ -430,7 +482,11 @@ def _get_spec_component_StatementsDataset(spec_component_details: SpecComponentD
430
482
  @get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.given))
431
483
  @get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.then))
432
484
  def _get_spec_component_AnzoGraphmartDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
433
- spec_component = init_spec_component(spec_component_details.predicate)
485
+ # Choose GivenSpec or ThenSpec based on the predicate in spec_component_details
486
+ if spec_component_details.predicate == MUST.given:
487
+ spec_component = GivenSpec()
488
+ else:
489
+ spec_component = ThenSpec()
434
490
 
435
491
  if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
436
492
  # Get GIVEN or THEN from anzo graphmart
@@ -443,7 +499,7 @@ def _get_spec_component_AnzoGraphmartDataset(spec_component_details: SpecCompone
443
499
 
444
500
  @get_spec_component.method((MUST.AnzoQueryBuilderSparqlSource, MUST.when))
445
501
  def _get_spec_component_AnzoQueryBuilderSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
446
- spec_component = init_spec_component(spec_component_details.predicate)
502
+ spec_component = WhenSpec()
447
503
 
448
504
  # Get WHEN specComponent from query builder
449
505
  if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
@@ -488,7 +544,7 @@ def _get_spec_component_AnzoGraphmartStepSparqlSource(spec_component_details: Sp
488
544
 
489
545
  @get_spec_component.method((MUST.AnzoGraphmartQueryDrivenTemplatedStepSparqlSource, MUST.when))
490
546
  def _get_spec_component_AnzoGraphmartQueryDrivenTemplatedStepSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent: # noqa
491
- spec_component = init_spec_component(
547
+ spec_component = WhenSpec(
492
548
  spec_component_details.predicate, spec_component_details.mustrd_triple_store["type"])
493
549
 
494
550
  # Get WHEN specComponent from query builder
@@ -524,8 +580,8 @@ def _get_spec_component_AnzoGraphmartLayerSparqlSource(spec_component_details: S
524
580
  else:
525
581
  raise ValueError("This test specification is specific to Anzo and can only be run against that platform.")
526
582
  for query in queries:
527
- spec_component = init_spec_component(spec_component_details.predicate,
528
- spec_component_details.mustrd_triple_store["type"])
583
+ spec_component = WhenSpec(
584
+ spec_component_details.predicate, spec_component_details.mustrd_triple_store["type"])
529
585
  spec_component.value = query.get("query")
530
586
  spec_component.paramQuery = query.get("param_query")
531
587
  spec_component.queryTemplate = query.get("query_template")
@@ -542,23 +598,65 @@ def _get_spec_component_AnzoGraphmartLayerSparqlSource(spec_component_details: S
542
598
 
543
599
@get_spec_component.method(Default)
def _get_spec_component_default(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Reject a (data source type, predicate) pair handled by the ``Default`` method.

    Always raises ``ValueError``.  When the requested pair is not among the
    keys registered on ``get_spec_component``, the message also lists every
    registered pair.
    """
    # Every dispatch key registered on the multimethod, minus the fallback itself.
    registered = [key for key in get_spec_component.methods.keys() if key != Default]

    source_type = spec_component_details.data_source_type
    component = spec_component_details.predicate
    complaint = (
        f"Invalid combination of data source type ({source_type}) and "
        f"spec component ({component})"
    )

    if (source_type, component) in registered:
        raise ValueError(complaint)

    valid_types = ', '.join(
        [f"({data_source_type}, {predicate})" for data_source_type, predicate in registered]
    )
    raise ValueError(f"{complaint}. Valid combinations are: {valid_types}")
548
612
 
549
613
 
550
- def init_spec_component(predicate: URIRef, triple_store_type: URIRef = None) -> GivenSpec | WhenSpec | ThenSpec | TableThenSpec: # noqa
551
- if predicate == MUST.given:
552
- spec_component = GivenSpec()
553
- elif predicate == MUST.when:
554
- if triple_store_type == TRIPLESTORE.Anzo:
555
- spec_component = AnzoWhenSpec()
556
- else:
557
- spec_component = WhenSpec()
558
- elif predicate == MUST.then:
559
- spec_component = ThenSpec()
560
- else:
561
- spec_component = SpecComponent()
614
@get_spec_component.method((MUST.SpadeEdnGroupSource, MUST.when))
def _get_spec_component_spadeednsource_when(spec_component_details: SpecComponentDetails) -> SpadeEdnGroupSourceWhenSpec:
    """Build a ``SpadeEdnGroupSourceWhenSpec`` from ``fileName``/``groupId``/``queryType``.

    When a file name is present the EDN file is read, the step group whose
    ``:group-id`` equals ``groupId`` is selected, and ``value`` becomes a list
    with one ``WhenSpec`` per ``:sparql-file`` step (carrying the step's
    ``:filepath``).  Any failure while doing so is logged and leaves ``value``
    set to ``None``.

    NOTE(review): a second handler is registered for the same
    ``(MUST.SpadeEdnGroupSource, MUST.when)`` key further down this module --
    confirm which of the two is meant to be active.
    """
    from edn_format import Keyword

    def lookup(predicate):
        # Single-valued property of the spec component node.
        return spec_component_details.spec_graph.value(
            subject=spec_component_details.spec_component_node,
            predicate=predicate,
        )

    spec_component = SpadeEdnGroupSourceWhenSpec()
    spec_component.file = lookup(MUST.fileName)
    spec_component.groupId = lookup(MUST.groupId)
    spec_component.queryType = lookup(MUST.queryType)

    # Nothing to parse without a file: hand back the partially filled component.
    if not spec_component.file:
        return spec_component

    try:
        with open(spec_component.file, "r") as edn_file:
            parsed_edn = edn_format.loads(edn_file.read())

        # First step group carrying the requested id, if any.
        group_data = None
        for candidate in parsed_edn.get(Keyword("step-groups"), []):
            if candidate.get(Keyword("group-id")) == spec_component.groupId:
                group_data = candidate
                break

        if not group_data:
            # NOTE(review): this error is caught by the handler below and
            # turned into a log line plus ``value = None``.
            raise ValueError(f"Group ID {spec_component.groupId} not found in EDN file {spec_component.file}")

        # One WhenSpec per sparql-file step; other step types are ignored.
        spec_component.value = [
            WhenSpec(value=step.get(Keyword("filepath")), queryType=MUST.InsertSparql)
            for step in group_data.get(Keyword("steps"), [])
            if step.get(Keyword("type")) == Keyword("sparql-file")
        ]
    except Exception as e:
        log.error(f"Failed to parse EDN file {spec_component.file}: {e}")
        spec_component.value = None

    return spec_component
563
661
 
564
662
 
@@ -634,7 +732,7 @@ def get_spec_from_table(subject: URIRef,
634
732
  columns.add(row.variable.value + "_datatype")
635
733
  # add an additional column for the sort order (if any) of the results
636
734
  columns.add("order")
637
- # create an empty dataframe to populate with the results
735
+ # create an empty dataframe to populate with the results data
638
736
  df = pandas.DataFrame(index=list(index), columns=list(columns))
639
737
  # fill the dataframe with the results data
640
738
  for row in expected_results:
@@ -700,3 +798,84 @@ def is_then_select_ordered(subject: URIRef, predicate: URIRef, spec_graph: Graph
700
798
  }}"""
701
799
  is_ordered = spec_graph.query(ask_select_ordered)
702
800
  return is_ordered.askAnswer
801
+
802
+
803
@get_spec_component.method((MUST.SpadeEdnGroupSource, MUST.when))
def _get_spec_component_spade_edn_group_source_when(spec_component_details: SpecComponentDetails) -> SpecComponent:
    """Build a ``SpadeEdnGroupSourceWhenSpec`` from a step group of an EDN file.

    The EDN file is located through ``MUST.file``/``MUST.fileurl``, parsed,
    and the step group whose ``:group-id`` matches ``MUST.groupId`` is
    selected.  Each ``:sparql-file`` step of that group is read from disk and
    wrapped in a ``WhenSpec``; the resulting list becomes the component value.

    Raises:
        ValueError: the EDN file is missing or cannot be decoded, ``groupId``
            is absent, the group is not found, or a step's SPARQL file is
            missing.  The underlying exception, when there is one, is chained.
    """
    # Imported up front: a function-level import makes ``Keyword`` a local
    # name, so it must be bound on every path before it is used below.
    from edn_format import Keyword

    spec_component = SpadeEdnGroupSourceWhenSpec()

    # Retrieve the file path for the EDN file
    file_path = get_file_or_fileurl(spec_component_details)
    absolute_file_path = get_file_absolute_path(spec_component_details, file_path)

    # Parse the EDN file
    try:
        edn_content = Path(absolute_file_path).read_text()
        edn_data = edn_format.loads(edn_content)
    except FileNotFoundError as e:
        raise ValueError(f"EDN file not found: {absolute_file_path}") from e
    except edn_format.EDNDecodeError as e:
        raise ValueError(f"Failed to parse EDN file {absolute_file_path}: {e}") from e

    # Retrieve and normalize the group ID
    group_id = spec_component_details.spec_graph.value(
        subject=spec_component_details.spec_component_node,
        predicate=MUST.groupId
    )
    if not group_id:
        raise ValueError("groupId is missing for SpadeEdnGroupSource")

    group_id_text = str(group_id)
    if group_id_text.startswith(':'):
        # A leading colon marks an EDN keyword; compare as a Keyword.
        group_id = Keyword(group_id_text.lstrip(':'))
    else:
        group_id = group_id_text

    # Extract the relevant group data
    step_groups = edn_data.get(Keyword("step-groups"), [])
    group_data = next((item for item in step_groups if item.get(Keyword("group-id")) == group_id), None)
    if not group_data:
        raise ValueError(f"Group ID {group_id} not found in EDN file {absolute_file_path}")

    # Create a list of WhenSpec objects, one per sparql-file step
    when_specs = []
    for step in group_data.get(Keyword("steps"), []):
        if step.get(Keyword("type")) != Keyword("sparql-file"):
            continue
        step_file = step.get(Keyword("filepath"))
        try:
            with open(step_file, 'r') as sparql_file:
                sparql_query = sparql_file.read()
        except FileNotFoundError as e:
            raise ValueError(f"SPARQL file not found: {step_file}") from e

        # Every step is tagged as an UpdateSparql query.
        # NOTE(review): the earlier comment here spoke of CONSTRUCT queries
        # (and noted ASK is not covered) -- confirm the intended query type.
        when_specs.append(
            WhenSpec(
                value=sparql_query,
                queryType=MUST.UpdateSparql,
                bindings=None
            )
        )

    spec_component.file = str(absolute_file_path)
    spec_component.groupId = group_id
    spec_component.value = when_specs
    spec_component.queryType = MUST.SpadeEdnGroupSource

    return spec_component
871
+
872
+
873
def parse_sparql_query(query_string: str):
    """Parse a SPARQL query string and return the parsed query object.

    Args:
        query_string: text of the SPARQL query.

    Returns:
        Whatever ``rdflib.plugins.sparql.parser.parseQuery`` returns.

    Raises:
        ValueError: importing the parser or parsing the query failed; the
            original exception is attached as ``__cause__``.
    """
    try:
        from rdflib.plugins.sparql.parser import parseQuery
        return parseQuery(query_string)
    except Exception as e:
        # Chain explicitly so the underlying parser error stays inspectable.
        raise ValueError(f"Failed to parse SPARQL query: {e}") from e
mustrd/steprunner.py CHANGED
@@ -23,6 +23,7 @@ SOFTWARE.
23
23
  """
24
24
 
25
25
  import json
26
+ import os
26
27
 
27
28
  from multimethods import MultiMethod, Default
28
29
  from .namespace import MUST, TRIPLESTORE
@@ -38,8 +39,9 @@ from .mustrdGraphDb import upload_given as upload_given_graphdb
38
39
  from .mustrdGraphDb import execute_update as execute_update_graphdb
39
40
  from .mustrdGraphDb import execute_construct as execute_construct_graphdb
40
41
  from .mustrdGraphDb import execute_select as execute_select_graphdb
41
- from .spec_component import AnzoWhenSpec, WhenSpec
42
+ from .spec_component import AnzoWhenSpec, WhenSpec, SpadeEdnGroupSourceWhenSpec
42
43
  import logging
44
+ from edn_format import loads, Keyword
43
45
 
44
46
  log = logging.getLogger(__name__)
45
47
 
@@ -71,14 +73,14 @@ def _upload_given_anzo(triple_store: dict, given: Graph):
71
73
  def dispatch_run_when(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
72
74
  ts = triple_store['type']
73
75
  query_type = when.queryType
74
- log.info(f"dispatch_run_when to SPARQL type {query_type} to {ts}")
76
+ log.info(f"dispatch_run_when: spec_uri={spec_uri}, ({ts},{query_type})")
75
77
  return ts, query_type
76
78
 
77
79
 
78
- run_when = MultiMethod('run_when', dispatch_run_when)
80
+ run_when_impl = MultiMethod('run_when', dispatch_run_when)
79
81
 
80
82
 
81
- @run_when.method((TRIPLESTORE.Anzo, MUST.UpdateSparql))
83
+ @run_when_impl.method((TRIPLESTORE.Anzo, MUST.UpdateSparql))
82
84
  def _anzo_run_when_update(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
83
85
  log.debug(f"_anzo_run_when_update {spec_uri} {triple_store} {when} {type(when)}")
84
86
  if when.value is None:
@@ -92,47 +94,47 @@ def _anzo_run_when_update(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSp
92
94
  return execute_update_anzo(triple_store, query, when.bindings)
93
95
 
94
96
 
95
- @run_when.method((TRIPLESTORE.Anzo, MUST.ConstructSparql))
97
+ @run_when_impl.method((TRIPLESTORE.Anzo, MUST.ConstructSparql))
96
98
  def _anzo_run_when_construct(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
97
99
  return execute_construct_anzo(triple_store, when.value, when.bindings)
98
100
 
99
101
 
100
- @run_when.method((TRIPLESTORE.Anzo, MUST.SelectSparql))
102
+ @run_when_impl.method((TRIPLESTORE.Anzo, MUST.SelectSparql))
101
103
  def _anzo_run_when_select(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
102
104
  return execute_select_anzo(triple_store, when.value, when.bindings)
103
105
 
104
106
 
105
- @run_when.method((TRIPLESTORE.GraphDb, MUST.UpdateSparql))
107
+ @run_when_impl.method((TRIPLESTORE.GraphDb, MUST.UpdateSparql))
106
108
  def _graphdb_run_when_update(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
107
109
  return execute_update_graphdb(triple_store, when.value, when.bindings)
108
110
 
109
111
 
110
- @run_when.method((TRIPLESTORE.GraphDb, MUST.ConstructSparql))
112
+ @run_when_impl.method((TRIPLESTORE.GraphDb, MUST.ConstructSparql))
111
113
  def _graphdb_run_when_construct(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
112
114
  return execute_construct_graphdb(triple_store, when.value, when.bindings)
113
115
 
114
116
 
115
- @run_when.method((TRIPLESTORE.GraphDb, MUST.SelectSparql))
117
+ @run_when_impl.method((TRIPLESTORE.GraphDb, MUST.SelectSparql))
116
118
  def _graphdb_run_when_select(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
117
119
  return execute_select_graphdb(triple_store, when.value, when.bindings)
118
120
 
119
121
 
120
- @run_when.method((TRIPLESTORE.RdfLib, MUST.UpdateSparql))
122
+ @run_when_impl.method((TRIPLESTORE.RdfLib, MUST.UpdateSparql))
121
123
  def _rdflib_run_when_update(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
122
124
  return execute_update_rdflib(triple_store, triple_store["given"], when.value, when.bindings)
123
125
 
124
126
 
125
- @run_when.method((TRIPLESTORE.RdfLib, MUST.ConstructSparql))
127
+ @run_when_impl.method((TRIPLESTORE.RdfLib, MUST.ConstructSparql))
126
128
  def _rdflib_run_when_construct(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
127
129
  return execute_construct_rdflib(triple_store, triple_store["given"], when.value, when.bindings)
128
130
 
129
131
 
130
- @run_when.method((TRIPLESTORE.RdfLib, MUST.SelectSparql))
132
+ @run_when_impl.method((TRIPLESTORE.RdfLib, MUST.SelectSparql))
131
133
  def _rdflib_run_when_select(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
132
134
  return execute_select_rdflib(triple_store, triple_store["given"], when.value, when.bindings)
133
135
 
134
136
 
135
- @run_when.method((TRIPLESTORE.Anzo, MUST.AnzoQueryDrivenUpdateSparql))
137
+ @run_when_impl.method((TRIPLESTORE.Anzo, MUST.AnzoQueryDrivenUpdateSparql))
136
138
  def _multi_run_when_anzo_query_driven_update(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
137
139
  # run the parameters query to obtain the values for the template step and put them into a dictionary
138
140
  query_parameters = json.loads(execute_select_anzo(triple_store, when.paramQuery, None))
@@ -159,7 +161,50 @@ def _multi_run_when_anzo_query_driven_update(spec_uri: URIRef, triple_store: dic
159
161
  return result
160
162
 
161
163
 
162
- @run_when.method(Default)
164
@run_when_impl.method((TRIPLESTORE.Anzo, MUST.SpadeEdnGroupSource))
def _spade_edn_group_source(spec_uri: URIRef, triple_store: dict, when: SpadeEdnGroupSourceWhenSpec):
    """Run every step of a SpadeEdnGroupSource against Anzo and collect the results.

    Each entry of ``when.value`` is dispatched through ``run_when_impl``.  A
    step that raises is logged and skipped; the results of the remaining
    steps are returned as a list.
    """
    # NOTE(review): ``triple_store`` is interpolated whole into this log line
    # -- verify it carries nothing sensitive.
    log.info(f"Running SpadeEdnGroupSource for {spec_uri} using {triple_store}")

    collected = []
    for step in when.value:
        try:
            log.info(f"Dispatching run_when for step: {step}")
            outcome = run_when_impl(spec_uri, triple_store, step)
            log.info(f"Executed SPARQL query: {outcome}")
        except Exception as e:
            log.error(f"Failed to execute SPARQL query: {e}")
        else:
            collected.append(outcome)

    log.debug(f"Final results: {collected}")
    return collected
182
+
183
+
184
@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.SpadeEdnGroupSource))
def _spade_edn_group_source(spec_uri: URIRef, triple_store: dict, when: SpadeEdnGroupSourceWhenSpec):
    """Run the UpdateSparql steps of a SpadeEdnGroupSource on RdfLib and merge the results.

    Each entry of ``when.value`` whose ``queryType`` is ``MUST.UpdateSparql``
    is dispatched through ``run_when_impl`` and its result is added to one
    accumulated ``Graph``.  Other query types only produce a warning, and a
    step that raises is logged and skipped.
    """
    log.info(f"Running SpadeEdnGroupSource for {spec_uri} using {triple_store}")

    merged_graph = Graph()
    for step in when.value:
        try:
            if step.queryType != MUST.UpdateSparql:
                log.warning(f"Unsupported queryType: {step.queryType}")
                continue
            log.info(f"Dispatching run_when for UpdateSparql step: {step}")
            step_result = run_when_impl(spec_uri, triple_store, step)
            log.info(f"Executed SPARQL query: {step_result}")
            # Fold this step's graph into the accumulated result.
            merged_graph += step_result
        except Exception as e:
            log.error(f"Failed to execute SPARQL query: {e}")

    log.debug(f"Final merged graph has {len(merged_graph)} triples.")
    return merged_graph
205
+
206
+
207
+ @run_when_impl.method(Default)
163
208
  def _multi_run_when_default(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
164
209
  log.error(f"run_when not implemented for {spec_uri} {triple_store} {when}")
165
210
  if when.queryType == MUST.AskSparql:
@@ -174,3 +219,6 @@ def _multi_run_when_default(spec_uri: URIRef, triple_store: dict, when: WhenSpec
174
219
  log.warning(f"Skipping {spec_uri}, {when.queryType} is not a valid SPARQL query type.")
175
220
  msg = f"{when.queryType} is not a valid SPARQL query type."
176
221
  raise NotImplementedError(msg)
222
+
223
+ log.debug(f"run_when registry: {run_when_impl} {dir(run_when_impl)}")
224
+