mustrd 0.2.7a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mustrd/spec_component.py CHANGED
@@ -33,13 +33,15 @@ from rdflib import RDF, Graph, URIRef, Variable, Literal, XSD, util, Conjunctive
33
33
  from rdflib.exceptions import ParserError
34
34
  from rdflib.term import Node
35
35
  from rdflib.plugins.stores.memory import Memory
36
+ import edn_format
36
37
 
37
38
  from . import logger_setup
38
- from .mustrdAnzo import get_queries_for_layer, get_queries_from_templated_step, get_spec_component_from_graphmart
39
- from .mustrdAnzo import get_query_from_querybuilder, get_query_from_step
39
+ from .mustrdAnzo import get_queries_for_layer, get_queries_from_templated_step
40
+ from .mustrdAnzo import get_query_from_querybuilder
40
41
  from .namespace import MUST, TRIPLESTORE
41
42
  from multimethods import MultiMethod, Default
42
43
  from .utils import get_mustrd_root
44
+ from urllib.parse import urlparse
43
45
 
44
46
  log = logger_setup.setup_logger(__name__)
45
47
 
@@ -65,6 +67,13 @@ class WhenSpec(SpecComponent):
65
67
  class AnzoWhenSpec(WhenSpec):
66
68
  paramQuery: str = None
67
69
  queryTemplate: str = None
70
+ spec_component_details: any = None
71
+
72
+
73
+ @dataclass
74
+ class SpadeEdnGroupSourceWhenSpec(WhenSpec):
75
+ file: str = None
76
+ groupId: str = None
68
77
 
69
78
 
70
79
  @dataclass
@@ -108,6 +117,7 @@ def parse_spec_component(subject: URIRef,
108
117
  for spec_component_node in spec_component_nodes:
109
118
  data_source_types = get_data_source_types(subject, predicate, spec_graph, spec_component_node)
110
119
  for data_source_type in data_source_types:
120
+ log.debug(f"parse_spec_component {spec_component_node} {data_source_type} {mustrd_triple_store=}")
111
121
  spec_component_details = SpecComponentDetails(
112
122
  subject=subject,
113
123
  predicate=predicate,
@@ -117,6 +127,9 @@ def parse_spec_component(subject: URIRef,
117
127
  data_source_type=data_source_type,
118
128
  run_config=run_config,
119
129
  root_paths=get_components_roots(spec_graph, subject, run_config))
130
+
131
+ # get_spec_component potentially talks to anzo for EVERY spec, massively slowing things down
132
+ # can we defer it to run time?
120
133
  spec_component = get_spec_component(spec_component_details)
121
134
  if isinstance(spec_component, list):
122
135
  spec_components += spec_component
@@ -249,13 +262,13 @@ get_spec_component = MultiMethod("get_spec_component", get_spec_component_dispat
249
262
 
250
263
  @get_spec_component.method((MUST.InheritedDataset, MUST.given))
251
264
  def _get_spec_component_inheritedstate_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
252
- spec_component = init_spec_component(spec_component_details.predicate)
265
+ spec_component = GivenSpec()
253
266
  return spec_component
254
267
 
255
268
 
256
269
  @get_spec_component.method((MUST.FolderDataset, MUST.given))
257
270
  def _get_spec_component_folderdatasource_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
258
- spec_component = init_spec_component(spec_component_details.predicate)
271
+ spec_component = GivenSpec()
259
272
 
260
273
  file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
261
274
  predicate=MUST.fileName)
@@ -271,7 +284,7 @@ def _get_spec_component_folderdatasource_given(spec_component_details: SpecCompo
271
284
 
272
285
  @get_spec_component.method((MUST.FolderSparqlSource, MUST.when))
273
286
  def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComponentDetails) -> GivenSpec:
274
- spec_component = init_spec_component(spec_component_details.predicate)
287
+ spec_component = WhenSpec()
275
288
 
276
289
  file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
277
290
  predicate=MUST.fileName)
@@ -286,7 +299,7 @@ def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComp
286
299
 
287
300
  @get_spec_component.method((MUST.FolderDataset, MUST.then))
288
301
  def _get_spec_component_folderdatasource_then(spec_component_details: SpecComponentDetails) -> ThenSpec:
289
- spec_component = init_spec_component(spec_component_details.predicate)
302
+ spec_component = ThenSpec()
290
303
 
291
304
  file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
292
305
  predicate=MUST.fileName)
@@ -296,17 +309,44 @@ def _get_spec_component_folderdatasource_then(spec_component_details: SpecCompon
296
309
 
297
310
 
298
311
  @get_spec_component.method((MUST.FileDataset, MUST.given))
299
- @get_spec_component.method((MUST.FileDataset, MUST.then))
300
312
  def _get_spec_component_filedatasource(spec_component_details: SpecComponentDetails) -> GivenSpec:
301
- spec_component = init_spec_component(spec_component_details.predicate)
313
+ spec_component = GivenSpec()
314
+ return load_spec_component(spec_component_details, spec_component)
315
+
316
+ @get_spec_component.method((MUST.FileDataset, MUST.then))
317
+ def _get_spec_component_filedatasource(spec_component_details: SpecComponentDetails) -> ThenSpec:
318
+ spec_component = ThenSpec()
302
319
  return load_spec_component(spec_component_details, spec_component)
303
320
 
304
321
 
305
322
  def load_spec_component(spec_component_details, spec_component):
306
- file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
307
- predicate=MUST.file)))
323
+ file_path = get_file_or_fileurl(spec_component_details)
324
+ file_path = Path(str(file_path))
308
325
  return load_dataset_from_file(get_file_absolute_path(spec_component_details, file_path), spec_component)
309
326
 
327
+ def get_file_or_fileurl(spec_component_details):
328
+ file_path = spec_component_details.spec_graph.value(
329
+ subject=spec_component_details.spec_component_node,
330
+ predicate=MUST.file
331
+ )
332
+ if file_path is None:
333
+ file_path = spec_component_details.spec_graph.value(
334
+ subject=spec_component_details.spec_component_node,
335
+ predicate=MUST.fileurl
336
+ )
337
+ if file_path is not None and str(file_path).startswith("file://"):
338
+ # Remove the 'file://' scheme to get the local path
339
+ # we do it this quick and dirty way because the urlparse library assumes absolute paths, and strips our leading ./
340
+ # need to confirm this approach is windows safe.
341
+
342
+ new_path = str(file_path)[7:]
343
+ log.debug(f"converted {file_path=} to {new_path=}")
344
+ file_path = new_path
345
+ if file_path is None:
346
+ # shacl validation will catch this, but we want to raise a more specific error
347
+ raise ValueError("Neither MUST.file nor MUST.fileurl found for the spec component node")
348
+ return file_path
349
+
310
350
 
311
351
  def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
312
352
  if path.is_dir():
@@ -337,10 +377,10 @@ def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
337
377
 
338
378
  @get_spec_component.method((MUST.FileSparqlSource, MUST.when))
339
379
  def _get_spec_component_filedatasource_when(spec_component_details: SpecComponentDetails) -> SpecComponent:
340
- spec_component = init_spec_component(spec_component_details.predicate)
341
-
342
- file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
343
- predicate=MUST.file)))
380
+ spec_component = WhenSpec()
381
+ file_path = get_file_or_fileurl(spec_component_details)
382
+ # file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
383
+ # predicate=MUST.file)))
344
384
  spec_component.value = get_spec_component_from_file(get_file_absolute_path(spec_component_details, file_path))
345
385
 
346
386
  spec_component.queryType = spec_component_details.spec_graph.value(
@@ -352,7 +392,7 @@ def _get_spec_component_filedatasource_when(spec_component_details: SpecComponen
352
392
 
353
393
  @get_spec_component.method((MUST.TextSparqlSource, MUST.when))
354
394
  def _get_spec_component_TextSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
355
- spec_component = init_spec_component(spec_component_details.predicate)
395
+ spec_component = WhenSpec()
356
396
 
357
397
  # Get specComponent directly from config file (in text string)
358
398
  spec_component.value = str(
@@ -366,22 +406,35 @@ def _get_spec_component_TextSparqlSource(spec_component_details: SpecComponentDe
366
406
  return spec_component
367
407
 
368
408
 
369
- # https://github.com/Semantic-partners/mustrd/issues/98
370
- @get_spec_component.method((MUST.HttpDataset, MUST.given))
371
- @get_spec_component.method((MUST.HttpDataset, MUST.when))
372
- @get_spec_component.method((MUST.HttpDataset, MUST.then))
373
- def _get_spec_component_HttpDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
374
- spec_component = init_spec_component(spec_component_details.predicate)
375
-
409
+ def _get_spec_component_HttpDataset_shared(spec_component_details: SpecComponentDetails, spec_component):
376
410
  # Get specComponent with http GET protocol
377
- spec_component.value = requests.get(str(
378
- spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
379
- predicate=MUST.dataSourceUrl)).content)
380
- spec_component.queryType = spec_component_details.spec_graph.value(
411
+ url = spec_component_details.spec_graph.value(
381
412
  subject=spec_component_details.spec_component_node,
382
- predicate=MUST.queryType)
413
+ predicate=MUST.dataSourceUrl
414
+ )
415
+ if not url:
416
+ raise ValueError("MUST.dataSourceUrl is missing for HttpDataset")
417
+ response = requests.get(str(url))
418
+ response.raise_for_status()
419
+ spec_component.value = response.content
420
+ if hasattr(spec_component, "queryType"):
421
+ spec_component.queryType = spec_component_details.spec_graph.value(
422
+ subject=spec_component_details.spec_component_node,
423
+ predicate=MUST.queryType)
383
424
  return spec_component
384
425
 
426
+ @get_spec_component.method((MUST.HttpDataset, MUST.given))
427
+ def _get_spec_component_HttpDataset_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
428
+ return _get_spec_component_HttpDataset_shared(spec_component_details, GivenSpec())
429
+
430
+ @get_spec_component.method((MUST.HttpDataset, MUST.when))
431
+ def _get_spec_component_HttpDataset_when(spec_component_details: SpecComponentDetails) -> WhenSpec:
432
+ return _get_spec_component_HttpDataset_shared(spec_component_details, WhenSpec())
433
+
434
+ @get_spec_component.method((MUST.HttpDataset, MUST.then))
435
+ def _get_spec_component_HttpDataset_then(spec_component_details: SpecComponentDetails) -> ThenSpec:
436
+ return _get_spec_component_HttpDataset_shared(spec_component_details, ThenSpec())
437
+
385
438
 
386
439
  @get_spec_component.method((MUST.TableDataset, MUST.then))
387
440
  def _get_spec_component_TableDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
@@ -402,7 +455,7 @@ def _get_spec_component_EmptyTable(spec_component_details: SpecComponentDetails)
402
455
 
403
456
  @get_spec_component.method((MUST.EmptyGraph, MUST.then))
404
457
  def _get_spec_component_EmptyGraph(spec_component_details: SpecComponentDetails) -> SpecComponent:
405
- spec_component = init_spec_component(spec_component_details.predicate)
458
+ spec_component = ThenSpec()
406
459
 
407
460
  return spec_component
408
461
 
@@ -410,7 +463,11 @@ def _get_spec_component_EmptyGraph(spec_component_details: SpecComponentDetails)
410
463
  @get_spec_component.method((MUST.StatementsDataset, MUST.given))
411
464
  @get_spec_component.method((MUST.StatementsDataset, MUST.then))
412
465
  def _get_spec_component_StatementsDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
413
- spec_component = init_spec_component(spec_component_details.predicate)
466
+ # Choose GivenSpec or ThenSpec based on the predicate in spec_component_details
467
+ if spec_component_details.predicate == MUST.given:
468
+ spec_component = GivenSpec()
469
+ else:
470
+ spec_component = ThenSpec()
414
471
  store = Memory()
415
472
  g = URIRef("http://localhost:7200/test-graph")
416
473
  spec_component.value = ConjunctiveGraph(store=store)
@@ -425,18 +482,15 @@ def _get_spec_component_StatementsDataset(spec_component_details: SpecComponentD
425
482
  @get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.given))
426
483
  @get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.then))
427
484
  def _get_spec_component_AnzoGraphmartDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
428
- spec_component = init_spec_component(spec_component_details.predicate)
485
+ # Choose GivenSpec or ThenSpec based on the predicate in spec_component_details
486
+ if spec_component_details.predicate == MUST.given:
487
+ spec_component = GivenSpec()
488
+ else:
489
+ spec_component = ThenSpec()
429
490
 
430
491
  if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
431
492
  # Get GIVEN or THEN from anzo graphmart
432
- graphmart = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
433
- predicate=MUST.graphmart)
434
- layer = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
435
- predicate=MUST.layer)
436
- spec_component.value = get_spec_component_from_graphmart(
437
- triple_store=spec_component_details.mustrd_triple_store,
438
- graphmart=graphmart,
439
- layer=layer)
493
+ spec_component.spec_component_details = spec_component_details
440
494
  else:
441
495
  raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoGraphmartDataset}")
442
496
 
@@ -445,7 +499,7 @@ def _get_spec_component_AnzoGraphmartDataset(spec_component_details: SpecCompone
445
499
 
446
500
  @get_spec_component.method((MUST.AnzoQueryBuilderSparqlSource, MUST.when))
447
501
  def _get_spec_component_AnzoQueryBuilderSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
448
- spec_component = init_spec_component(spec_component_details.predicate)
502
+ spec_component = WhenSpec()
449
503
 
450
504
  # Get WHEN specComponent from query builder
451
505
  if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
@@ -468,14 +522,16 @@ def _get_spec_component_AnzoQueryBuilderSparqlSource(spec_component_details: Spe
468
522
 
469
523
  @get_spec_component.method((MUST.AnzoGraphmartStepSparqlSource, MUST.when))
470
524
  def _get_spec_component_AnzoGraphmartStepSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
471
- spec_component = init_spec_component(spec_component_details.predicate)
525
+ spec_component = AnzoWhenSpec()
472
526
 
473
527
  # Get WHEN specComponent from query builder
474
528
  if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
475
529
  query_step_uri = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
476
530
  predicate=MUST.anzoQueryStep)
477
- spec_component.value = get_query_from_step(triple_store=spec_component_details.mustrd_triple_store,
478
- query_step_uri=query_step_uri)
531
+ spec_component.spec_component_details = spec_component_details
532
+ spec_component.query_step_uri = query_step_uri
533
+ # spec_component.value = get_query_from_step(triple_store=spec_component_details.mustrd_triple_store,
534
+ # query_step_uri=query_step_uri)
479
535
  # If anzo specific function is called but no anzo defined
480
536
  else:
481
537
  raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoGraphmartStepSparqlSource}")
@@ -488,7 +544,7 @@ def _get_spec_component_AnzoGraphmartStepSparqlSource(spec_component_details: Sp
488
544
 
489
545
  @get_spec_component.method((MUST.AnzoGraphmartQueryDrivenTemplatedStepSparqlSource, MUST.when))
490
546
  def _get_spec_component_AnzoGraphmartQueryDrivenTemplatedStepSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent: # noqa
491
- spec_component = init_spec_component(
547
+ spec_component = WhenSpec(
492
548
  spec_component_details.predicate, spec_component_details.mustrd_triple_store["type"])
493
549
 
494
550
  # Get WHEN specComponent from query builder
@@ -524,11 +580,12 @@ def _get_spec_component_AnzoGraphmartLayerSparqlSource(spec_component_details: S
524
580
  else:
525
581
  raise ValueError("This test specification is specific to Anzo and can only be run against that platform.")
526
582
  for query in queries:
527
- spec_component = init_spec_component(spec_component_details.predicate,
528
- spec_component_details.mustrd_triple_store["type"])
583
+ spec_component = WhenSpec(
584
+ spec_component_details.predicate, spec_component_details.mustrd_triple_store["type"])
529
585
  spec_component.value = query.get("query")
530
586
  spec_component.paramQuery = query.get("param_query")
531
587
  spec_component.queryTemplate = query.get("query_template")
588
+ spec_component.spec_component_details = spec_component_details
532
589
  if spec_component.value:
533
590
  spec_component.queryType = spec_component_details.spec_graph.value(
534
591
  subject=spec_component_details.spec_component_node,
@@ -541,23 +598,65 @@ def _get_spec_component_AnzoGraphmartLayerSparqlSource(spec_component_details: S
541
598
 
542
599
  @get_spec_component.method(Default)
543
600
  def _get_spec_component_default(spec_component_details: SpecComponentDetails) -> SpecComponent:
601
+ valid_combinations = [key for key in get_spec_component.methods.keys() if key != Default]
602
+
603
+ if (spec_component_details.data_source_type, spec_component_details.predicate) not in valid_combinations:
604
+ valid_types = ', '.join([f"({data_source_type}, {predicate})" for data_source_type, predicate in valid_combinations])
605
+ raise ValueError(
606
+ f"Invalid combination of data source type ({spec_component_details.data_source_type}) and "
607
+ f"spec component ({spec_component_details.predicate}). Valid combinations are: {valid_types}"
608
+ )
544
609
  raise ValueError(
545
610
  f"Invalid combination of data source type ({spec_component_details.data_source_type}) and "
546
611
  f"spec component ({spec_component_details.predicate})")
547
612
 
548
613
 
549
- def init_spec_component(predicate: URIRef, triple_store_type: URIRef = None) -> GivenSpec | WhenSpec | ThenSpec | TableThenSpec: # noqa
550
- if predicate == MUST.given:
551
- spec_component = GivenSpec()
552
- elif predicate == MUST.when:
553
- if triple_store_type == TRIPLESTORE.Anzo:
554
- spec_component = AnzoWhenSpec()
555
- else:
556
- spec_component = WhenSpec()
557
- elif predicate == MUST.then:
558
- spec_component = ThenSpec()
559
- else:
560
- spec_component = SpecComponent()
614
+ @get_spec_component.method((MUST.SpadeEdnGroupSource, MUST.when))
615
+ def _get_spec_component_spadeednsource_when(spec_component_details: SpecComponentDetails) -> SpadeEdnGroupSourceWhenSpec:
616
+ from edn_format import Keyword
617
+
618
+ spec_component = SpadeEdnGroupSourceWhenSpec()
619
+ spec_component.file = spec_component_details.spec_graph.value(
620
+ subject=spec_component_details.spec_component_node,
621
+ predicate=MUST.fileName
622
+ )
623
+ spec_component.groupId = spec_component_details.spec_graph.value(
624
+ subject=spec_component_details.spec_component_node,
625
+ predicate=MUST.groupId
626
+ )
627
+ spec_component.queryType = spec_component_details.spec_graph.value(
628
+ subject=spec_component_details.spec_component_node,
629
+ predicate=MUST.queryType
630
+ )
631
+
632
+ # Initialize `value` by parsing the `file` attribute if available
633
+ if spec_component.file:
634
+ try:
635
+ with open(spec_component.file, "r") as edn_file:
636
+ edn_content = edn_file.read()
637
+ parsed_edn = edn_format.loads(edn_content)
638
+
639
+ # Extract group data based on group ID
640
+ step_groups = parsed_edn.get(Keyword("step-groups"), [])
641
+ group_data = next((item for item in step_groups if item.get(Keyword("group-id")) == spec_component.groupId), None)
642
+
643
+ if not group_data:
644
+ raise ValueError(f"Group ID {spec_component.groupId} not found in EDN file {spec_component.file}")
645
+
646
+ # Create a list of WhenSpec objects
647
+ when_specs = []
648
+ for step in group_data.get(Keyword("steps"), []):
649
+ step_type = step.get(Keyword("type"))
650
+ step_file = step.get(Keyword("filepath"))
651
+
652
+ if step_type == Keyword("sparql-file"):
653
+ when_specs.append(WhenSpec(value=step_file, queryType=MUST.InsertSparql))
654
+
655
+ spec_component.value = when_specs
656
+ except Exception as e:
657
+ log.error(f"Failed to parse EDN file {spec_component.file}: {e}")
658
+ spec_component.value = None
659
+
561
660
  return spec_component
562
661
 
563
662
 
@@ -633,7 +732,7 @@ def get_spec_from_table(subject: URIRef,
633
732
  columns.add(row.variable.value + "_datatype")
634
733
  # add an additional column for the sort order (if any) of the results
635
734
  columns.add("order")
636
- # create an empty dataframe to populate with the results
735
+ # create an empty dataframe to populate with the results data
637
736
  df = pandas.DataFrame(index=list(index), columns=list(columns))
638
737
  # fill the dataframe with the results data
639
738
  for row in expected_results:
@@ -699,3 +798,84 @@ def is_then_select_ordered(subject: URIRef, predicate: URIRef, spec_graph: Graph
699
798
  }}"""
700
799
  is_ordered = spec_graph.query(ask_select_ordered)
701
800
  return is_ordered.askAnswer
801
+
802
+
803
+ @get_spec_component.method((MUST.SpadeEdnGroupSource, MUST.when))
804
+ def _get_spec_component_spade_edn_group_source_when(spec_component_details: SpecComponentDetails) -> SpecComponent:
805
+ spec_component = SpadeEdnGroupSourceWhenSpec()
806
+
807
+ # Retrieve the file path for the EDN file
808
+ file_path = get_file_or_fileurl(spec_component_details)
809
+ absolute_file_path = get_file_absolute_path(spec_component_details, file_path)
810
+
811
+ # Parse the EDN file
812
+ try:
813
+ edn_content = Path(absolute_file_path).read_text()
814
+ edn_data = edn_format.loads(edn_content)
815
+ except FileNotFoundError:
816
+ raise ValueError(f"EDN file not found: {absolute_file_path}")
817
+ except edn_format.EDNDecodeError as e:
818
+ raise ValueError(f"Failed to parse EDN file {absolute_file_path}: {e}")
819
+
820
+ # Retrieve and normalize the group ID
821
+ group_id = spec_component_details.spec_graph.value(
822
+ subject=spec_component_details.spec_component_node,
823
+ predicate=MUST.groupId
824
+ )
825
+
826
+ if not group_id:
827
+ raise ValueError("groupId is missing for SpadeEdnGroupSource")
828
+
829
+ if str(group_id).startswith(':'):
830
+ group_id = str(group_id).lstrip(':')
831
+ from edn_format import Keyword
832
+ group_id = Keyword(group_id)
833
+ else:
834
+ group_id = str(group_id)
835
+
836
+ # Extract the relevant group data
837
+ step_groups = edn_data.get(Keyword("step-groups"), [])
838
+ group_data = next((item for item in step_groups if item.get(Keyword("group-id")) == group_id), None)
839
+
840
+ if not group_data:
841
+ raise ValueError(f"Group ID {group_id} not found in EDN file {absolute_file_path}")
842
+
843
+ # Create a list of WhenSpec objects
844
+ when_specs = []
845
+ for step in group_data.get(Keyword("steps"), []):
846
+ step_type = step.get(Keyword("type"))
847
+ step_file = step.get(Keyword("filepath"))
848
+
849
+ if step_type == Keyword("sparql-file"):
850
+ try:
851
+ with open(step_file, 'r') as sparql_file:
852
+ sparql_query = sparql_file.read()
853
+
854
+ # Assume the individuals are ConstructSparql queries
855
+ # won't be true for ASK, but good for now.
856
+ when_spec = WhenSpec(
857
+ value=sparql_query,
858
+ queryType=MUST.UpdateSparql,
859
+ bindings=None
860
+ )
861
+ when_specs.append(when_spec)
862
+ except FileNotFoundError:
863
+ raise ValueError(f"SPARQL file not found: {step_file}")
864
+
865
+ spec_component.file = str(absolute_file_path)
866
+ spec_component.groupId = group_id
867
+ spec_component.value = when_specs
868
+ spec_component.queryType = MUST.SpadeEdnGroupSource # Correct query type
869
+
870
+ return spec_component
871
+
872
+
873
+ def parse_sparql_query(query_string: str):
874
+ """
875
+ Parses a SPARQL query string and returns a query object.
876
+ """
877
+ try:
878
+ from rdflib.plugins.sparql.parser import parseQuery
879
+ return parseQuery(query_string)
880
+ except Exception as e:
881
+ raise ValueError(f"Failed to parse SPARQL query: {e}")
mustrd/steprunner.py CHANGED
@@ -23,15 +23,15 @@ SOFTWARE.
23
23
  """
24
24
 
25
25
  import json
26
+ import os
26
27
 
27
- from . import logger_setup
28
28
  from multimethods import MultiMethod, Default
29
29
  from .namespace import MUST, TRIPLESTORE
30
30
  from rdflib import Graph, URIRef
31
31
  from .mustrdRdfLib import execute_select as execute_select_rdflib
32
32
  from .mustrdRdfLib import execute_construct as execute_construct_rdflib
33
33
  from .mustrdRdfLib import execute_update as execute_update_rdflib
34
- from .mustrdAnzo import upload_given as upload_given_anzo
34
+ from .mustrdAnzo import get_query_from_step, upload_given as upload_given_anzo
35
35
  from .mustrdAnzo import execute_update as execute_update_anzo
36
36
  from .mustrdAnzo import execute_construct as execute_construct_anzo
37
37
  from .mustrdAnzo import execute_select as execute_select_anzo
@@ -39,9 +39,11 @@ from .mustrdGraphDb import upload_given as upload_given_graphdb
39
39
  from .mustrdGraphDb import execute_update as execute_update_graphdb
40
40
  from .mustrdGraphDb import execute_construct as execute_construct_graphdb
41
41
  from .mustrdGraphDb import execute_select as execute_select_graphdb
42
- from .spec_component import AnzoWhenSpec, WhenSpec
42
+ from .spec_component import AnzoWhenSpec, WhenSpec, SpadeEdnGroupSourceWhenSpec
43
+ import logging
44
+ from edn_format import loads, Keyword
43
45
 
44
- log = logger_setup.setup_logger(__name__)
46
+ log = logging.getLogger(__name__)
45
47
 
46
48
 
47
49
  def dispatch_upload_given(triple_store: dict, given: Graph):
@@ -71,59 +73,68 @@ def _upload_given_anzo(triple_store: dict, given: Graph):
71
73
  def dispatch_run_when(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
72
74
  ts = triple_store['type']
73
75
  query_type = when.queryType
74
- log.info(f"dispatch_run_when to SPARQL type {query_type} to {ts}")
76
+ log.info(f"dispatch_run_when: spec_uri={spec_uri}, ({ts},{query_type})")
75
77
  return ts, query_type
76
78
 
77
79
 
78
- run_when = MultiMethod('run_when', dispatch_run_when)
80
+ run_when_impl = MultiMethod('run_when', dispatch_run_when)
79
81
 
80
82
 
81
- @run_when.method((TRIPLESTORE.Anzo, MUST.UpdateSparql))
83
+ @run_when_impl.method((TRIPLESTORE.Anzo, MUST.UpdateSparql))
82
84
  def _anzo_run_when_update(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
83
- return execute_update_anzo(triple_store, when.value, when.bindings)
84
-
85
-
86
- @run_when.method((TRIPLESTORE.Anzo, MUST.ConstructSparql))
85
+ log.debug(f"_anzo_run_when_update {spec_uri} {triple_store} {when} {type(when)}")
86
+ if when.value is None:
87
+ # fetch the query from the query step on anzo
88
+ query = get_query_from_step(triple_store=when.spec_component_details.mustrd_triple_store,
89
+ query_step_uri=when.query_step_uri)
90
+ else:
91
+ # we must already have the query
92
+ query = when.value
93
+ log.debug(f"_anzo_run_when_update.query {query}")
94
+ return execute_update_anzo(triple_store, query, when.bindings)
95
+
96
+
97
+ @run_when_impl.method((TRIPLESTORE.Anzo, MUST.ConstructSparql))
87
98
  def _anzo_run_when_construct(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
88
99
  return execute_construct_anzo(triple_store, when.value, when.bindings)
89
100
 
90
101
 
91
- @run_when.method((TRIPLESTORE.Anzo, MUST.SelectSparql))
102
+ @run_when_impl.method((TRIPLESTORE.Anzo, MUST.SelectSparql))
92
103
  def _anzo_run_when_select(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
93
104
  return execute_select_anzo(triple_store, when.value, when.bindings)
94
105
 
95
106
 
96
- @run_when.method((TRIPLESTORE.GraphDb, MUST.UpdateSparql))
107
+ @run_when_impl.method((TRIPLESTORE.GraphDb, MUST.UpdateSparql))
97
108
  def _graphdb_run_when_update(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
98
109
  return execute_update_graphdb(triple_store, when.value, when.bindings)
99
110
 
100
111
 
101
- @run_when.method((TRIPLESTORE.GraphDb, MUST.ConstructSparql))
112
+ @run_when_impl.method((TRIPLESTORE.GraphDb, MUST.ConstructSparql))
102
113
  def _graphdb_run_when_construct(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
103
114
  return execute_construct_graphdb(triple_store, when.value, when.bindings)
104
115
 
105
116
 
106
- @run_when.method((TRIPLESTORE.GraphDb, MUST.SelectSparql))
117
+ @run_when_impl.method((TRIPLESTORE.GraphDb, MUST.SelectSparql))
107
118
  def _graphdb_run_when_select(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
108
119
  return execute_select_graphdb(triple_store, when.value, when.bindings)
109
120
 
110
121
 
111
- @run_when.method((TRIPLESTORE.RdfLib, MUST.UpdateSparql))
122
+ @run_when_impl.method((TRIPLESTORE.RdfLib, MUST.UpdateSparql))
112
123
  def _rdflib_run_when_update(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
113
124
  return execute_update_rdflib(triple_store, triple_store["given"], when.value, when.bindings)
114
125
 
115
126
 
116
- @run_when.method((TRIPLESTORE.RdfLib, MUST.ConstructSparql))
127
+ @run_when_impl.method((TRIPLESTORE.RdfLib, MUST.ConstructSparql))
117
128
  def _rdflib_run_when_construct(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
118
129
  return execute_construct_rdflib(triple_store, triple_store["given"], when.value, when.bindings)
119
130
 
120
131
 
121
- @run_when.method((TRIPLESTORE.RdfLib, MUST.SelectSparql))
132
+ @run_when_impl.method((TRIPLESTORE.RdfLib, MUST.SelectSparql))
122
133
  def _rdflib_run_when_select(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
123
134
  return execute_select_rdflib(triple_store, triple_store["given"], when.value, when.bindings)
124
135
 
125
136
 
126
- @run_when.method((TRIPLESTORE.Anzo, MUST.AnzoQueryDrivenUpdateSparql))
137
+ @run_when_impl.method((TRIPLESTORE.Anzo, MUST.AnzoQueryDrivenUpdateSparql))
127
138
  def _multi_run_when_anzo_query_driven_update(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
128
139
  # run the parameters query to obtain the values for the template step and put them into a dictionary
129
140
  query_parameters = json.loads(execute_select_anzo(triple_store, when.paramQuery, None))
@@ -150,8 +161,52 @@ def _multi_run_when_anzo_query_driven_update(spec_uri: URIRef, triple_store: dic
150
161
  return result
151
162
 
152
163
 
153
- @run_when.method(Default)
164
+ @run_when_impl.method((TRIPLESTORE.Anzo, MUST.SpadeEdnGroupSource))
165
+ def _spade_edn_group_source(spec_uri: URIRef, triple_store: dict, when: SpadeEdnGroupSourceWhenSpec):
166
+ log.info(f"Running SpadeEdnGroupSource for {spec_uri} using {triple_store}")
167
+
168
+ results = []
169
+
170
+ # Iterate over the list of WhenSpec objects in `when.value`
171
+ for step_when_spec in when.value:
172
+ try:
173
+ log.info(f"Dispatching run_when for step: {step_when_spec}")
174
+ query_result = run_when_impl(spec_uri, triple_store, step_when_spec)
175
+ log.info(f"Executed SPARQL query: {query_result}")
176
+ results.append(query_result)
177
+ except Exception as e:
178
+ log.error(f"Failed to execute SPARQL query: {e}")
179
+
180
+ log.debug(f"Final results: {results}")
181
+ return results
182
+
183
+
184
+ @run_when_impl.method((TRIPLESTORE.RdfLib, MUST.SpadeEdnGroupSource))
185
+ def _spade_edn_group_source(spec_uri: URIRef, triple_store: dict, when: SpadeEdnGroupSourceWhenSpec):
186
+ log.info(f"Running SpadeEdnGroupSource for {spec_uri} using {triple_store}")
187
+
188
+ merged_graph = Graph()
189
+
190
+ # Iterate over the list of WhenSpec objects in `when.value`
191
+ for step_when_spec in when.value:
192
+ try:
193
+ if step_when_spec.queryType == MUST.UpdateSparql:
194
+ log.info(f"Dispatching run_when for UpdateSparql step: {step_when_spec}")
195
+ query_result = run_when_impl(spec_uri, triple_store, step_when_spec)
196
+ log.info(f"Executed SPARQL query: {query_result}")
197
+ merged_graph += query_result # Merge the resulting graph
198
+ else:
199
+ log.warning(f"Unsupported queryType: {step_when_spec.queryType}")
200
+ except Exception as e:
201
+ log.error(f"Failed to execute SPARQL query: {e}")
202
+
203
+ log.debug(f"Final merged graph has {len(merged_graph)} triples.")
204
+ return merged_graph
205
+
206
+
207
+ @run_when_impl.method(Default)
154
208
  def _multi_run_when_default(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
209
+ log.error(f"run_when not implemented for {spec_uri} {triple_store} {when}")
155
210
  if when.queryType == MUST.AskSparql:
156
211
  log.warning(f"Skipping {spec_uri}, SPARQL ASK not implemented.")
157
212
  msg = "SPARQL ASK not implemented."
@@ -164,3 +219,6 @@ def _multi_run_when_default(spec_uri: URIRef, triple_store: dict, when: WhenSpec
164
219
  log.warning(f"Skipping {spec_uri}, {when.queryType} is not a valid SPARQL query type.")
165
220
  msg = f"{when.queryType} is not a valid SPARQL query type."
166
221
  raise NotImplementedError(msg)
222
+
223
+ log.debug(f"run_when registry: {run_when_impl} {dir(run_when_impl)}")
224
+