PyPI - mustrd - Versions diffs - 0.2.7a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl - Mend

mustrd 0.2.7a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

mustrd/README.md +2 -0
mustrd/anzo_utils.py +8 -5
mustrd/logger_setup.py +3 -0
mustrd/model/mustrdShapes.ttl +25 -6
mustrd/model/ontology.ttl +6 -2
mustrd/mustrd.py +508 -235
mustrd/mustrdAnzo.py +3 -2
mustrd/mustrdRdfLib.py +8 -1
mustrd/mustrdTestPlugin.py +299 -128
mustrd/namespace.py +10 -1
mustrd/spec_component.py +238 -58
mustrd/steprunner.py +78 -20
mustrd-0.3.1a0.dist-info/METADATA +96 -0
{mustrd-0.2.7a0.dist-info → mustrd-0.3.1a0.dist-info}/RECORD +17 -17
mustrd-0.2.7a0.dist-info/METADATA +0 -96
{mustrd-0.2.7a0.dist-info → mustrd-0.3.1a0.dist-info}/LICENSE +0 -0
{mustrd-0.2.7a0.dist-info → mustrd-0.3.1a0.dist-info}/WHEEL +0 -0
{mustrd-0.2.7a0.dist-info → mustrd-0.3.1a0.dist-info}/entry_points.txt +0 -0

mustrd/spec_component.py CHANGED Viewed

@@ -33,13 +33,15 @@ from rdflib import RDF, Graph, URIRef, Variable, Literal, XSD, util, Conjunctive
 from rdflib.exceptions import ParserError
 from rdflib.term import Node
 from rdflib.plugins.stores.memory import Memory
+import edn_format
 from . import logger_setup
-from .mustrdAnzo import get_queries_for_layer, get_queries_from_templated_step, get_spec_component_from_graphmart
-from .mustrdAnzo import get_query_from_querybuilder, get_query_from_step
+from .mustrdAnzo import get_queries_for_layer, get_queries_from_templated_step
+from .mustrdAnzo import get_query_from_querybuilder
 from .namespace import MUST, TRIPLESTORE
 from multimethods import MultiMethod, Default
 from .utils import get_mustrd_root
+from urllib.parse import urlparse
 log = logger_setup.setup_logger(__name__)
@@ -65,6 +67,13 @@ class WhenSpec(SpecComponent):
 class AnzoWhenSpec(WhenSpec):
     paramQuery: str = None
     queryTemplate: str = None
+    spec_component_details: any = None
+@dataclass
+class SpadeEdnGroupSourceWhenSpec(WhenSpec):
+    file: str = None
+    groupId: str = None
 @dataclass
@@ -108,6 +117,7 @@ def parse_spec_component(subject: URIRef,
     for spec_component_node in spec_component_nodes:
         data_source_types = get_data_source_types(subject, predicate, spec_graph, spec_component_node)
         for data_source_type in data_source_types:
+            log.debug(f"parse_spec_component {spec_component_node} {data_source_type} {mustrd_triple_store=}")
             spec_component_details = SpecComponentDetails(
                 subject=subject,
                 predicate=predicate,
@@ -117,6 +127,9 @@ def parse_spec_component(subject: URIRef,
                 data_source_type=data_source_type,
                 run_config=run_config,
                 root_paths=get_components_roots(spec_graph, subject, run_config))
+            # get_spec_component potentially talks to anzo for EVERY spec, massively slowing things down
+            # can we defer it to run time?
             spec_component = get_spec_component(spec_component_details)
             if isinstance(spec_component, list):
                 spec_components += spec_component
@@ -249,13 +262,13 @@ get_spec_component = MultiMethod("get_spec_component", get_spec_component_dispat
 @get_spec_component.method((MUST.InheritedDataset, MUST.given))
 def _get_spec_component_inheritedstate_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
-    spec_component = init_spec_component(spec_component_details.predicate)
+    spec_component = GivenSpec()
     return spec_component
 @get_spec_component.method((MUST.FolderDataset, MUST.given))
 def _get_spec_component_folderdatasource_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
-    spec_component = init_spec_component(spec_component_details.predicate)
+    spec_component = GivenSpec()
     file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
                                                         predicate=MUST.fileName)
@@ -271,7 +284,7 @@ def _get_spec_component_folderdatasource_given(spec_component_details: SpecCompo
 @get_spec_component.method((MUST.FolderSparqlSource, MUST.when))
 def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComponentDetails) -> GivenSpec:
-    spec_component = init_spec_component(spec_component_details.predicate)
+    spec_component = WhenSpec()
     file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
                                                         predicate=MUST.fileName)
@@ -286,7 +299,7 @@ def _get_spec_component_foldersparqlsource_when(spec_component_details: SpecComp
 @get_spec_component.method((MUST.FolderDataset, MUST.then))
 def _get_spec_component_folderdatasource_then(spec_component_details: SpecComponentDetails) -> ThenSpec:
-    spec_component = init_spec_component(spec_component_details.predicate)
+    spec_component = ThenSpec()
     file_name = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
                                                         predicate=MUST.fileName)
@@ -296,17 +309,44 @@ def _get_spec_component_folderdatasource_then(spec_component_details: SpecCompon
 @get_spec_component.method((MUST.FileDataset, MUST.given))
-@get_spec_component.method((MUST.FileDataset, MUST.then))
 def _get_spec_component_filedatasource(spec_component_details: SpecComponentDetails) -> GivenSpec:
-    spec_component = init_spec_component(spec_component_details.predicate)
+    spec_component = GivenSpec()
+    return load_spec_component(spec_component_details, spec_component)
+@get_spec_component.method((MUST.FileDataset, MUST.then))
+def _get_spec_component_filedatasource(spec_component_details: SpecComponentDetails) -> ThenSpec:
+    spec_component = ThenSpec()
     return load_spec_component(spec_component_details, spec_component)
 def load_spec_component(spec_component_details, spec_component):
-    file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
-                                                                 predicate=MUST.file)))
+    file_path = get_file_or_fileurl(spec_component_details)
+    file_path = Path(str(file_path))
     return load_dataset_from_file(get_file_absolute_path(spec_component_details, file_path), spec_component)
+def get_file_or_fileurl(spec_component_details):
+    file_path = spec_component_details.spec_graph.value(
+        subject=spec_component_details.spec_component_node,
+        predicate=MUST.file
+    )
+    if file_path is None:
+        file_path = spec_component_details.spec_graph.value(
+            subject=spec_component_details.spec_component_node,
+            predicate=MUST.fileurl
+        )
+        if file_path is not None and str(file_path).startswith("file://"):
+            # Remove the 'file://' scheme to get the local path
+            # we do it this quick and dirty way because the urlparse library assumes absolute paths, and strips our leading ./
+            # need to confirm this approach is windows safe.
+            new_path = str(file_path)[7:]
+            log.debug(f"converted {file_path=} to {new_path=}")
+            file_path = new_path
+    if file_path is None:
+        # shacl validation will catch this, but we want to raise a more specific error
+        raise ValueError("Neither MUST.file nor MUST.fileurl found for the spec component node")
+    return file_path
 def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
     if path.is_dir():
@@ -337,10 +377,10 @@ def load_dataset_from_file(path: Path, spec_component: ThenSpec) -> ThenSpec:
 @get_spec_component.method((MUST.FileSparqlSource, MUST.when))
 def _get_spec_component_filedatasource_when(spec_component_details: SpecComponentDetails) -> SpecComponent:
-    spec_component = init_spec_component(spec_component_details.predicate)
-    file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
-                                                                 predicate=MUST.file)))
+    spec_component = WhenSpec()
+    file_path = get_file_or_fileurl(spec_component_details)
+    # file_path = Path(str(spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
+    #                                                              predicate=MUST.file)))
     spec_component.value = get_spec_component_from_file(get_file_absolute_path(spec_component_details, file_path))
     spec_component.queryType = spec_component_details.spec_graph.value(
@@ -352,7 +392,7 @@ def _get_spec_component_filedatasource_when(spec_component_details: SpecComponen
 @get_spec_component.method((MUST.TextSparqlSource, MUST.when))
 def _get_spec_component_TextSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
-    spec_component = init_spec_component(spec_component_details.predicate)
+    spec_component = WhenSpec()
     # Get specComponent directly from config file (in text string)
     spec_component.value = str(
@@ -366,22 +406,35 @@ def _get_spec_component_TextSparqlSource(spec_component_details: SpecComponentDe
     return spec_component
-# https://github.com/Semantic-partners/mustrd/issues/98
-@get_spec_component.method((MUST.HttpDataset, MUST.given))
-@get_spec_component.method((MUST.HttpDataset, MUST.when))
-@get_spec_component.method((MUST.HttpDataset, MUST.then))
-def _get_spec_component_HttpDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
-    spec_component = init_spec_component(spec_component_details.predicate)
+def _get_spec_component_HttpDataset_shared(spec_component_details: SpecComponentDetails, spec_component):
     # Get specComponent with http GET protocol
-    spec_component.value = requests.get(str(
-        spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
-                                                predicate=MUST.dataSourceUrl)).content)
-    spec_component.queryType = spec_component_details.spec_graph.value(
+    url = spec_component_details.spec_graph.value(
         subject=spec_component_details.spec_component_node,
-        predicate=MUST.queryType)
+        predicate=MUST.dataSourceUrl
+    )
+    if not url:
+        raise ValueError("MUST.dataSourceUrl is missing for HttpDataset")
+    response = requests.get(str(url))
+    response.raise_for_status()
+    spec_component.value = response.content
+    if hasattr(spec_component, "queryType"):
+        spec_component.queryType = spec_component_details.spec_graph.value(
+            subject=spec_component_details.spec_component_node,
+            predicate=MUST.queryType)
     return spec_component
+@get_spec_component.method((MUST.HttpDataset, MUST.given))
+def _get_spec_component_HttpDataset_given(spec_component_details: SpecComponentDetails) -> GivenSpec:
+    return _get_spec_component_HttpDataset_shared(spec_component_details, GivenSpec())
+@get_spec_component.method((MUST.HttpDataset, MUST.when))
+def _get_spec_component_HttpDataset_when(spec_component_details: SpecComponentDetails) -> WhenSpec:
+    return _get_spec_component_HttpDataset_shared(spec_component_details, WhenSpec())
+@get_spec_component.method((MUST.HttpDataset, MUST.then))
+def _get_spec_component_HttpDataset_then(spec_component_details: SpecComponentDetails) -> ThenSpec:
+    return _get_spec_component_HttpDataset_shared(spec_component_details, ThenSpec())
 @get_spec_component.method((MUST.TableDataset, MUST.then))
 def _get_spec_component_TableDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
@@ -402,7 +455,7 @@ def _get_spec_component_EmptyTable(spec_component_details: SpecComponentDetails)
 @get_spec_component.method((MUST.EmptyGraph, MUST.then))
 def _get_spec_component_EmptyGraph(spec_component_details: SpecComponentDetails) -> SpecComponent:
-    spec_component = init_spec_component(spec_component_details.predicate)
+    spec_component = ThenSpec()
     return spec_component
@@ -410,7 +463,11 @@ def _get_spec_component_EmptyGraph(spec_component_details: SpecComponentDetails)
 @get_spec_component.method((MUST.StatementsDataset, MUST.given))
 @get_spec_component.method((MUST.StatementsDataset, MUST.then))
 def _get_spec_component_StatementsDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
-    spec_component = init_spec_component(spec_component_details.predicate)
+    # Choose GivenSpec or ThenSpec based on the predicate in spec_component_details
+    if spec_component_details.predicate == MUST.given:
+        spec_component = GivenSpec()
+    else:
+        spec_component = ThenSpec()
     store = Memory()
     g = URIRef("http://localhost:7200/test-graph")
     spec_component.value = ConjunctiveGraph(store=store)
@@ -425,18 +482,15 @@ def _get_spec_component_StatementsDataset(spec_component_details: SpecComponentD
 @get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.given))
 @get_spec_component.method((MUST.AnzoGraphmartDataset, MUST.then))
 def _get_spec_component_AnzoGraphmartDataset(spec_component_details: SpecComponentDetails) -> SpecComponent:
-    spec_component = init_spec_component(spec_component_details.predicate)
+    # Choose GivenSpec or ThenSpec based on the predicate in spec_component_details
+    if spec_component_details.predicate == MUST.given:
+        spec_component = GivenSpec()
+    else:
+        spec_component = ThenSpec()
     if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
         # Get GIVEN or THEN from anzo graphmart
-        graphmart = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
-                                                            predicate=MUST.graphmart)
-        layer = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
-                                                        predicate=MUST.layer)
-        spec_component.value = get_spec_component_from_graphmart(
-            triple_store=spec_component_details.mustrd_triple_store,
-            graphmart=graphmart,
-            layer=layer)
+        spec_component.spec_component_details = spec_component_details
     else:
         raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoGraphmartDataset}")
@@ -445,7 +499,7 @@ def _get_spec_component_AnzoGraphmartDataset(spec_component_details: SpecCompone
 @get_spec_component.method((MUST.AnzoQueryBuilderSparqlSource, MUST.when))
 def _get_spec_component_AnzoQueryBuilderSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
-    spec_component = init_spec_component(spec_component_details.predicate)
+    spec_component = WhenSpec()
     # Get WHEN specComponent from query builder
     if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
@@ -468,14 +522,16 @@ def _get_spec_component_AnzoQueryBuilderSparqlSource(spec_component_details: Spe
 @get_spec_component.method((MUST.AnzoGraphmartStepSparqlSource, MUST.when))
 def _get_spec_component_AnzoGraphmartStepSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent:
-    spec_component = init_spec_component(spec_component_details.predicate)
+    spec_component = AnzoWhenSpec()
     # Get WHEN specComponent from query builder
     if spec_component_details.mustrd_triple_store["type"] == TRIPLESTORE.Anzo:
         query_step_uri = spec_component_details.spec_graph.value(subject=spec_component_details.spec_component_node,
                                                                  predicate=MUST.anzoQueryStep)
-        spec_component.value = get_query_from_step(triple_store=spec_component_details.mustrd_triple_store,
-                                                   query_step_uri=query_step_uri)
+        spec_component.spec_component_details = spec_component_details
+        spec_component.query_step_uri = query_step_uri
+        # spec_component.value = get_query_from_step(triple_store=spec_component_details.mustrd_triple_store,
+        #                                            query_step_uri=query_step_uri)
     # If anzo specific function is called but no anzo defined
     else:
         raise ValueError(f"You must define {TRIPLESTORE.Anzo} to use {MUST.AnzoGraphmartStepSparqlSource}")
@@ -488,7 +544,7 @@ def _get_spec_component_AnzoGraphmartStepSparqlSource(spec_component_details: Sp
 @get_spec_component.method((MUST.AnzoGraphmartQueryDrivenTemplatedStepSparqlSource, MUST.when))
 def _get_spec_component_AnzoGraphmartQueryDrivenTemplatedStepSparqlSource(spec_component_details: SpecComponentDetails) -> SpecComponent: # noqa
-    spec_component = init_spec_component(
+    spec_component = WhenSpec(
         spec_component_details.predicate, spec_component_details.mustrd_triple_store["type"])
     # Get WHEN specComponent from query builder
@@ -524,11 +580,12 @@ def _get_spec_component_AnzoGraphmartLayerSparqlSource(spec_component_details: S
     else:
         raise ValueError("This test specification is specific to Anzo and can only be run against that platform.")
     for query in queries:
-        spec_component = init_spec_component(spec_component_details.predicate,
-                                             spec_component_details.mustrd_triple_store["type"])
+        spec_component = WhenSpec(
+            spec_component_details.predicate, spec_component_details.mustrd_triple_store["type"])
         spec_component.value = query.get("query")
         spec_component.paramQuery = query.get("param_query")
         spec_component.queryTemplate = query.get("query_template")
+        spec_component.spec_component_details = spec_component_details
         if spec_component.value:
             spec_component.queryType = spec_component_details.spec_graph.value(
                 subject=spec_component_details.spec_component_node,
@@ -541,23 +598,65 @@ def _get_spec_component_AnzoGraphmartLayerSparqlSource(spec_component_details: S
 @get_spec_component.method(Default)
 def _get_spec_component_default(spec_component_details: SpecComponentDetails) -> SpecComponent:
+    valid_combinations = [key for key in get_spec_component.methods.keys() if key != Default]
+    if (spec_component_details.data_source_type, spec_component_details.predicate) not in valid_combinations:
+        valid_types = ', '.join([f"({data_source_type}, {predicate})" for data_source_type, predicate in valid_combinations])
+        raise ValueError(
+            f"Invalid combination of data source type ({spec_component_details.data_source_type}) and "
+            f"spec component ({spec_component_details.predicate}). Valid combinations are: {valid_types}"
+        )
     raise ValueError(
         f"Invalid combination of data source type ({spec_component_details.data_source_type}) and "
         f"spec component ({spec_component_details.predicate})")
-def init_spec_component(predicate: URIRef, triple_store_type: URIRef = None) -> GivenSpec | WhenSpec | ThenSpec | TableThenSpec: # noqa
-    if predicate == MUST.given:
-        spec_component = GivenSpec()
-    elif predicate == MUST.when:
-        if triple_store_type == TRIPLESTORE.Anzo:
-            spec_component = AnzoWhenSpec()
-        else:
-            spec_component = WhenSpec()
-    elif predicate == MUST.then:
-        spec_component = ThenSpec()
-    else:
-        spec_component = SpecComponent()
+@get_spec_component.method((MUST.SpadeEdnGroupSource, MUST.when))
+def _get_spec_component_spadeednsource_when(spec_component_details: SpecComponentDetails) -> SpadeEdnGroupSourceWhenSpec:
+    from edn_format import Keyword
+    spec_component = SpadeEdnGroupSourceWhenSpec()
+    spec_component.file = spec_component_details.spec_graph.value(
+        subject=spec_component_details.spec_component_node,
+        predicate=MUST.fileName
+    )
+    spec_component.groupId = spec_component_details.spec_graph.value(
+        subject=spec_component_details.spec_component_node,
+        predicate=MUST.groupId
+    )
+    spec_component.queryType = spec_component_details.spec_graph.value(
+        subject=spec_component_details.spec_component_node,
+        predicate=MUST.queryType
+    )
+    # Initialize `value` by parsing the `file` attribute if available
+    if spec_component.file:
+        try:
+            with open(spec_component.file, "r") as edn_file:
+                edn_content = edn_file.read()
+                parsed_edn = edn_format.loads(edn_content)
+                # Extract group data based on group ID
+                step_groups = parsed_edn.get(Keyword("step-groups"), [])
+                group_data = next((item for item in step_groups if item.get(Keyword("group-id")) == spec_component.groupId), None)
+                if not group_data:
+                    raise ValueError(f"Group ID {spec_component.groupId} not found in EDN file {spec_component.file}")
+                # Create a list of WhenSpec objects
+                when_specs = []
+                for step in group_data.get(Keyword("steps"), []):
+                    step_type = step.get(Keyword("type"))
+                    step_file = step.get(Keyword("filepath"))
+                    if step_type == Keyword("sparql-file"):
+                        when_specs.append(WhenSpec(value=step_file, queryType=MUST.InsertSparql))
+                spec_component.value = when_specs
+        except Exception as e:
+            log.error(f"Failed to parse EDN file {spec_component.file}: {e}")
+            spec_component.value = None
     return spec_component
@@ -633,7 +732,7 @@ def get_spec_from_table(subject: URIRef,
         columns.add(row.variable.value + "_datatype")
     # add an additional column for the sort order (if any) of the results
     columns.add("order")
-    # create an empty dataframe to populate with the results
+    # create an empty dataframe to populate with the results data
     df = pandas.DataFrame(index=list(index), columns=list(columns))
     # fill the dataframe with the results data
     for row in expected_results:
@@ -699,3 +798,84 @@ def is_then_select_ordered(subject: URIRef, predicate: URIRef, spec_graph: Graph
 }}"""
     is_ordered = spec_graph.query(ask_select_ordered)
     return is_ordered.askAnswer
+@get_spec_component.method((MUST.SpadeEdnGroupSource, MUST.when))
+def _get_spec_component_spade_edn_group_source_when(spec_component_details: SpecComponentDetails) -> SpecComponent:
+    spec_component = SpadeEdnGroupSourceWhenSpec()
+    # Retrieve the file path for the EDN file
+    file_path = get_file_or_fileurl(spec_component_details)
+    absolute_file_path = get_file_absolute_path(spec_component_details, file_path)
+    # Parse the EDN file
+    try:
+        edn_content = Path(absolute_file_path).read_text()
+        edn_data = edn_format.loads(edn_content)
+    except FileNotFoundError:
+        raise ValueError(f"EDN file not found: {absolute_file_path}")
+    except edn_format.EDNDecodeError as e:
+        raise ValueError(f"Failed to parse EDN file {absolute_file_path}: {e}")
+    # Retrieve and normalize the group ID
+    group_id = spec_component_details.spec_graph.value(
+        subject=spec_component_details.spec_component_node,
+        predicate=MUST.groupId
+    )
+    if not group_id:
+        raise ValueError("groupId is missing for SpadeEdnGroupSource")
+    if str(group_id).startswith(':'):
+        group_id = str(group_id).lstrip(':')
+        from edn_format import Keyword
+        group_id = Keyword(group_id)
+    else:
+        group_id = str(group_id)
+    # Extract the relevant group data
+    step_groups = edn_data.get(Keyword("step-groups"), [])
+    group_data = next((item for item in step_groups if item.get(Keyword("group-id")) == group_id), None)
+    if not group_data:
+        raise ValueError(f"Group ID {group_id} not found in EDN file {absolute_file_path}")
+    # Create a list of WhenSpec objects
+    when_specs = []
+    for step in group_data.get(Keyword("steps"), []):
+        step_type = step.get(Keyword("type"))
+        step_file = step.get(Keyword("filepath"))
+        if step_type == Keyword("sparql-file"):
+            try:
+                with open(step_file, 'r') as sparql_file:
+                    sparql_query = sparql_file.read()
+                # Assume the individuals are ConstructSparql queries
+                # won't be true for ASK, but good for now.
+                when_spec = WhenSpec(
+                    value=sparql_query,
+                    queryType=MUST.UpdateSparql,
+                    bindings=None
+                )
+                when_specs.append(when_spec)
+            except FileNotFoundError:
+                raise ValueError(f"SPARQL file not found: {step_file}")
+    spec_component.file = str(absolute_file_path)
+    spec_component.groupId = group_id
+    spec_component.value = when_specs
+    spec_component.queryType = MUST.SpadeEdnGroupSource  # Correct query type
+    return spec_component
+def parse_sparql_query(query_string: str):
+    """
+    Parses a SPARQL query string and returns a query object.
+    """
+    try:
+        from rdflib.plugins.sparql.parser import parseQuery
+        return parseQuery(query_string)
+    except Exception as e:
+        raise ValueError(f"Failed to parse SPARQL query: {e}")

mustrd/steprunner.py CHANGED Viewed

@@ -23,15 +23,15 @@ SOFTWARE.
 """
 import json
+import os
-from . import logger_setup
 from multimethods import MultiMethod, Default
 from .namespace import MUST, TRIPLESTORE
 from rdflib import Graph, URIRef
 from .mustrdRdfLib import execute_select as execute_select_rdflib
 from .mustrdRdfLib import execute_construct as execute_construct_rdflib
 from .mustrdRdfLib import execute_update as execute_update_rdflib
-from .mustrdAnzo import upload_given as upload_given_anzo
+from .mustrdAnzo import get_query_from_step, upload_given as upload_given_anzo
 from .mustrdAnzo import execute_update as execute_update_anzo
 from .mustrdAnzo import execute_construct as execute_construct_anzo
 from .mustrdAnzo import execute_select as execute_select_anzo
@@ -39,9 +39,11 @@ from .mustrdGraphDb import upload_given as upload_given_graphdb
 from .mustrdGraphDb import execute_update as execute_update_graphdb
 from .mustrdGraphDb import execute_construct as execute_construct_graphdb
 from .mustrdGraphDb import execute_select as execute_select_graphdb
-from .spec_component import AnzoWhenSpec, WhenSpec
+from .spec_component import AnzoWhenSpec, WhenSpec, SpadeEdnGroupSourceWhenSpec
+import logging
+from edn_format import loads, Keyword
-log = logger_setup.setup_logger(__name__)
+log = logging.getLogger(__name__)
 def dispatch_upload_given(triple_store: dict, given: Graph):
@@ -71,59 +73,68 @@ def _upload_given_anzo(triple_store: dict, given: Graph):
 def dispatch_run_when(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
     ts = triple_store['type']
     query_type = when.queryType
-    log.info(f"dispatch_run_when to SPARQL type {query_type} to {ts}")
+    log.info(f"dispatch_run_when: spec_uri={spec_uri}, ({ts},{query_type})")
     return ts, query_type
-run_when = MultiMethod('run_when', dispatch_run_when)
+run_when_impl = MultiMethod('run_when', dispatch_run_when)
-@run_when.method((TRIPLESTORE.Anzo, MUST.UpdateSparql))
+@run_when_impl.method((TRIPLESTORE.Anzo, MUST.UpdateSparql))
 def _anzo_run_when_update(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
-    return execute_update_anzo(triple_store, when.value, when.bindings)
-@run_when.method((TRIPLESTORE.Anzo, MUST.ConstructSparql))
+    log.debug(f"_anzo_run_when_update {spec_uri} {triple_store} {when} {type(when)}")
+    if when.value is None:
+        # fetch the query from the query step on anzo
+        query = get_query_from_step(triple_store=when.spec_component_details.mustrd_triple_store,
+                                                    query_step_uri=when.query_step_uri)
+    else:
+        # we must already have the query
+        query = when.value
+    log.debug(f"_anzo_run_when_update.query {query}")
+    return execute_update_anzo(triple_store, query, when.bindings)
+@run_when_impl.method((TRIPLESTORE.Anzo, MUST.ConstructSparql))
 def _anzo_run_when_construct(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
     return execute_construct_anzo(triple_store, when.value, when.bindings)
-@run_when.method((TRIPLESTORE.Anzo, MUST.SelectSparql))
+@run_when_impl.method((TRIPLESTORE.Anzo, MUST.SelectSparql))
 def _anzo_run_when_select(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
     return execute_select_anzo(triple_store, when.value, when.bindings)
-@run_when.method((TRIPLESTORE.GraphDb, MUST.UpdateSparql))
+@run_when_impl.method((TRIPLESTORE.GraphDb, MUST.UpdateSparql))
 def _graphdb_run_when_update(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
     return execute_update_graphdb(triple_store, when.value, when.bindings)
-@run_when.method((TRIPLESTORE.GraphDb, MUST.ConstructSparql))
+@run_when_impl.method((TRIPLESTORE.GraphDb, MUST.ConstructSparql))
 def _graphdb_run_when_construct(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
     return execute_construct_graphdb(triple_store, when.value, when.bindings)
-@run_when.method((TRIPLESTORE.GraphDb, MUST.SelectSparql))
+@run_when_impl.method((TRIPLESTORE.GraphDb, MUST.SelectSparql))
 def _graphdb_run_when_select(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
     return execute_select_graphdb(triple_store, when.value, when.bindings)
-@run_when.method((TRIPLESTORE.RdfLib, MUST.UpdateSparql))
+@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.UpdateSparql))
 def _rdflib_run_when_update(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
     return execute_update_rdflib(triple_store, triple_store["given"], when.value, when.bindings)
-@run_when.method((TRIPLESTORE.RdfLib, MUST.ConstructSparql))
+@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.ConstructSparql))
 def _rdflib_run_when_construct(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
     return execute_construct_rdflib(triple_store, triple_store["given"], when.value, when.bindings)
-@run_when.method((TRIPLESTORE.RdfLib, MUST.SelectSparql))
+@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.SelectSparql))
 def _rdflib_run_when_select(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
     return execute_select_rdflib(triple_store, triple_store["given"], when.value, when.bindings)
-@run_when.method((TRIPLESTORE.Anzo, MUST.AnzoQueryDrivenUpdateSparql))
+@run_when_impl.method((TRIPLESTORE.Anzo, MUST.AnzoQueryDrivenUpdateSparql))
 def _multi_run_when_anzo_query_driven_update(spec_uri: URIRef, triple_store: dict, when: AnzoWhenSpec):
     # run the parameters query to obtain the values for the template step and put them into a dictionary
     query_parameters = json.loads(execute_select_anzo(triple_store, when.paramQuery, None))
@@ -150,8 +161,52 @@ def _multi_run_when_anzo_query_driven_update(spec_uri: URIRef, triple_store: dic
         return result
-@run_when.method(Default)
+@run_when_impl.method((TRIPLESTORE.Anzo, MUST.SpadeEdnGroupSource))
+def _spade_edn_group_source(spec_uri: URIRef, triple_store: dict, when: SpadeEdnGroupSourceWhenSpec):
+    log.info(f"Running SpadeEdnGroupSource for {spec_uri} using {triple_store}")
+    results = []
+    # Iterate over the list of WhenSpec objects in `when.value`
+    for step_when_spec in when.value:
+        try:
+            log.info(f"Dispatching run_when for step: {step_when_spec}")
+            query_result = run_when_impl(spec_uri, triple_store, step_when_spec)
+            log.info(f"Executed SPARQL query: {query_result}")
+            results.append(query_result)
+        except Exception as e:
+            log.error(f"Failed to execute SPARQL query: {e}")
+    log.debug(f"Final results: {results}")
+    return results
+@run_when_impl.method((TRIPLESTORE.RdfLib, MUST.SpadeEdnGroupSource))
+def _spade_edn_group_source(spec_uri: URIRef, triple_store: dict, when: SpadeEdnGroupSourceWhenSpec):
+    log.info(f"Running SpadeEdnGroupSource for {spec_uri} using {triple_store}")
+    merged_graph = Graph()
+    # Iterate over the list of WhenSpec objects in `when.value`
+    for step_when_spec in when.value:
+        try:
+            if step_when_spec.queryType == MUST.UpdateSparql:
+                log.info(f"Dispatching run_when for UpdateSparql step: {step_when_spec}")
+                query_result = run_when_impl(spec_uri, triple_store, step_when_spec)
+                log.info(f"Executed SPARQL query: {query_result}")
+                merged_graph += query_result  # Merge the resulting graph
+            else:
+                log.warning(f"Unsupported queryType: {step_when_spec.queryType}")
+        except Exception as e:
+            log.error(f"Failed to execute SPARQL query: {e}")
+    log.debug(f"Final merged graph has {len(merged_graph)} triples.")
+    return merged_graph
+@run_when_impl.method(Default)
 def _multi_run_when_default(spec_uri: URIRef, triple_store: dict, when: WhenSpec):
+    log.error(f"run_when not implemented for {spec_uri} {triple_store} {when}")
     if when.queryType == MUST.AskSparql:
         log.warning(f"Skipping {spec_uri}, SPARQL ASK not implemented.")
         msg = "SPARQL ASK not implemented."
@@ -164,3 +219,6 @@ def _multi_run_when_default(spec_uri: URIRef, triple_store: dict, when: WhenSpec
         log.warning(f"Skipping {spec_uri},  {when.queryType} is not a valid SPARQL query type.")
         msg = f"{when.queryType} is not a valid SPARQL query type."
     raise NotImplementedError(msg)
+log.debug(f"run_when registry: {run_when_impl} {dir(run_when_impl)}")

mustrd 0.2.7a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl

mustrd 0.2.7a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl