PyPI - mustrd - Versions diffs - 0.2.7a0__tar.gz → 0.3.0.0__tar.gz - Mend

mustrd 0.2.7a0__tar.gz → 0.3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{mustrd-0.2.7a0 → mustrd-0.3.0.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mustrd
-Version: 0.2.7a0
+Version: 0.3.0.0
 Summary: A Spec By Example framework for RDF and SPARQL, Inspired by Cucumber.
 License: MIT
 Author: John Placek

{mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/anzo_utils.py RENAMED Viewed

@@ -31,16 +31,18 @@ from requests import Response, HTTPError, RequestException
 from bs4 import BeautifulSoup
 import logging
+logger = logging.getLogger()
 def query_azg(anzo_config: dict, query: str,
               format: str = "json", is_update: bool = False,
               data_layers: List[str] = None):
     params = {
-        'skipCache': True,
+        'skipCache': 'true',
         'format': format,
         'datasourceURI': anzo_config['gqe_uri'],
-        'default-graph-uri': data_layers,
-        'named-graph-uri': data_layers
+        'using-graph-uri' if is_update else 'default-graph-uri': data_layers,
+        'using-named-graph-uri' if is_update else 'named-graph-uri': data_layers
     }
     url = f"{anzo_config['url']}/sparql"
     return send_anzo_query(anzo_config, url=url, params=params, query=query, is_update=is_update)
@@ -52,7 +54,7 @@ def query_graphmart(anzo_config: dict,
                     format: str = "json",
                     data_layers: List[str] = None):
     params = {
-        'skipCache': True,
+        'skipCache': 'true',
         'format': format,
         'default-graph-uri': data_layers,
         'named-graph-uri': data_layers
@@ -87,7 +89,8 @@ def manage_anzo_response(response: Response) -> str:
 def send_anzo_query(anzo_config, url, params, query, is_update=False):
     headers = {"Content-Type": f"application/sparql-{'update' if is_update else 'query' }"}
-    return manage_anzo_response(requests.post(url=url, params=params, data=query,
+    logger.debug(f"send_anzo_query {url=} {query=} {is_update=}")
+    return manage_anzo_response(requests.post(url=url, params=params, data=query.encode('utf-8'),
                                               auth=(anzo_config['username'], anzo_config['password']),
                                               headers=headers, verify=False))

{mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/logger_setup.py RENAMED Viewed

@@ -35,6 +35,7 @@ def setup_logger(name: str) -> logging.Logger:
     log = logging.getLogger(name)
     log.setLevel(LOG_LEVEL)
     stderr_handler = logging.StreamHandler(sys.stderr)
     stderr_handler.setLevel(logging.ERROR)
     log.addHandler(stderr_handler)

{mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/mustrdShapes.ttl RENAMED Viewed

@@ -249,5 +249,14 @@ must:AnzoGraphmartQueryDrivenTemplatedStepSparqlSourceShape
                      sh:minCount    1 ;
                      sh:maxCount    1 ; ]  .
+must:SpadeEdnGroupSourceShape
+    a              sh:NodeShape ;
+    sh:targetClass must:SpadeEdnGroupSource ;
+    sh:property    [ sh:path     must:file ;
+                     sh:message "A SpadeEdnGroupSource must have a file property pointing to the spade.edn config." ;
+                     sh:minCount 1 ;
+                     sh:maxCount 1 ; ] ;
+    sh:property    [ sh:path     must:groupId ;
+                     sh:message "A SpadeEdnGroupSource must have a groupId property referencing the group in the EDN file." ;
+                     sh:minCount 1 ;
+                     sh:maxCount 1 ; ] .

{mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/model/ontology.ttl RENAMED Viewed

@@ -461,6 +461,11 @@ sh:order rdf:type owl:DatatypeProperty ;
                          rdfs:isDefinedBy : ;
                          rdfs:label "AnzoGraphmartQueryDrivenTemplatedStepSparqlSource" .
+### https://mustrd.com/model/SpadeEdnGroupSource
+:SpadeEdnGroupSource rdf:type owl:Class ;
+    rdfs:subClassOf :SparqlSource ;
+    rdfs:comment "Allows reference to a spade.edn file, and a specific groupid (think Anzo layer), within that" ;
+    rdfs:label "SpadeEdnGroupSource" .
 ###  https://mustrd.com/model/Then
 :Then rdf:type owl:Class ;

{mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/mustrd.py RENAMED Viewed

@@ -55,8 +55,9 @@ import logging
 from http.client import HTTPConnection
 from .steprunner import upload_given, run_when
 from multimethods import MultiMethod
+import traceback
-log = logger_setup.setup_logger(__name__)
+log = logging.getLogger(__name__)
 requests.packages.urllib3.disable_warnings()
 requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += ':HIGH:!DH:!aNULL'
@@ -184,20 +185,33 @@ class UpdateSparqlQuery(SparqlAction):
 # https://github.com/Semantic-partners/mustrd/issues/19
 # Validate the specs found in spec_path
-def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, ont_graph: Graph, file_name: str = "*")\
+def validate_specs(run_config: dict,
+                   triple_stores: List,
+                   shacl_graph: Graph,
+                   ont_graph: Graph,
+                   file_name: str = "*",
+                   selected_test_files: List[str] = [])\
         -> Tuple[List, Graph, List]:
     spec_graph = Graph()
     subject_uris = set()
     focus_uris = set()
     invalid_specs = []
-    ttl_files = list(run_config['spec_path'].glob(
-        f'**/{file_name}.mustrd.ttl'))
+    ttl_files = []
+    if not selected_test_files:
+        ttl_files = list(run_config['spec_path'].glob(
+            f'**/{file_name}.mustrd.ttl'))
+        log.info(
+            f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
+    else:
+        ttl_files = selected_test_files
+    log.info(f"Using {ttl_files} for test source")
     ttl_files.sort()
-    log.info(
-        f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
     # For each spec file found in spec_path
     for file in ttl_files:
+        # file = file.resolve()
         error_messages = []
         log.info(f"Parse: {file}")
@@ -224,7 +238,10 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
                                                          advanced=True,
                                                          js=False,
                                                          debug=False)
+        if str(file.name).endswith("_duplicate"):
+            log.debug(f"Validation of {file.name} against SHACL shapes: {conforms}")
+            log.debug(f"{results_graph.serialize(format='turtle')}")
+        # log.debug(f"SHACL validation results: {results_text}")
         # Add error message if not conform to spec shapes
         if not conforms:
             for msg in results_graph.objects(predicate=SH.resultMessage):
@@ -269,6 +286,10 @@ def add_spec_validation(file_graph: Graph, subject_uris: set, file: Path, triple
                         error_messages: list, invalid_specs: list, spec_graph: Graph):
     for subject_uri in file_graph.subjects(RDF.type, MUST.TestSpec):
+        # Always add file name and source file to the graph for error reporting
+        file_graph.add([subject_uri, MUST.specSourceFile, Literal(str(file))])
+        file_graph.add([subject_uri, MUST.specFileName, Literal(file.name)])
         # If we already collected a URI, then we tag it as duplicate and it won't be executed
         if subject_uri in subject_uris:
             log.warning(
@@ -311,8 +332,11 @@ def get_specs(spec_uris: List[URIRef], spec_graph: Graph, triple_stores: List[di
                         specs += [get_spec(spec_uri, spec_graph,
                                            run_config, triple_store)]
                     except (ValueError, FileNotFoundError, ConnectionError) as e:
+                        # Try to get file name/path from the graph, but fallback to "unknown"
+                        file_name = spec_graph.value(subject=spec_uri, predicate=MUST.specFileName) or "unknown"
+                        file_path = spec_graph.value(subject=spec_uri, predicate=MUST.specSourceFile) or "unknown"
                         skipped_results += [SpecSkipped(spec_uri, triple_store['type'],
-                                                        e, get_spec_file(spec_uri, spec_graph))]
+                                                        str(e), str(file_name), Path(file_path))]
     except (BadSyntax, FileNotFoundError) as e:
         template = "An exception of type {0} occurred when trying to parse the triple store configuration file. " \
@@ -334,7 +358,14 @@ def run_specs(specs) -> List[SpecResult]:
 def get_spec_file(spec_uri: URIRef, spec_graph: Graph):
-    return str(spec_graph.value(subject=spec_uri, predicate=MUST.specFileName, default="default.mustrd.ttl"))
+    file_name = spec_graph.value(subject=spec_uri, predicate=MUST.specFileName)
+    if file_name:
+        return str(file_name)
+    # fallback: try to get from MUST.specSourceFile
+    file_path = spec_graph.value(subject=spec_uri, predicate=MUST.specSourceFile)
+    if file_path:
+        return str(Path(file_path).name)
+    return "default.mustrd.ttl"
 def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_triple_store: dict = None) -> Specification:
@@ -367,23 +398,42 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
 def check_result(spec: Specification, result: Union[str, Graph]):
+    log.debug(
+        f"check_result {spec.spec_uri=}, {spec.triple_store=}, {result=} {type(spec.then)}")
     if isinstance(spec.then, TableThenSpec):
+        log.debug("table_comparison")
         return table_comparison(result, spec)
     else:
         graph_compare = graph_comparison(spec.then.value, result)
         if isomorphic(result, spec.then.value):
-            return SpecPassed(spec.spec_uri, spec.triple_store["type"])
+            log.debug(f"isomorphic {spec}")
+            log.debug(f"{spec.spec_uri}")
+            log.debug(f"{spec.triple_store}")
+            ret = SpecPassed(spec.spec_uri, spec.triple_store["type"])
+            return ret
         else:
+            log.debug("not isomorphic")
             if spec.when[0].queryType == MUST.ConstructSparql:
+                log.debug("ConstructSpecFailure")
                 return ConstructSpecFailure(spec.spec_uri, spec.triple_store["type"], graph_compare)
             else:
+                log.debug("UpdateSpecFailure")
                 return UpdateSpecFailure(spec.spec_uri, spec.triple_store["type"], graph_compare)
 def run_spec(spec: Specification) -> SpecResult:
     spec_uri = spec.spec_uri
     triple_store = spec.triple_store
-    # close_connection = True
+    if not isinstance(spec, Specification):
+        log.warning(f"check_result called with non-Specification: {type(spec)}")
+        return spec
+        # return SpecSkipped(getattr(spec, 'spec_uri', None), getattr(spec, 'triple_store', {}), "Spec is not a valid Specification instance")
+    log.debug(
+        f"run_spec {spec=}")
     log.debug(
         f"run_when {spec_uri=}, {triple_store=}, {spec.given=}, {spec.when=}, {spec.then=}")
     if spec.given:
@@ -399,10 +449,16 @@ def run_spec(spec: Specification) -> SpecResult:
                 f"Running {when.queryType} spec {spec_uri} on {triple_store['type']}")
             try:
                 result = run_when(spec_uri, triple_store, when)
+                log.info(
+                    f"run {when.queryType} spec {spec_uri} on {triple_store['type']} {result=}")
             except ParseException as e:
+                log.error(
+                    f"parseException {e}")
                 return SparqlParseFailure(spec_uri, triple_store["type"], e)
             except NotImplementedError as ex:
-                return SpecSkipped(spec_uri, triple_store["type"], ex.args[0])
+                log.error(f"NotImplementedError {ex}")
+                raise ex
+                # return SpecSkipped(spec_uri, triple_store["type"], ex.args[0])
         return check_result(spec, result)
     except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout, OSError) as e:
         # close_connection = False
@@ -413,7 +469,9 @@ def run_spec(spec: Specification) -> SpecResult:
     except (TypeError, RequestException) as e:
         log.error(f"{type(e)} {e}")
         return SparqlExecutionError(spec_uri, triple_store["type"], e)
+    except Exception as e:
+        log.error(f"Unexpected error {e}")
+        return RuntimeError(spec_uri, triple_store["type"], f"{type(e).__name__}: {e}")
     # https://github.com/Semantic-partners/mustrd/issues/78
     # finally:
     #     if type(mustrd_triple_store) == MustrdAnzo and close_connection:
@@ -724,33 +782,33 @@ def get_then_update(spec_uri: URIRef, spec_graph: Graph) -> Graph:
     return expected_results
-def write_result_diff_to_log(res):
+def write_result_diff_to_log(res, info):
     if isinstance(res, UpdateSpecFailure) or isinstance(res, ConstructSpecFailure):
-        log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
-        log.info(f"{Fore.BLUE} In Expected Not In Actual:")
-        log.info(
+        info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+        info(f"{Fore.BLUE} In Expected Not In Actual:")
+        info(
             res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
-        log.info(f"{Fore.RED} in_actual_not_in_expected")
-        log.info(
+        info(f"{Fore.RED} in_actual_not_in_expected")
+        info(
             res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
-        log.info(f"{Fore.GREEN} in_both")
-        log.info(res.graph_comparison.in_both.serialize(format="ttl"))
+        info(f"{Fore.GREEN} in_both")
+        info(res.graph_comparison.in_both.serialize(format="ttl"))
     if isinstance(res, SelectSpecFailure):
-        log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
-        log.info(res.message)
-        log.info(res.table_comparison.to_markdown())
+        info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+        info(res.message)
+        info(res.table_comparison.to_markdown())
     if isinstance(res, SpecPassedWithWarning):
-        log.info(
+        info(
             f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
-        log.info(res.warning)
+        info(res.warning)
     if isinstance(res, TripleStoreConnectionError) or isinstance(res, SparqlExecutionError) or \
             isinstance(res, SparqlParseFailure):
-        log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
-        log.info(res.exception)
+        info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+        info(res.exception)
     if isinstance(res, SpecSkipped):
-        log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
-        log.info(res.message)
+        info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
+        info(res.message)
 def calculate_row_difference(df1: pandas.DataFrame,

{mustrd-0.2.7a0 → mustrd-0.3.0.0}/mustrd/mustrdAnzo.py RENAMED Viewed

@@ -29,6 +29,7 @@ from mustrd.anzo_utils import query_azg, query_graphmart
 from mustrd.anzo_utils import query_configuration, json_to_dictlist, ttl_to_graph
 def execute_select(triple_store: dict,  when: str, bindings: dict = None) -> str:
     try:
         if bindings:
@@ -39,7 +40,7 @@ def execute_select(triple_store: dict,  when: str, bindings: dict = None) -> str
                             f"FROM <{triple_store['input_graph']}>\nFROM <{triple_store['output_graph']}>").replace(
                                 "${targetGraph}", f"<{triple_store['output_graph']}>")
         # TODO: manage results here
-        return query_azg(anzo_config=triple_store, query=when)
+        return query_azg(anzo_config=triple_store, query=when, data_layers=[triple_store['input_graph']])
     except (ConnectionError, TimeoutError, HTTPError, ConnectTimeout):
         raise
@@ -58,7 +59,7 @@ USING <{triple_store['output_graph']}>""").replace(
                                          "${targetGraph}", f"<{output_graph}>")
     response = query_azg(anzo_config=triple_store, query=substituted_query, is_update=True,
-                         data_layers=input_graph, format="ttl")
+                         data_layers=[input_graph, output_graph], format="ttl")
     logging.debug(f'response {response}')
     # TODO: deal with error responses
     new_graph = ttl_to_graph(query_azg(anzo_config=triple_store, query="construct {?s ?p ?o} { ?s ?p ?o }",

mustrd 0.2.7a0__tar.gz → 0.3.0.0__tar.gz

mustrd 0.2.7a0__tar.gz → 0.3.0.0__tar.gz