PyPI - mustrd - Versions diffs - 0.2.6.1__py3-none-any.whl → 0.2.7a0__py3-none-any.whl - Mend

mustrd 0.2.6.1 (py3-none-any.whl) → 0.2.7a0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

mustrd/README.adoc +33 -0
mustrd/anzo_utils.py +121 -0
mustrd/logger_setup.py +4 -0
mustrd/model/triplestoreOntology.ttl +0 -8
mustrd/model/triplestoreshapes.ttl +0 -3
mustrd/mustrd.py +340 -204
mustrd/mustrdAnzo.py +55 -130
mustrd/mustrdGraphDb.py +3 -3
mustrd/mustrdTestPlugin.py +137 -93
{mustrd-0.2.6.1.dist-info → mustrd-0.2.7a0.dist-info}/METADATA +7 -8
{mustrd-0.2.6.1.dist-info → mustrd-0.2.7a0.dist-info}/RECORD +14 -13
{mustrd-0.2.6.1.dist-info → mustrd-0.2.7a0.dist-info}/WHEEL +1 -1
mustrd/test/test_mustrd.py +0 -5
{mustrd-0.2.6.1.dist-info → mustrd-0.2.7a0.dist-info}/LICENSE +0 -0
{mustrd-0.2.6.1.dist-info → mustrd-0.2.7a0.dist-info}/entry_points.txt +0 -0

mustrd/mustrd.py CHANGED Viewed

@@ -23,7 +23,7 @@ SOFTWARE.
 """
 import os
-from typing import Tuple, List
+from typing import Tuple, List, Union
 import tomli
 from rdflib.plugins.parsers.notation3 import BadSyntax
@@ -54,6 +54,7 @@ from pyshacl import validate
 import logging
 from http.client import HTTPConnection
 from .steprunner import upload_given, run_when
+from multimethods import MultiMethod
 log = logger_setup.setup_logger(__name__)
@@ -89,7 +90,7 @@ def debug_requests_off():
 debug_requests_off()
-@dataclass
+@dataclass(frozen=True)
 class Specification:
     spec_uri: URIRef
     triple_store: dict
@@ -97,6 +98,7 @@ class Specification:
     when: WhenSpec
     then: ThenSpec
     spec_file_name: str = "default.mustrd.ttl"
+    spec_source_file: Path = Path("default.mustrd.ttl")
 @dataclass
@@ -157,6 +159,7 @@ class TripleStoreConnectionError(SpecResult):
 class SpecSkipped(SpecResult):
     message: str
     spec_file_name: str = "default.mustrd.ttl"
+    spec_source_file: Path = Path("default.mustrd.ttl")
 @dataclass
@@ -180,21 +183,25 @@ class UpdateSparqlQuery(SparqlAction):
 # https://github.com/Semantic-partners/mustrd/issues/19
+# Validate the specs found in spec_path
 def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, ont_graph: Graph, file_name: str = "*")\
         -> Tuple[List, Graph, List]:
     spec_graph = Graph()
     subject_uris = set()
     focus_uris = set()
     invalid_specs = []
-    ttl_files = list(run_config['spec_path'].glob(f'**/{file_name}.mustrd.ttl'))
+    ttl_files = list(run_config['spec_path'].glob(
+        f'**/{file_name}.mustrd.ttl'))
     ttl_files.sort()
-    log.info(f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
+    log.info(
+        f"Found {len(ttl_files)} {file_name}.mustrd.ttl files in {run_config['spec_path']}")
+    # For each spec file found in spec_path
     for file in ttl_files:
         error_messages = []
         log.info(f"Parse: {file}")
+        # Parse the spec file and add an error message if it does not conform to the RDF standard
         try:
             file_graph = Graph().parse(file)
         except BadSyntax as e:
@@ -204,6 +211,7 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
             error_messages += [f"Could not extract spec from {file} due to exception of type "
                                f"{type(e).__name__} when parsing file"]
             continue
         # run shacl validation
         conforms, results_graph, results_text = validate(file_graph,
                                                          shacl_graph=shacl_graph,
@@ -216,6 +224,8 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
                                                          advanced=True,
                                                          js=False,
                                                          debug=False)
+        # Add an error message if the file does not conform to the spec shapes
         if not conforms:
             for msg in results_graph.objects(predicate=SH.resultMessage):
                 log.warning(f"{file_graph}")
@@ -223,47 +233,66 @@ def validate_specs(run_config: dict, triple_stores: List, shacl_graph: Graph, on
                 error_messages += [f"{msg} File: {file.name}"]
         # collect a list of uris of the tests in focus
+        # If any focus is found, only the specs in focus will be executed
         for focus_uri in file_graph.subjects(predicate=MUST.focus, object=Literal("true", datatype=XSD.boolean)):
             if focus_uri in focus_uris:
                 focus_uri = URIRef(str(focus_uri) + "_DUPLICATE")
             focus_uris.add(focus_uri)
-        # make sure there are no duplicate test IRIs in the files
-        for subject_uri in file_graph.subjects(RDF.type, MUST.TestSpec):
-            if subject_uri in subject_uris:
-                log.warning(f"Duplicate subject URI found: {file.name} {subject_uri}. File will not be parsed.")
-                error_messages += [f"Duplicate subject URI found in {file.name}."]
-                subject_uri = URIRef(str(subject_uri) + "_DUPLICATE")
-            if len(error_messages) > 0:
-                error_messages.sort()
-                error_message = "\n".join(msg for msg in error_messages)
-                invalid_specs += [SpecSkipped(subject_uri, triple_store["type"], error_message, file.name)
-                                  for triple_store in triple_stores]
-            else:
-                subject_uris.add(subject_uri)
-                this_spec_graph = Graph()
-                this_spec_graph.parse(file)
-                spec_uris_in_this_file = list(this_spec_graph.subjects(RDF.type, MUST.TestSpec))
-                for spec in spec_uris_in_this_file:
-                    this_spec_graph.add([spec, MUST.specSourceFile, Literal(file)])
-                    this_spec_graph.add([spec, MUST.specFileName, Literal(file.name)])
-                spec_graph += this_spec_graph
+        add_spec_validation(file_graph, subject_uris, file,
+                            triple_stores, error_messages, invalid_specs, spec_graph)
     valid_spec_uris = list(spec_graph.subjects(RDF.type, MUST.TestSpec))
     if focus_uris:
-        invalid_focus_specs = []
-        for spec in invalid_specs:
-            if spec.spec_uri in focus_uris:
-                invalid_focus_specs += [spec]
-                focus_uris.remove(spec.spec_uri)
-        log.info(f"Collected {len(focus_uris)} focus test spec(s)")
+        invalid_focus_specs = get_invalid_focus_spec(focus_uris, invalid_specs)
         return focus_uris, spec_graph, invalid_focus_specs
     else:
         log.info(f"Collected {len(valid_spec_uris)} valid test spec(s)")
         return valid_spec_uris, spec_graph, invalid_specs
+def get_invalid_focus_spec(focus_uris: set, invalid_specs: list):
+    invalid_focus_specs = []
+    for spec in invalid_specs:
+        if spec.spec_uri in focus_uris:
+            invalid_focus_specs += [spec]
+            focus_uris.remove(spec.spec_uri)
+    log.info(f"Collected {len(focus_uris)} focus test spec(s)")
+    return invalid_focus_specs
+# Detect duplicate spec URIs.
+# If there is no error: associate the spec configuration with the file where this configuration is stored.
+# If there is an error: aggregate the messages and mark the spec as skipped.
+def add_spec_validation(file_graph: Graph, subject_uris: set, file: Path, triple_stores: List,
+                        error_messages: list, invalid_specs: list, spec_graph: Graph):
+    for subject_uri in file_graph.subjects(RDF.type, MUST.TestSpec):
+        # If we already collected a URI, then we tag it as duplicate and it won't be executed
+        if subject_uri in subject_uris:
+            log.warning(
+                f"Duplicate subject URI found: {file.name} {subject_uri}. File will not be parsed.")
+            error_messages += [f"Duplicate subject URI found in {file.name}."]
+            subject_uri = URIRef(str(subject_uri) + "_DUPLICATE")
+        if len(error_messages) == 0:
+            subject_uris.add(subject_uri)
+            this_spec_graph = Graph()
+            this_spec_graph.parse(file)
+            spec_uris_in_this_file = list(
+                this_spec_graph.subjects(RDF.type, MUST.TestSpec))
+            for spec in spec_uris_in_this_file:
+                this_spec_graph.add([spec, MUST.specSourceFile, Literal(file)])
+                this_spec_graph.add(
+                    [spec, MUST.specFileName, Literal(file.name)])
+            spec_graph += this_spec_graph
+        else:
+            error_messages.sort()
+            error_message = "\n".join(msg for msg in error_messages)
+            invalid_specs += [SpecSkipped(subject_uri, triple_store["type"], error_message, file.name, file)
+                              for triple_store in triple_stores]
 def get_specs(spec_uris: List[URIRef], spec_graph: Graph, triple_stores: List[dict],
               run_config: dict):
     specs = []
@@ -271,14 +300,16 @@ def get_specs(spec_uris: List[URIRef], spec_graph: Graph, triple_stores: List[di
     try:
         for triple_store in triple_stores:
             if "error" in triple_store:
-                log.error(f"{triple_store['error']}. No specs run for this triple store.")
+                log.error(
+                    f"{triple_store['error']}. No specs run for this triple store.")
                 skipped_results += [SpecSkipped(spec_uri, triple_store['type'], triple_store['error'],
                                                 get_spec_file(spec_uri, spec_graph)) for spec_uri in
                                     spec_uris]
             else:
                 for spec_uri in spec_uris:
                     try:
-                        specs += [get_spec(spec_uri, spec_graph, run_config, triple_store)]
+                        specs += [get_spec(spec_uri, spec_graph,
+                                           run_config, triple_store)]
                     except (ValueError, FileNotFoundError, ConnectionError) as e:
                         skipped_results += [SpecSkipped(spec_uri, triple_store['type'],
                                                         e, get_spec_file(spec_uri, spec_graph))]
@@ -319,9 +350,11 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
                                                    mustrd_triple_store=mustrd_triple_store))
         spec_file_name = get_spec_file(spec_uri, spec_graph)
+        spec_file_path = Path(spec_graph.value(
+            subject=spec_uri, predicate=MUST.specSourceFile, default=Path("default.mustrd.ttl")))
         # https://github.com/Semantic-partners/mustrd/issues/92
         return Specification(spec_uri, mustrd_triple_store,
-                             components[0].value, components[1], components[2], spec_file_name)
+                             components[0].value, components[1], components[2], spec_file_name, spec_file_path)
     except (ValueError, FileNotFoundError) as e:
         template = "An exception of type {0} occurred. Arguments:\n{1!r}"
@@ -333,7 +366,7 @@ def get_spec(spec_uri: URIRef, spec_graph: Graph, run_config: dict, mustrd_tripl
         raise
-def check_result(spec, result):
+def check_result(spec: Specification, result: Union[str, Graph]):
     if isinstance(spec.then, TableThenSpec):
         return table_comparison(result, spec)
     else:
@@ -351,7 +384,8 @@ def run_spec(spec: Specification) -> SpecResult:
     spec_uri = spec.spec_uri
     triple_store = spec.triple_store
     # close_connection = True
-    log.debug(f"run_when {spec_uri=}, {triple_store=}, {spec.given=}, {spec.when=}, {spec.then=}")
+    log.debug(
+        f"run_when {spec_uri=}, {triple_store=}, {spec.given=}, {spec.when=}, {spec.then=}")
     if spec.given:
         given_as_turtle = spec.given.serialize(format="turtle")
         log.debug(f"{given_as_turtle}")
@@ -361,7 +395,8 @@ def run_spec(spec: Specification) -> SpecResult:
             return SpecSkipped(spec_uri, triple_store['type'], "Unable to run Inherited State tests on Rdflib")
     try:
         for when in spec.when:
-            log.info(f"Running {when.queryType} spec {spec_uri} on {triple_store['type']}")
+            log.info(
+                f"Running {when.queryType} spec {spec_uri} on {triple_store['type']}")
             try:
                 result = run_when(spec_uri, triple_store, when)
             except ParseException as e:
@@ -394,17 +429,21 @@ def get_triple_store_graph(triple_store_graph_path: Path, secrets: str):
         return Graph().parse(triple_store_graph_path).parse(secret_path)
+# Parse and validate triple store configuration
 def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
     triple_stores = []
-    shacl_graph = Graph().parse(Path(os.path.join(get_mustrd_root(), "model/triplestoreshapes.ttl")))
-    ont_graph = Graph().parse(Path(os.path.join(get_mustrd_root(), "model/triplestoreOntology.ttl")))
+    shacl_graph = Graph().parse(
+        Path(os.path.join(get_mustrd_root(), "model/triplestoreshapes.ttl")))
+    ont_graph = Graph().parse(
+        Path(os.path.join(get_mustrd_root(), "model/triplestoreOntology.ttl")))
+    # SHACL validation of triple store configuration
     conforms, results_graph, results_text = validate(
-            data_graph=triple_store_graph,
-            shacl_graph=shacl_graph,
-            ont_graph=ont_graph,
-            advanced=True,
-            inference='none'
-        )
+        data_graph=triple_store_graph,
+        shacl_graph=shacl_graph,
+        ont_graph=ont_graph,
+        advanced=True,
+        inference='none'
+    )
     if not conforms:
         raise ValueError(f"Triple store configuration not conform to the shapes. SHACL report: {results_text}",
                          results_graph)
@@ -414,46 +453,13 @@ def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
         triple_store["uri"] = triple_store_config
         # Anzo graph via anzo
         if triple_store_type == TRIPLESTORE.Anzo:
-            triple_store["url"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.url)
-            triple_store["port"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.port)
-            try:
-                triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
-                                                                        predicate=TRIPLESTORE.username))
-                triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
-                                                                        predicate=TRIPLESTORE.password))
-            except (FileNotFoundError, ValueError) as e:
-                triple_store["error"] = e
-            triple_store["gqe_uri"] = triple_store_graph.value(subject=triple_store_config,
-                                                               predicate=TRIPLESTORE.gqeURI)
-            triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
-                                                                   predicate=TRIPLESTORE.inputGraph)
-            triple_store["output_graph"] = triple_store_graph.value(subject=triple_store_config,
-                                                                    predicate=TRIPLESTORE.outputGraph)
-            try:
-                check_triple_store_params(triple_store, ["url", "port", "username", "password", "input_graph"])
-            except ValueError as e:
-                triple_store["error"] = e
+            get_anzo_configuration(
+                triple_store, triple_store_graph, triple_store_config)
         # GraphDB
         elif triple_store_type == TRIPLESTORE.GraphDb:
-            triple_store["url"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.url)
-            triple_store["port"] = triple_store_graph.value(subject=triple_store_config, predicate=TRIPLESTORE.port)
-            try:
-                triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
-                                                                        predicate=TRIPLESTORE.username))
-                triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
-                                                                        predicate=TRIPLESTORE.password))
-            except (FileNotFoundError, ValueError) as e:
-                log.error(f"Credential retrieval failed {e}")
-                triple_store["error"] = e
-            triple_store["repository"] = triple_store_graph.value(subject=triple_store_config,
-                                                                  predicate=TRIPLESTORE.repository)
-            triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
-                                                                   predicate=TRIPLESTORE.inputGraph)
+            get_graphDB_configuration(
+                triple_store, triple_store_graph, triple_store_config)
-            try:
-                check_triple_store_params(triple_store, ["url", "port", "repository"])
-            except ValueError as e:
-                triple_store["error"] = e
         elif triple_store_type != TRIPLESTORE.RdfLib:
             triple_store["error"] = f"Triple store not implemented: {triple_store_type}"
@@ -461,15 +467,65 @@ def get_triple_stores(triple_store_graph: Graph) -> list[dict]:
     return triple_stores
+def get_anzo_configuration(triple_store: dict, triple_store_graph: Graph, triple_store_config: URIRef):
+    triple_store["url"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.url)
+    triple_store["port"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.port)
+    try:
+        triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
+                                                                predicate=TRIPLESTORE.username))
+        triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
+                                                                predicate=TRIPLESTORE.password))
+    except (FileNotFoundError, ValueError) as e:
+        triple_store["error"] = e
+    triple_store["gqe_uri"] = triple_store_graph.value(subject=triple_store_config,
+                                                       predicate=TRIPLESTORE.gqeURI)
+    triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
+                                                           predicate=TRIPLESTORE.inputGraph)
+    triple_store["output_graph"] = triple_store_graph.value(subject=triple_store_config,
+                                                            predicate=TRIPLESTORE.outputGraph)
+    try:
+        check_triple_store_params(
+            triple_store, ["url", "port", "username", "password", "input_graph"])
+    except ValueError as e:
+        triple_store["error"] = e
+def get_graphDB_configuration(triple_store: dict, triple_store_graph: Graph, triple_store_config: URIRef):
+    triple_store["url"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.url)
+    triple_store["port"] = triple_store_graph.value(
+        subject=triple_store_config, predicate=TRIPLESTORE.port)
+    try:
+        triple_store["username"] = str(triple_store_graph.value(subject=triple_store_config,
+                                                                predicate=TRIPLESTORE.username))
+        triple_store["password"] = str(triple_store_graph.value(subject=triple_store_config,
+                                                                predicate=TRIPLESTORE.password))
+    except (FileNotFoundError, ValueError) as e:
+        log.error(f"Credential retrieval failed {e}")
+        triple_store["error"] = e
+    triple_store["repository"] = triple_store_graph.value(subject=triple_store_config,
+                                                          predicate=TRIPLESTORE.repository)
+    triple_store["input_graph"] = triple_store_graph.value(subject=triple_store_config,
+                                                           predicate=TRIPLESTORE.inputGraph)
+    try:
+        check_triple_store_params(triple_store, ["url", "repository"])
+    except ValueError as e:
+        triple_store["error"] = e
 def check_triple_store_params(triple_store: dict, required_params: List[str]):
-    missing_params = [param for param in required_params if triple_store.get(param) is None]
+    missing_params = [
+        param for param in required_params if triple_store.get(param) is None]
     if missing_params:
         raise ValueError(f"Cannot establish connection to {triple_store['type']}. "
                          f"Missing required parameter(s): {', '.join(missing_params)}.")
 def get_credential_from_file(triple_store_name: URIRef, credential: str, config_path: Literal) -> str:
-    log.info(f"get_credential_from_file {triple_store_name}, {credential}, {config_path}")
+    log.info(
+        f"get_credential_from_file {triple_store_name}, {credential}, {config_path}")
     if not config_path:
         raise ValueError(f"Cannot establish connection defined in {triple_store_name}. "
                          f"Missing required parameter: {credential}.")
@@ -508,7 +564,8 @@ def json_results_to_panda_dataframe(result: str) -> pandas.DataFrame:
             else:
                 values.append(str(XSD.anyURI))
-        frames = pandas.concat(objs=[frames, pandas.DataFrame([values], columns=columns)], ignore_index=True)
+        frames = pandas.concat(objs=[frames, pandas.DataFrame(
+            [values], columns=columns)], ignore_index=True)
         frames.fillna('', inplace=True)
         if frames.size == 0:
@@ -516,94 +573,124 @@ def json_results_to_panda_dataframe(result: str) -> pandas.DataFrame:
     return frames
-# https://github.com/Semantic-partners/mustrd/issues/110
-# https://github.com/Semantic-partners/mustrd/issues/52
 def table_comparison(result: str, spec: Specification) -> SpecResult:
     warning = None
     order_list = ["order by ?", "order by desc", "order by asc"]
-    ordered_result = any(pattern in spec.when[0].value.lower() for pattern in order_list)
-    then = spec.then.value
-    try:
-        if is_json(result):
-            df = json_results_to_panda_dataframe(result)
-            columns = list(df.columns)
-        else:
-            raise ParseException
-        sorted_columns = sorted(columns)
-        sorted_then_cols = sorted(list(then))
-        if not df.empty:
+    ordered_result = any(
+        pattern in spec.when[0].value.lower() for pattern in order_list)
+    # If the SPARQL query doesn't contain an ORDER BY clause, but an order is defined in the then spec:
+    # then ignore the order in the then spec and log a warning
+    if not ordered_result and spec.then.ordered:
+        warning = f"sh:order in {spec.spec_uri} is ignored, no ORDER BY in query"
+        log.warning(warning)
+    # If the SPARQL query contains an ORDER BY clause and the then spec is not ordered:
+    # the spec is inconsistent
+    if ordered_result and not spec.then.ordered:
+        message = "Actual result is ordered, must:then must contain sh:order on every row."
+        return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, message)
+    # Convert results to dataframe
+    if is_json(result):
+        df = json_results_to_panda_dataframe(result)
+    else:
+        return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, "Sparql result is not in JSON")
-            if not ordered_result:
-                df.sort_values(by=columns[::2], inplace=True)
-                df.reset_index(inplace=True, drop=True)
-                if spec.then.ordered:
-                    warning = f"sh:order in {spec.spec_uri} is ignored, no ORDER BY in query"
-                    log.warning(warning)
-            # Scenario 1: expected no result but got a result
-            if then.empty:
-                message = f"""Expected 0 row(s) and 0 column(s),
-                got {df.shape[0]} row(s) and {round(df.shape[1] / 2)} column(s)"""
-                empty_then = create_empty_dataframe_with_columns(df)
-                df_diff = empty_then.compare(df, result_names=("expected", "actual"))
+    # Compare result with expected
+    df_diff, message = compare_table_results(df, spec)
-            else:
-                # Scenario 2: expected a result and got a result
-                # pandas.set_option('display.max_columns', None)
-                message = f"Expected {then.shape[0]} row(s) and {round(then.shape[1] / 2)} column(s), " \
-                          f"got {df.shape[0]} row(s) and {round(df.shape[1] / 2)} column(s)"
-                if ordered_result is True and not spec.then.ordered:
-                    message += ". Actual result is ordered, must:then must contain sh:order on every row."
-                    return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], None, message)
-                else:
-                    if len(columns) == len(then.columns):
-                        if sorted_columns == sorted_then_cols:
-                            then = then[columns]
-                            if not ordered_result:
-                                then.sort_values(by=columns[::2], inplace=True)
-                                then.reset_index(drop=True, inplace=True)
-                            if df.shape == then.shape and (df.columns == then.columns).all():
-                                df_diff = then.compare(df, result_names=("expected", "actual"))
-                            else:
-                                df_diff = construct_df_diff(df, then)
-                        else:
-                            then = then[sorted_then_cols]
-                            df = df[sorted_columns]
-                            df_diff = construct_df_diff(df, then)
-                    else:
-                        then = then[sorted_then_cols]
-                        df = df[sorted_columns]
-                        df_diff = construct_df_diff(df, then)
+    if df_diff.empty:
+        if warning:
+            return SpecPassedWithWarning(spec.spec_uri, spec.triple_store["type"], warning)
         else:
+            return SpecPassed(spec.spec_uri, spec.triple_store["type"])
+    else:
+        log.error("\n" + df_diff.to_markdown())
+        log.error(message)
+        return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], df_diff, message)
-            if then.empty:
-                # Scenario 3: expected no result, got no result
-                message = "Expected 0 row(s) and 0 column(s), got 0 row(s) and 0 column(s)"
-                df = pandas.DataFrame()
-            else:
-                # Scenario 4: expected a result, but got an empty result
-                message = f"""Expected {then.shape[0]} row(s)
-                              and {round(then.shape[1] / 2)} column(s), got 0 row(s) and 0 column(s)"""
-                then = then[sorted_then_cols]
-                df = create_empty_dataframe_with_columns(then)
-            df_diff = then.compare(df, result_names=("expected", "actual"))
-        if df_diff.empty:
-            if warning:
-                return SpecPassedWithWarning(spec.spec_uri, spec.triple_store["type"], warning)
+def compare_table_results_dispatch(resultDf: DataFrame, spec: Specification):
+    return not resultDf.empty, not spec.then.value.empty
+compare_table_results = MultiMethod(
+    "compare_table_results", compare_table_results_dispatch)
+# Scenario 1: expected a result and got a result
+@compare_table_results.method((True, True))
+def _compare_results(resultDf: DataFrame, spec: Specification):
+    columns = list(resultDf.columns)
+    sorted_columns = sorted(columns)
+    then = spec.then.value
+    sorted_then_cols = sorted(list(then))
+    order_list = ["order by ?", "order by desc", "order by asc"]
+    ordered_result = any(
+        pattern in spec.when[0].value.lower() for pattern in order_list)
+    if not ordered_result:
+        resultDf.sort_values(by=list(resultDf.columns)[::2], inplace=True)
+        resultDf.reset_index(inplace=True, drop=True)
+    if len(columns) == len(then.columns):
+        if sorted_columns == sorted_then_cols:
+            then = then[columns]
+            if not ordered_result:
+                then.sort_values(by=columns[::2], inplace=True)
+                then.reset_index(drop=True, inplace=True)
+            if resultDf.shape == then.shape and (resultDf.columns == then.columns).all():
+                df_diff = then.compare(
+                    resultDf, result_names=("expected", "actual"))
             else:
-                return SpecPassed(spec.spec_uri, spec.triple_store["type"])
+                df_diff = construct_df_diff(resultDf, then)
         else:
-            log.error("\n" + df_diff.to_markdown())
-            log.error(message)
-            return SelectSpecFailure(spec.spec_uri, spec.triple_store["type"], df_diff, message)
+            then = then[sorted_then_cols]
+            resultDf = resultDf[sorted_columns]
+            df_diff = construct_df_diff(resultDf, then)
+    else:
+        then = then[sorted_then_cols]
+        resultDf = resultDf[sorted_columns]
+        df_diff = construct_df_diff(resultDf, then)
-    except ParseException as e:
-        return SparqlParseFailure(spec.spec_uri, spec.triple_store["type"], e)
-    except NotImplementedError as ex:
-        return SpecSkipped(spec.spec_uri, spec.triple_store["type"], ex)
+    message = build_summary_message(then.shape[0], round(
+        then.shape[1] / 2), resultDf.shape[0], round(resultDf.shape[1] / 2))
+    return df_diff, message
+# Scenario 2: expected no result but got a result
+@compare_table_results.method((True, False))
+def _unexpected_results(resultDf: DataFrame, spec: Specification):
+    empty_then = create_empty_dataframe_with_columns(resultDf)
+    df_diff = empty_then.compare(resultDf, result_names=("expected", "actual"))
+    return df_diff, build_summary_message(0, 0, resultDf.shape[0], round(resultDf.shape[1] / 2))
+# Scenario 3: expected a result, but got an empty result
+@compare_table_results.method((False, True))
+def _missing_results(resultDf: DataFrame, spec: Specification):
+    then = spec.then.value
+    then = then[sorted(list(then))]
+    df = create_empty_dataframe_with_columns(then)
+    df_diff = then.compare(df, result_names=("expected", "actual"))
+    return df_diff, build_summary_message(then.shape[0], round(then.shape[1] / 2), 0, 0)
+# Scenario 4: expected no result, got no result
+@compare_table_results.method((False, False))
+def _no_results(resultDf: DataFrame, spec: Specification):
+    df = pandas.DataFrame()
+    df_diff = spec.then.value.compare(df, result_names=("expected", "actual"))
+    return df_diff, build_summary_message(0, 0, 0, 0)
+def build_summary_message(expected_rows, expected_columns, got_rows, got_columns):
+    return f"Expected {expected_rows} row(s) and {expected_columns} column(s), " \
+        f"got {got_rows} row(s) and {got_columns} column(s)"
 def graph_comparison(expected_graph: Graph, actual_graph: Graph) -> GraphComparison:
@@ -637,6 +724,35 @@ def get_then_update(spec_uri: URIRef, spec_graph: Graph) -> Graph:
     return expected_results
+def write_result_diff_to_log(res):
+    """Log the details of a single failed, skipped or warning spec result.
+
+    The checks below are independent ``if`` statements, so a result that
+    matches more than one of them is reported by every matching section.
+
+    :param res: the spec result to report
+    """
+    def log_failure_header():
+        log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+
+    if isinstance(res, (UpdateSpecFailure, ConstructSpecFailure)):
+        log_failure_header()
+        # Heading followed by the graph_comparison attribute it introduces.
+        graph_sections = (
+            (f"{Fore.BLUE} In Expected Not In Actual:", "in_expected_not_in_actual"),
+            (f"{Fore.RED} in_actual_not_in_expected", "in_actual_not_in_expected"),
+            (f"{Fore.GREEN} in_both", "in_both"),
+        )
+        for heading, attribute in graph_sections:
+            log.info(heading)
+            log.info(
+                getattr(res.graph_comparison, attribute).serialize(format="ttl"))
+    if isinstance(res, SelectSpecFailure):
+        log_failure_header()
+        log.info(res.message)
+        log.info(res.table_comparison.to_markdown())
+    if isinstance(res, SpecPassedWithWarning):
+        log.info(
+            f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
+        log.info(res.warning)
+    if isinstance(res, (TripleStoreConnectionError, SparqlExecutionError,
+                        SparqlParseFailure)):
+        log_failure_header()
+        log.info(res.exception)
+    if isinstance(res, SpecSkipped):
+        log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
+        log.info(res.message)
 def calculate_row_difference(df1: pandas.DataFrame,
                              df2: pandas.DataFrame) -> pandas.DataFrame:
     df_all = df1.merge(df2.drop_duplicates(), how='left', indicator=True)
@@ -657,12 +773,16 @@ def construct_df_diff(df: pandas.DataFrame,
     modified_then = then
     if actual_columns.size > 0:
-        modified_then = modified_then.reindex(modified_then.columns.to_list() + actual_columns.to_list(), axis=1)
-        modified_then[actual_columns.to_list()] = modified_then[actual_columns.to_list()].fillna('')
+        modified_then = modified_then.reindex(
+            modified_then.columns.to_list() + actual_columns.to_list(), axis=1)
+        modified_then[actual_columns.to_list(
+        )] = modified_then[actual_columns.to_list()].fillna('')
     if expected_columns.size > 0:
-        modified_df = modified_df.reindex(modified_df.columns.to_list() + expected_columns.to_list(), axis=1)
-        modified_df[expected_columns.to_list()] = modified_df[expected_columns.to_list()].fillna('')
+        modified_df = modified_df.reindex(
+            modified_df.columns.to_list() + expected_columns.to_list(), axis=1)
+        modified_df[expected_columns.to_list(
+        )] = modified_df[expected_columns.to_list()].fillna('')
     modified_df = modified_df.reindex(modified_then.columns, axis=1)
@@ -686,13 +806,17 @@ def generate_row_diff(actual_rows: pandas.DataFrame, expected_rows: pandas.DataF
     if actual_rows.shape[0] > 0:
         empty_actual_copy = create_empty_dataframe_with_columns(actual_rows)
-        df_diff_actual_rows = empty_actual_copy.compare(actual_rows, result_names=("expected", "actual"))
+        df_diff_actual_rows = empty_actual_copy.compare(
+            actual_rows, result_names=("expected", "actual"))
     if expected_rows.shape[0] > 0:
-        empty_expected_copy = create_empty_dataframe_with_columns(expected_rows)
-        df_diff_expected_rows = expected_rows.compare(empty_expected_copy, result_names=("expected", "actual"))
+        empty_expected_copy = create_empty_dataframe_with_columns(
+            expected_rows)
+        df_diff_expected_rows = expected_rows.compare(
+            empty_expected_copy, result_names=("expected", "actual"))
-    df_diff_rows = pandas.concat([df_diff_actual_rows, df_diff_expected_rows], ignore_index=True)
+    df_diff_rows = pandas.concat(
+        [df_diff_actual_rows, df_diff_expected_rows], ignore_index=True)
     return df_diff_rows
@@ -707,15 +831,18 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
     # Init dictionaries
     status_dict = defaultdict(lambda: defaultdict(int))
     status_counts = defaultdict(lambda: defaultdict(int))
-    colours = {SpecPassed: Fore.GREEN, SpecPassedWithWarning: Fore.YELLOW, SpecSkipped: Fore.YELLOW}
+    colours = {SpecPassed: Fore.GREEN,
+               SpecPassedWithWarning: Fore.YELLOW, SpecSkipped: Fore.YELLOW}
     # Populate dictionaries from results
     for result in results:
         status_counts[result.triple_store][type(result)] += 1
         status_dict[result.spec_uri][result.triple_store] = type(result)
     # Get the list of statuses and list of unique triple stores
-    statuses = list(status for inner_dict in status_dict.values() for status in inner_dict.values())
-    triple_stores = list(set(status for inner_dict in status_dict.values() for status in inner_dict.keys()))
+    statuses = list(status for inner_dict in status_dict.values()
+                    for status in inner_dict.values())
+    triple_stores = list(set(status for inner_dict in status_dict.values()
+                         for status in inner_dict.keys()))
     # Convert dictionaries to list for tabulate
     table_rows = [[spec_uri] + [
@@ -728,8 +855,10 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
                     for triple_store in triple_stores] for status in set(statuses)]
     # Display tables with tabulate
-    log.info(tabulate(table_rows, headers=['Spec Uris / triple stores'] + triple_stores, tablefmt="pretty"))
-    log.info(tabulate(status_rows, headers=['Status / triple stores'] + triple_stores, tablefmt="pretty"))
+    log.info(tabulate(table_rows, headers=[
+             'Spec Uris / triple stores'] + triple_stores, tablefmt="pretty"))
+    log.info(tabulate(status_rows, headers=[
+             'Status / triple stores'] + triple_stores, tablefmt="pretty"))
     pass_count = statuses.count(SpecPassed)
     warning_count = statuses.count(SpecPassedWithWarning)
@@ -746,33 +875,40 @@ def review_results(results: List[SpecResult], verbose: bool) -> None:
     logger_setup.flush()
     log.info(f"{overview_colour}===== {fail_count} failures, {skipped_count} skipped, {Fore.GREEN}{pass_count} passed, "
-          f"{overview_colour}{warning_count} passed with warnings =====")
+             f"{overview_colour}{warning_count} passed with warnings =====")
     if verbose and (fail_count or warning_count or skipped_count):
-        for res in results:
-            if isinstance(res, UpdateSpecFailure):
-                log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
-                log.info(f"{Fore.BLUE} In Expected Not In Actual:")
-                log.info(res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
-                log.info()
-                log.info(f"{Fore.RED} in_actual_not_in_expected")
-                log.info(res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
-                log.info(f"{Fore.GREEN} in_both")
-                log.info(res.graph_comparison.in_both.serialize(format="ttl"))
-            if isinstance(res, SelectSpecFailure):
-                log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
-                log.info(res.message)
-                log.info(res.table_comparison.to_markdown())
-            if isinstance(res, ConstructSpecFailure) or isinstance(res, UpdateSpecFailure):
-                log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
-            if isinstance(res, SpecPassedWithWarning):
-                log.info(f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
-                log.info(res.warning)
-            if isinstance(res, TripleStoreConnectionError) or type(res, SparqlExecutionError) or \
-                    isinstance(res, SparqlParseFailure):
-                log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
-                log.info(res.exception)
-            if isinstance(res, SpecSkipped):
-                log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
-                log.info(res.message)
+        display_verbose(results)
+def display_verbose(results: List[SpecResult]):
+    """Log a detailed report for every non-passing or warning result.
+
+    :param results: the spec results to report, in the order they are logged
+    """
+    for res in results:
+        if isinstance(res, UpdateSpecFailure):
+            log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+            log.info(f"{Fore.BLUE} In Expected Not In Actual:")
+            log.info(
+                res.graph_comparison.in_expected_not_in_actual.serialize(format="ttl"))
+            # Logger.info() requires a message; log an empty one to keep the
+            # blank separator line instead of raising TypeError.
+            log.info("")
+            log.info(f"{Fore.RED} in_actual_not_in_expected")
+            log.info(
+                res.graph_comparison.in_actual_not_in_expected.serialize(format="ttl"))
+            log.info(f"{Fore.GREEN} in_both")
+            log.info(res.graph_comparison.in_both.serialize(format="ttl"))
+        if isinstance(res, SelectSpecFailure):
+            log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+            log.info(res.message)
+            log.info(res.table_comparison.to_markdown())
+        # UpdateSpecFailure already logged its header above, so only
+        # ConstructSpecFailure is handled here to avoid a duplicate line.
+        if isinstance(res, ConstructSpecFailure):
+            log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+        if isinstance(res, SpecPassedWithWarning):
+            log.info(
+                f"{Fore.YELLOW}Passed with warning {res.spec_uri} {res.triple_store}")
+            log.info(res.warning)
+        # isinstance(), not type(): type() with two arguments raises TypeError.
+        if isinstance(res, (TripleStoreConnectionError, SparqlExecutionError,
+                            SparqlParseFailure)):
+            log.info(f"{Fore.RED}Failed {res.spec_uri} {res.triple_store}")
+            log.info(res.exception)
+        if isinstance(res, SpecSkipped):
+            log.info(f"{Fore.YELLOW}Skipped {res.spec_uri} {res.triple_store}")
+            log.info(res.message)

mustrd 0.2.6.1__py3-none-any.whl → 0.2.7a0__py3-none-any.whl

mustrd 0.2.6.1py3-none-any.whl → 0.2.7a0py3-none-any.whl