PyPI - cognite-neat - Versions diffs - 0.87.4__py3-none-any.whl → 0.88.0__py3-none-any.whl - Mend - Supply Chain Defender

cognite-neat 0.87.4 (py3-none-any.whl) → 0.88.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cognite-neat might be problematic. Click here for more details.

Files changed (132)

cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py (DELETED)

@@ -1,239 +0,0 @@
-from typing import cast
-import numpy as np
-import pandas as pd
-from rdflib import OWL, Graph
-from cognite.neat.utils.rdf_ import remove_namespace_from_uri
-def parse_owl_classes(graph: Graph, make_compliant: bool = False, language: str = "en") -> pd.DataFrame:
-    """Parse owl classes from graph to pandas dataframe.
-    Args:
-        graph: Graph containing owl classes
-        make_compliant: Flag for generating compliant classes, by default False
-        language: Language to use for parsing, by default "en"
-    Returns:
-        Dataframe containing owl classes
-    !!! note "make_compliant"
-        If `make_compliant` is set to True, in presence of errors, default values will be used instead.
-        This makes the method very opinionated, but results in a compliant classes.
-    """
-    query = """
-        SELECT ?class ?name ?description ?parentClass ?deprecated ?deprecationDate
-        ?replacedBy ?source ?sourceEntity ?match ?comment
-        WHERE {
-        ?class a owl:Class .
-        OPTIONAL {?class rdfs:subClassOf ?parentClass }.
-        OPTIONAL {?class rdfs:label ?name }.
-        OPTIONAL {?class rdfs:comment ?description} .
-        OPTIONAL {?class owl:deprecated ?deprecated} .
-        FILTER (!isBlank(?class))
-        FILTER (!bound(?parentClass) || !isBlank(?parentClass))
-        FILTER (!bound(?name) || LANG(?name) = "" || LANGMATCHES(LANG(?name), "en"))
-        FILTER (!bound(?description) || LANG(?description) = "" || LANGMATCHES(LANG(?description), "en"))
-    }
-    """
-    # create raw dataframe
-    raw_df = _parse_raw_dataframe(cast(list[tuple], list(graph.query(query.replace("en", language)))))
-    if raw_df.empty:
-        return pd.concat([raw_df, pd.DataFrame([len(raw_df) * [""]])], ignore_index=True)
-    # group values and clean up
-    processed_df = _clean_up_classes(raw_df)
-    # make compliant
-    if make_compliant:
-        processed_df = make_classes_compliant(processed_df)
-    # Make Parent Class list elements into string joined with comma
-    processed_df["Parent Class"] = processed_df["Parent Class"].apply(
-        lambda x: ", ".join(x) if isinstance(x, list) and x else None
-    )
-    return processed_df
-def _parse_raw_dataframe(query_results: list[tuple]) -> pd.DataFrame:
-    df = pd.DataFrame(
-        query_results,
-        columns=[
-            "Class",
-            "Name",
-            "Description",
-            "Parent Class",
-            "Deprecated",
-            "Deprecation Date",
-            "Replaced By",
-            "Source",
-            "Source Entity Name",
-            "Match",
-            "Comment",
-        ],
-    )
-    if df.empty:
-        return df
-    # # remove NaNs
-    df.replace(np.nan, "", regex=True, inplace=True)
-    df.Source = df.Class
-    df.Class = df.Class.apply(lambda x: remove_namespace_from_uri(x))
-    df["Source Entity Name"] = df.Class
-    df["Match"] = len(df) * ["exact"]
-    df["Parent Class"] = df["Parent Class"].apply(lambda x: remove_namespace_from_uri(x))
-    return df
-def _clean_up_classes(df: pd.DataFrame) -> pd.DataFrame:
-    clean_list = [
-        {
-            "Class": class_,
-            "Name": group_df["Name"].unique()[0],
-            "Description": "\n".join(list(group_df.Description.unique())),
-            "Parent Class": ", ".join(list(group_df["Parent Class"].unique())),
-            "Deprecated": group_df.Deprecated.unique()[0],
-            "Deprecation Date": group_df["Deprecation Date"].unique()[0],
-            "Replaced By": group_df["Replaced By"].unique()[0],
-            "Source": group_df["Source"].unique()[0],
-            "Source Entity Name": group_df["Name"].unique()[0],
-            "Match Type": group_df["Match"].unique()[0],
-            "Comment": group_df["Comment"].unique()[0],
-        }
-        for class_, group_df in df.groupby("Class")
-    ]
-    df = pd.DataFrame(clean_list)
-    # bring NaNs back
-    df.replace("", None, inplace=True)
-    # split Parent Class column back into list
-    df["Parent Class"] = df["Parent Class"].apply(lambda x: x.split(", ") if isinstance(x, str) else None)
-    return df
-def make_classes_compliant(classes: pd.DataFrame) -> pd.DataFrame:
-    """Make classes compliant.
-    Returns:
-        Dataframe containing compliant classes
-    !!! note "About the compliant classes"
-        The compliant classes are based on the OWL base ontology, but adapted to NEAT and use in CDF.
-        One thing to note is that this method would not be able to fix issues with class ids which
-        are not compliant with the CDF naming convention. For example, if a class id contains a space,
-        starts with a number, etc. This will cause issues when trying to create the class in CDF.
-    """
-    # Replace empty or non-string values in "Match Type" column with "exact"
-    classes["Match Type"] = classes["Match Type"].fillna("exact")
-    classes["Match Type"] = classes["Match Type"].apply(
-        lambda x: "exact" if not isinstance(x, str) or len(x) == 0 else x
-    )
-    # Replace empty or non-string values in "Comment" column with a default value
-    classes["Comment"] = classes["Comment"].fillna("Imported from Ontology by NEAT")
-    classes["Comment"] = classes["Comment"].apply(
-        lambda x: "Imported from Ontology by NEAT" if not isinstance(x, str) or len(x) == 0 else x
-    )
-    # Replace empty or non-boolean values in "Deprecated" column with False
-    classes["Deprecated"] = classes["Deprecated"].fillna(False)
-    classes["Deprecated"] = classes["Deprecated"].apply(lambda x: False if not isinstance(x, bool) else x)
-    # Add _object_property_class, _data_type_property_class, _thing_class to the dataframe
-    classes = pd.concat(
-        [classes, pd.DataFrame([_object_property_class(), _data_type_property_class(), _thing_class()])],
-        ignore_index=True,
-    )
-    # Reduce length of elements in the "Description" column to 1024 characters
-    classes["Description"] = classes["Description"].apply(lambda x: x[:1024] if isinstance(x, str) else None)
-    # Add missing parent classes to the dataframe
-    classes = pd.concat(
-        [classes, pd.DataFrame(_add_parent_class(classes))],
-        ignore_index=True,
-    )
-    return classes
-def _object_property_class() -> dict:
-    return {
-        "Class": "ObjectProperty",
-        "Name": None,
-        "Description": "The class of object properties.",
-        "Parent Class": None,
-        "Source": OWL.ObjectProperty,
-        "Match Type": "exact",
-        "Comment": "Added by NEAT based on owl:ObjectProperty but adapted to NEAT and use in CDF.",
-    }
-def _data_type_property_class() -> dict:
-    return {
-        "Class": "DatatypeProperty",
-        "Name": None,
-        "Description": "The class of data properties.",
-        "Parent Class": None,
-        "Source": OWL.DatatypeProperty,
-        "Match Type": "exact",
-        "Comment": "Added by NEAT based on owl:DatatypeProperty but adapted to NEAT and use in CDF.",
-    }
-def _thing_class() -> dict:
-    return {
-        "Class": "ThingContainer",
-        "Name": None,
-        "Description": "The class of holding class individuals.",
-        "Parent Class": None,
-        "Source": OWL.Thing,
-        "Match Type": "exact",
-        "Comment": (
-            "Added by NEAT. "
-            "Imported from OWL base ontology, it is meant for use as a default"
-            " value type for object properties which miss a declared range."
-        ),
-    }
-def _add_parent_class(df: pd.DataFrame) -> list[dict]:
-    parent_set = {
-        item
-        for sublist in df["Parent Class"].tolist()
-        if sublist
-        for item in sublist
-        if item != "" and item is not None
-    }
-    class_set = set(df["Class"].tolist())
-    rows = []
-    for missing_parent_class in parent_set.difference(class_set):
-        rows += [
-            {
-                "Class": missing_parent_class,
-                "Name": None,
-                "Description": None,
-                "Parent Class": None,
-                "Source": None,
-                "Match Type": None,
-                "Comment": (
-                    "Added by NEAT. "
-                    "This is a parent class that is missing in the ontology. "
-                    "It is added by NEAT to make the ontology compliant with CDF."
-                ),
-            }
-        ]
-    return rows

cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py (DELETED)

@@ -1,260 +0,0 @@
-import datetime
-import re
-import pandas as pd
-from rdflib import Graph, Namespace
-from cognite.neat.legacy.rules.models.rules import (
-    cdf_space_compliance_regex,
-    data_model_id_compliance_regex,
-    prefix_compliance_regex,
-    version_compliance_regex,
-)
-from cognite.neat.utils.collection_ import remove_none_elements_from_set
-from cognite.neat.utils.rdf_ import convert_rdflib_content
-def parse_owl_metadata(graph: Graph, make_compliant: bool = False) -> pd.DataFrame:
-    """Parse owl metadata from graph to pandas dataframe.
-    Args:
-        graph: Graph containing owl metadata
-        make_compliant: Flag for generating compliant metadata, by default False
-    Returns:
-        Dataframe containing owl metadata
-    !!! note "make_compliant"
-        If `make_compliant` is set to True, in presence of errors, default values will be used instead.
-        This makes the method very opinionated, but results in a compliant metadata.
-    """
-    # TODO: Move dataframe to dict representation
-    query = """SELECT ?namespace ?prefix ?dataModelName ?cdfSpaceName ?version ?isCurrentVersion
-    ?created ?updated ?title ?description ?creator ?contributor ?rights ?license
-    WHERE {
-        ?namespace a owl:Ontology .
-        OPTIONAL {?namespace owl:versionInfo ?version }.
-        OPTIONAL {?namespace dcterms:creator ?creator }.
-        OPTIONAL {?namespace dcterms:title|rdfs:label|skos:prefLabel ?title }.
-        OPTIONAL {?namespace dcterms:contributor ?contributor }.
-        OPTIONAL {?namespace dcterms:modified ?updated }.
-        OPTIONAL {?namespace dcterms:created ?created }.
-        OPTIONAL {?namespace dcterms:description ?description }.
-        OPTIONAL {?namespace dcterms:rights|dc:rights ?rights }.
-        OPTIONAL {?namespace dcterms:license|dc:license ?license }.
-        FILTER (!isBlank(?namespace))
-        FILTER (!bound(?description) || LANG(?description) = "" || LANGMATCHES(LANG(?description), "en"))
-        FILTER (!bound(?title) || LANG(?title) = "" || LANGMATCHES(LANG(?title), "en"))
-    }
-    """
-    results = [{item for item in sublist} for sublist in list(zip(*graph.query(query), strict=True))]
-    clean_list = convert_rdflib_content(
-        {
-            "namespace": Namespace(results[0].pop()),
-            "prefix": results[1].pop(),
-            "dataModelName": results[2].pop(),
-            "cdfSpaceName": results[3].pop(),
-            "version": results[4].pop(),
-            "isCurrentVersion": results[5].pop(),
-            "created": results[6].pop(),
-            "updated": results[7].pop(),
-            "title": results[8].pop(),
-            "description": results[9].pop(),
-            "creator": (
-                ", ".join(remove_none_elements_from_set(results[10]))
-                if remove_none_elements_from_set(results[10])
-                else None
-            ),
-            "contributor": (
-                ", ".join(remove_none_elements_from_set(results[11]))
-                if remove_none_elements_from_set(results[11])
-                else None
-            ),
-            "rights": results[12].pop(),
-            "license": results[13].pop(),
-        }
-    )
-    if make_compliant:
-        clean_list.pop("created")
-        return pd.DataFrame(list(make_metadata_compliant(clean_list).items()), columns=["Key", "Value"])
-    return pd.DataFrame(list(clean_list.items()), columns=["Key", "Value"])
-def make_metadata_compliant(metadata: dict) -> dict:
-    """Attempts to fix errors in metadata, otherwise defaults to values that will pass validation.
-    Args:
-        metadata: Dictionary containing metadata
-    Returns:
-        Dictionary containing metadata with fixed errors
-    """
-    metadata = fix_namespace(metadata, default=Namespace("http://purl.org/cognite/neat#"))
-    metadata = fix_prefix(metadata)
-    metadata = fix_dataModelName(metadata)
-    metadata = fix_cdfSpaceName(metadata)
-    metadata = fix_version(metadata)
-    metadata = fix_isCurrentVersion(metadata)
-    metadata = fix_date(metadata, date_type="created", default=datetime.datetime.now().replace(microsecond=0))
-    metadata = fix_date(metadata, date_type="updated", default=datetime.datetime.now().replace(microsecond=0))
-    metadata = fix_title(metadata)
-    metadata = fix_description(metadata)
-    metadata = fix_author(metadata, "creator")
-    metadata = fix_author(metadata, "contributor", "Cognite")
-    metadata = fix_rights(metadata)
-    metadata = fix_license(metadata)
-    return metadata
-def fix_license(metadata: dict, default: str = "Unknown license") -> dict:
-    if license := metadata.get("license", None):
-        if not isinstance(license, str):
-            metadata["license"] = default
-        elif isinstance(license, str) and len(license) == 0:
-            metadata["license"] = default
-    else:
-        metadata["license"] = default
-    return metadata
-def fix_rights(metadata: dict, default: str = "Unknown rights") -> dict:
-    if rights := metadata.get("rights", None):
-        if not isinstance(rights, str):
-            metadata["rights"] = default
-        elif isinstance(rights, str) and len(rights) == 0:
-            metadata["rights"] = default
-    else:
-        metadata["rights"] = default
-    return metadata
-def fix_author(metadata: dict, author_type: str = "creator", default: str = "NEAT") -> dict:
-    if author := metadata.get(author_type, None):
-        if not isinstance(author, str) or isinstance(author, list):
-            metadata[author_type] = default
-        elif isinstance(author, str) and len(author) == 0:
-            metadata[author_type] = default
-    else:
-        metadata[author_type] = default
-    return metadata
-def fix_description(metadata: dict, default: str = "This model has been inferred from OWL ontology") -> dict:
-    if description := metadata.get("description", None):
-        if not isinstance(description, str) or len(description) == 0:
-            metadata["description"] = default
-        elif isinstance(description, str) and len(description) > 1024:
-            metadata["description"] = metadata["description"][:1024]
-    else:
-        metadata["description"] = default
-    return metadata
-def fix_cdfSpaceName(metadata: dict, default: str = "playground") -> dict:
-    if space := metadata.get("cdfSpaceName", None):
-        if not isinstance(space, str) or not re.match(cdf_space_compliance_regex, space):
-            metadata["cdfSpaceName"] = default
-    else:
-        metadata["cdfSpaceName"] = default
-    return metadata
-def fix_dataModelName(metadata: dict, default: str = "neat") -> dict:
-    if data_model_name := metadata.get("dataModelName", None):
-        if not isinstance(data_model_name, str) or not re.match(data_model_id_compliance_regex, data_model_name):
-            metadata["dataModelName"] = default
-    else:
-        metadata["dataModelName"] = default
-    return metadata
-def fix_prefix(metadata: dict, default: str = "neat") -> dict:
-    if prefix := metadata.get("prefix", None):
-        if not isinstance(prefix, str) or not re.match(prefix_compliance_regex, prefix):
-            metadata["prefix"] = default
-    else:
-        metadata["prefix"] = default
-    return metadata
-def fix_namespace(metadata: dict, default: Namespace) -> dict:
-    if namespace := metadata.get("namespace", None):
-        if not isinstance(namespace, Namespace):
-            try:
-                metadata["namespace"] = Namespace(namespace)
-            except Exception:
-                metadata["namespace"] = default
-    else:
-        metadata["namespace"] = default
-    return metadata
-def fix_date(
-    metadata: dict,
-    date_type: str,
-    default: datetime.datetime,
-) -> dict:
-    if date := metadata.get(date_type, None):
-        try:
-            if isinstance(date, datetime.datetime):
-                pass
-            elif isinstance(date, datetime.date):
-                metadata[date_type] = datetime.datetime.combine(metadata[date_type], datetime.datetime.min.time())
-            elif isinstance(date, str):
-                metadata[date_type] = datetime.datetime.strptime(metadata[date_type], "%Y-%m-%dT%H:%M:%SZ")
-            else:
-                metadata[date_type] = default
-        except Exception:
-            metadata[date_type] = default
-    else:
-        metadata[date_type] = default
-    return metadata
-def fix_version(metadata: dict, default: str = "1.0.0") -> dict:
-    if version := metadata.get("version", None):
-        if not re.match(version_compliance_regex, version):
-            metadata["version"] = default
-    else:
-        metadata["version"] = default
-    return metadata
-def fix_isCurrentVersion(metadata: dict, default: bool = True) -> dict:
-    if isCurrentVersion := metadata.get("isCurrentVersion", None):
-        if not isinstance(isCurrentVersion, bool):
-            metadata["isCurrentVersion"] = default
-    else:
-        metadata["isCurrentVersion"] = default
-    return metadata
-def fix_title(metadata: dict, default: str = "OWL Inferred Data Model") -> dict:
-    if title := metadata.get("title", None):
-        if not isinstance(title, str):
-            metadata["title"] = default
-        elif isinstance(title, str) and len(title) == 0:
-            metadata["title"] = default
-        elif isinstance(title, str) and len(title) > 255:
-            metadata["title"] = metadata["title"][:255]
-        else:
-            pass
-    else:
-        metadata["title"] = default
-    return metadata