PyPI - cognite-neat - Versions diffs - 0.87.6__py3-none-any.whl → 0.88.0__py3-none-any.whl - Mend

cognite-neat 0.87.6__py3-none-any.whl → 0.88.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cognite-neat might be problematic. Click here for more details.

Files changed (125) hide show

cognite/neat/legacy/rules/importers/_dms2rules.py DELETED Viewed

@@ -1,194 +0,0 @@
-import sys
-from collections.abc import Sequence
-from datetime import datetime
-from typing import Any, cast
-import pandas as pd
-from cognite.client import CogniteClient
-from cognite.client.data_classes.data_modeling import (
-    DataModel,
-    DirectRelation,
-    EdgeConnection,
-    MappedProperty,
-    SingleHopConnectionDefinition,
-    View,
-)
-from cognite.client.data_classes.data_modeling.data_types import ListablePropertyType
-from cognite.client.data_classes.data_modeling.ids import DataModelIdentifier, ViewId
-from cognite.neat.legacy.rules.models.tables import Tables
-from cognite.neat.legacy.rules.models.value_types import DMS_VALUE_TYPE_MAPPINGS, XSD_VALUE_TYPE_MAPPINGS
-from ._base import BaseImporter
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-class DMSImporter(BaseImporter):
-    """
-    Converts a Data Model Storage (DMS) data model to a set of transformation rules.
-    Args:
-        views: List of views to convert to transformation rules.
-    """
-    def __init__(self, views: Sequence[View] | DataModel[View], metadata: dict[str, str | float] | None = None):
-        if isinstance(views, DataModel):
-            self.views = views.views
-        else:
-            self.views = list(views)
-        if metadata is None:
-            self.metadata = self._default_metadata()
-            if len(self.views) == 1:
-                self.metadata["version"] = self.views[0].version
-                self.metadata["prefix"] = self.views[0].space
-        else:
-            self.metadata = metadata
-        if isinstance(views, DataModel):
-            if views.name:
-                self.metadata["title"] = views.name
-            if views.description:
-                self.metadata["description"] = views.description
-            if views.space:
-                self.metadata["prefix"] = views.space
-            if views.external_id:
-                self.metadata["suffix"] = views.external_id
-            if views.version:
-                self.metadata["version"] = views.version
-    @classmethod
-    def from_cdf(cls, client: CogniteClient, data_model: DataModelIdentifier) -> Self:
-        """
-        Converts a Data Model Storage (DMS) data model to a set of transformation rules.
-        Args:
-            client: Cognite client to use for fetching data models.
-            data_model: List of data models to convert to transformation rules.
-        !!! Note
-            Beware that `DataModelIdentifier` is just type hint that you cannot instantiate
-            directly, e.g. `id = DataModelIdentifier(space=, external_id, version)` will fail.
-            Instead, provide `data_models` as a list of three element tuples,
-            e.g. `[(space, external_id, version)]`, or two element tuples,
-            e.g. `[(space, external_id)]`, where `space` represents CDF space name,
-            `external_id` represents data model external ID, and `version`
-            represents data model version. If `version` is not provided, whatever is
-            the first version CDF returns it will give you that one.
-        """
-        data_model = client.data_modeling.data_models.retrieve(data_model, inline_views=True)[0]
-        # Avoid duplicate views (same view can be used by multiple data models)
-        views_by_id: dict[ViewId, View] = {}
-        for view in data_model.views:
-            views_by_id[view.as_id()] = view
-        if metadata := cls._to_metadata(data_model):
-            return cls(list(views_by_id.values()), metadata)
-        else:
-            return cls(list(views_by_id.values()))
-    def to_tables(self) -> dict[str, pd.DataFrame]:
-        classes: list[dict[str, str | float]] = []
-        properties: list[dict[str, str | float]] = []
-        for view in self.views:
-            class_id = view.external_id
-            classes.append(
-                {
-                    "Class": class_id,
-                    "Name": view.name or float("nan"),
-                    "Description": view.description or float("nan"),
-                }
-            )
-            for prop_id, prop in view.properties.items():
-                if isinstance(prop, MappedProperty):
-                    # Edge 1-1
-                    if isinstance(prop.type, DirectRelation):
-                        type_ = cast(ViewId, prop.source).external_id
-                    else:
-                        type_ = cast(
-                            str, DMS_VALUE_TYPE_MAPPINGS.get(type(prop.type), XSD_VALUE_TYPE_MAPPINGS["string"]).xsd
-                        )
-                    default_value = prop.default_value
-                    name = prop.name or prop_id
-                    description = prop.description or float("nan")
-                # Edge 1-many
-                elif isinstance(prop, EdgeConnection):
-                    type_ = prop.source.external_id
-                    default_value = None
-                    name = prop.name or prop_id
-                    description = prop.description or float("nan")
-                else:
-                    raise NotImplementedError(f"Property type {type(prop)} not supported")
-                max_count: str | float = "1"
-                if isinstance(prop, SingleHopConnectionDefinition) or (
-                    isinstance(prop, MappedProperty)
-                    and isinstance(prop.type, ListablePropertyType)
-                    and prop.type.is_list
-                ):
-                    max_count = float("nan")
-                min_count: str | float = "1"
-                if isinstance(prop, SingleHopConnectionDefinition) or (
-                    isinstance(prop, MappedProperty) and prop.nullable
-                ):
-                    min_count = "0"
-                properties.append(
-                    {
-                        "Class": class_id,
-                        "Property": prop_id,
-                        "Name": name,
-                        "Description": description,
-                        "Type": type_,
-                        "Default": cast(Any, default_value),  # fixes issues with mypy
-                        "Min Count": min_count,
-                        "Max Count": max_count,
-                        "Rule Type": "rdfpath",
-                        "Rule": f"cim:{class_id}(cim:{prop_id})",
-                    }
-                )
-        return {
-            Tables.metadata: pd.Series(self.metadata).to_frame("value").reset_index(),
-            Tables.classes: pd.DataFrame(classes),
-            Tables.properties: pd.DataFrame(properties),
-        }
-    @staticmethod
-    def _to_metadata(data_model: DataModel) -> dict:
-        mapping = {
-            "space": "cdf_space_name",
-            "external_id": "data_model_name",
-            "version": "version",
-            "description": "description",
-            "created_time": "created",
-            "last_updated_time": "updated",
-            "name": "title",
-        }
-        metadata = {mapping.get(k, k): v for k, v in data_model.to_pandas().value.to_dict().items() if k in mapping}
-        metadata["prefix"] = metadata["data_model_name"]
-        metadata["creator"] = "Unknown"
-        if "created" in metadata:
-            metadata["created"] = datetime.utcfromtimestamp(metadata["created"] / 1e3)
-        if "updated" in metadata:
-            metadata["updated"] = datetime.utcfromtimestamp(metadata["updated"] / 1e3)
-        return metadata
-    def _repr_html_(self) -> str:
-        """Pretty display of the DMSImporter object in a Notebook"""
-        dump = self.metadata
-        dump["views_count"] = len(self.views)
-        return pd.Series(dump).to_frame("value")._repr_html_()  # type: ignore[operator]

cognite/neat/legacy/rules/importers/_graph2rules.py DELETED Viewed

@@ -1,308 +0,0 @@
-"""This module performs importing of graph to TransformationRules pydantic class.
-In more details, it traverses the graph and abstracts class and properties, basically
-generating a list of rules based on which nodes that form the graph are made.
-"""
-import warnings
-from datetime import datetime
-from typing import cast
-import pandas as pd
-from rdflib import Graph, Literal, Namespace, URIRef
-from cognite.neat.constants import get_default_prefixes
-from cognite.neat.legacy.rules import exceptions
-from cognite.neat.legacy.rules.exporters._rules2rules import to_dms_name
-from cognite.neat.legacy.rules.models.tables import Tables
-from cognite.neat.utils.rdf_ import get_namespace, remove_namespace_from_uri, uri_to_short_form
-from ._base import BaseImporter
-class GraphImporter(BaseImporter):
-    """
-    Convert RDF graph, containing nodes and edges, to tables/ transformation rules / Excel file.
-    Args:
-        graph: RDF graph to be imported
-        max_number_of_instance: Max number of instances to be analyzed for each class in RDF graph
-    !!! Note
-        Due to high degree of flexibility of RDF graphs, the RDF graph is not guaranteed to be
-        converted to a complete and/or valid `Rules` object. Therefore, it is recommended to
-        call method `to_raw_rules` to get the raw rules which one should export to Excel file
-        using `exporter.ExcelExporter` and then manually edit the Excel file by checking
-        validation report file produced by the exporter.
-    """
-    def __init__(self, graph: Graph, max_number_of_instance: int = -1):
-        self.graph = graph
-        self.max_number_of_instance = max_number_of_instance
-    def to_tables(self) -> dict[str, pd.DataFrame]:
-        data_model, prefixes = _graph_to_data_model_dict(self.graph, self.max_number_of_instance)
-        return {
-            Tables.metadata: _parse_metadata_df(),
-            Tables.classes: _parse_classes_df(data_model, prefixes),
-            Tables.properties: _parse_properties_df(data_model, prefixes),
-            Tables.prefixes: _parse_prefixes_df(prefixes),
-        }
-def _create_default_properties_parsing_config() -> dict[str, tuple[str, ...]]:
-    # TODO: these are to be read from Property pydantic model
-    return {
-        "header": (
-            "Class",
-            "Property",
-            "Description",
-            "Type",
-            "Min Count",
-            "Max Count",
-            "Rule Type",
-            "Rule",
-            "Source",
-            "Source Entity Name",
-            "Match Type",
-            "Comment",
-        )
-    }
-def _create_default_classes_parsing_config() -> dict[str, tuple[str, ...]]:
-    # TODO: these are to be read from Class pydantic model
-    return {"header": ("Class", "Description", "Parent Class", "Source", "Source Entity Name", "Match Type", "Comment")}
-def _parse_prefixes_df(prefixes: dict[str, Namespace]) -> pd.DataFrame:
-    return pd.DataFrame.from_dict({"Prefix": list(prefixes.keys()), "URI": [str(uri) for uri in prefixes.values()]})
-def _parse_metadata_df() -> pd.DataFrame:
-    clean_list = {
-        "namespace": "http://purl.org/cognite/neat/",
-        "prefix": "playground",
-        "external_id": "neat",
-        "version": "1.0.0",
-        "isCurrentVersion": True,
-        "created": datetime.utcnow(),
-        "updated": datetime.utcnow(),
-        "title": "RDF Graph Inferred Data Model",
-        "description": "This data model has been inferred with NEAT",
-        "creator": "NEAT",
-        "contributor": "NEAT",
-        "rights": "Unknown rights of usage",
-        "license": "Unknown license",
-    }
-    return pd.DataFrame(list(clean_list.items()), columns=["Key", "Value"])
-def _parse_classes_df(data_model: dict, prefixes: dict, parsing_config: dict | None = None) -> pd.DataFrame:
-    if parsing_config is None:
-        parsing_config = _create_default_classes_parsing_config()
-    class_rows = []
-    for class_ in data_model:
-        sanitized_class = to_dms_name(class_, "class")
-        class_rows.append(
-            [
-                sanitized_class,
-                None,
-                None,
-                str(prefixes[data_model[class_]["uri"].split(":")[0]]) + class_,
-                class_,
-                "exact",
-                "Parsed from RDF graph",
-            ]
-        )
-    return pd.DataFrame(class_rows, columns=parsing_config["header"])
-def _parse_properties_df(data_model: dict, prefixes: dict, parsing_config: dict | None = None) -> pd.DataFrame:
-    if parsing_config is None:
-        parsing_config = _create_default_properties_parsing_config()
-    property_rows = []
-    for class_ in data_model:
-        sanitized_class = to_dms_name(class_, "class")
-        for property_ in data_model[class_]["properties"]:
-            for type_ in data_model[class_]["properties"][property_]["value_type"]:
-                sanitized_property = to_dms_name(property_, "property")
-                max_count = max(data_model[class_]["properties"][property_]["occurrence"])
-                property_rows.append(
-                    [
-                        sanitized_class,
-                        sanitized_property,
-                        None,
-                        to_dms_name(type_, "value-type"),
-                        0,  # setting min count to 0 to be more flexible (all properties are optional)
-                        None if max_count > 1 else 1,
-                        "rdfpath",
-                        f'{data_model[class_]["uri"]}({data_model[class_]["properties"][property_]["uri"]})',
-                        str(prefixes[data_model[class_]["properties"][property_]["uri"].split(":")[0]]) + property_,
-                        property_,
-                        "exact",
-                        "Parsed from RDF graph",
-                    ]
-                )
-    return pd.DataFrame(property_rows, columns=parsing_config["header"])
-def _graph_to_data_model_dict(graph: Graph, max_number_of_instance: int = -1) -> tuple[dict, dict]:
-    """Convert RDF graph to dictionary defining data model and prefixes of the graph
-    Args:
-        graph: RDF graph to be converted to TransformationRules object
-        max_number_of_instance: Max number of instances to be considered for each class
-    Returns:
-        Tuple of data model and prefixes of the graph
-    """
-    data_model: dict[str, dict] = {}
-    prefixes: dict[str, Namespace] = get_default_prefixes()
-    for class_ in _get_class_ids(graph):
-        _add_uri_namespace_to_prefixes(class_, prefixes)
-        class_name = remove_namespace_from_uri(class_)
-        if class_name in data_model:
-            warnings.warn(
-                exceptions.GraphClassNameCollision(class_name=class_name).message,
-                category=exceptions.GraphClassNameCollision,
-                stacklevel=2,
-            )
-            class_name = f"{class_name}_{len(data_model)+1}"
-        data_model[class_name] = {"properties": {}, "uri": uri_to_short_form(class_, prefixes)}
-        for instance in _get_class_instance_ids(graph, class_, max_number_of_instance):
-            for property_, occurrence, data_type, object_type in _define_instance_properties(graph, instance):
-                property_name = remove_namespace_from_uri(property_)
-                _add_uri_namespace_to_prefixes(property_, prefixes)
-                type_ = data_type if data_type else object_type
-                # this is to skip rdf:type property
-                if not type_:
-                    continue
-                type_name = remove_namespace_from_uri(type_)
-                _add_uri_namespace_to_prefixes(type_, prefixes)
-                if property_name not in data_model[class_name]["properties"]:
-                    data_model[class_name]["properties"][property_name] = {
-                        "occurrence": {occurrence.value},
-                        "value_type": {type_name: {"uri": uri_to_short_form(type_, prefixes)}},
-                        "uri": uri_to_short_form(property_, prefixes),
-                    }
-                elif type_name not in data_model[class_name]["properties"][property_name]["value_type"]:
-                    data_model[class_name]["properties"][property_name]["value_type"][type_name] = {
-                        "uri": uri_to_short_form(type_, prefixes)
-                    }
-                    warnings.warn(
-                        exceptions.GraphClassPropertyMultiValueTypes(
-                            class_name=class_name,
-                            property_name=property_name,
-                            types=list(data_model[class_name]["properties"][property_name]["value_type"].keys()),
-                        ).message,
-                        category=exceptions.GraphClassPropertyMultiValueTypes,
-                        stacklevel=3,
-                    )
-                elif occurrence.value not in data_model[class_name]["properties"][property_name]["occurrence"]:
-                    data_model[class_name]["properties"][property_name]["occurrence"].add(occurrence.value)
-                    warnings.warn(
-                        exceptions.GraphClassPropertyMultiOccurrence(
-                            class_name=class_name, property_name=property_name
-                        ).message,
-                        category=exceptions.GraphClassPropertyMultiOccurrence,
-                        stacklevel=3,
-                    )
-                else:
-                    continue
-    return data_model, prefixes
-def _add_uri_namespace_to_prefixes(URI: URIRef, prefixes: dict[str, Namespace]):
-    """Add URI to prefixes dict if not already present
-    Args:
-        URI: URI from which namespace is being extracted
-        prefixes: Dict of prefixes and namespaces
-    """
-    if Namespace(get_namespace(URI)) not in prefixes.values():
-        prefixes[f"prefix-{len(prefixes)+1}"] = Namespace(get_namespace(URI))
-def _get_class_ids(graph: Graph) -> list[URIRef]:
-    """Get instances ids for a given class
-    Args:
-        graph: Graph containing class instances
-        class_: Class for which instances are to be found
-        namespace: Namespace of given class (to avoid writing long URIs)
-        limit: Max number of instances to return, by default -1 meaning all instances
-    Returns:
-        List of class instance URIs
-    """
-    query_statement = """SELECT ?class (count(?s) as ?instances )
-                                WHERE { ?s a ?class . }
-                                group by ?class order by DESC(?instances)"""
-    return [cast(tuple[URIRef, int], res)[0] for res in list(graph.query(query_statement))]
-def _get_class_instance_ids(graph: Graph, class_id: URIRef, max_number_of_instance: int = -1) -> list[URIRef]:
-    """Get instances ids for a given class
-    Args:
-        graph: Graph containing class instances
-        class_id: Class id for which instances are to be found
-    Returns:
-        List of class instance URIs
-    """
-    query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .}".replace("class", class_id)
-    if max_number_of_instance > 0:
-        query_statement += f" LIMIT {max_number_of_instance}"
-    return [cast(tuple[URIRef], res)[0] for res in list(graph.query(query_statement))]
-def _define_instance_properties(
-    graph: Graph, instance_id: URIRef
-) -> list[tuple[URIRef, Literal, URIRef | None, None | URIRef]]:
-    """Get properties of a given instance
-    Args:
-        graph: Graph containing class instances
-        instance_id: Instance id for which properties are to be found and defined
-    Returns:
-        List of properties of a given instance
-    """
-    query_statement = """SELECT ?property (count(?property) as ?occurrence) ?dataType ?objectType
-                         WHERE {<instance_id> ?property ?value .
-                                BIND(datatype(?value) AS ?dataType)
-                                OPTIONAL {?value rdf:type ?objectType .}
-                                }
-                         GROUP BY ?property ?dataType ?objectType"""
-    results = graph.query(query_statement.replace("instance_id", instance_id))
-    return [cast(tuple[URIRef, Literal, URIRef | None, None | URIRef], res) for res in list(results)]

cognite/neat/legacy/rules/importers/_json2rules.py DELETED Viewed

@@ -1,39 +0,0 @@
-import json
-from pathlib import Path
-from typing import Literal
-from ._dict2rules import ArbitraryDictImporter
-class ArbitraryJSONImporter(ArbitraryDictImporter):
-    """
-    Importer for data given in a JSON file or string.
-    This importer infers the data model from the JSON string based on the shape of the data.
-    Args:
-        json_path_or_str: Path to file with JSON or a JSON string.
-        relationship_direction: Direction of relationships, either "parent-to-child" or "child-to-parent". JSON
-            files are nested with children nested inside parents. This option determines whether the resulting rules
-            will have an edge from parents to children or from children to parents.
-    """
-    def __init__(
-        self,
-        json_path_or_str: Path,
-        relationship_direction: Literal["parent-to-child", "child-to-parent"] = "parent-to-child",
-    ):
-        if isinstance(json_path_or_str, str):
-            data = json.loads(json_path_or_str)
-            super().__init__(data, relationship_direction)
-        elif isinstance(json_path_or_str, Path):
-            if not json_path_or_str.exists():
-                raise ValueError(f"File {json_path_or_str} does not exist")
-            if json_path_or_str.suffix != ".json":
-                raise ValueError(f"File {json_path_or_str} is not a JSON file")
-            self.json_path = json_path_or_str
-            data = json.loads(json_path_or_str.read_text())
-            super().__init__(data, relationship_direction)
-        else:
-            raise TypeError(f"Expected Path or str, got {type(json_path_or_str)}")

cognite/neat/legacy/rules/importers/_owl2rules/__init__.py DELETED Viewed

@@ -1,3 +0,0 @@
-from ._owl2rules import OWLImporter
-__all__ = ["OWLImporter"]

cognite-neat 0.87.6__py3-none-any.whl → 0.88.0__py3-none-any.whl

Potentially problematic release.

cognite-neat 0.87.6__py3-none-any.whl → 0.88.0__py3-none-any.whl