PyPI - graflo - Versions diffs - 1.3.7__py3-none-any.whl - Mend

graflo 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of graflo might be problematic. Click here for more details.

Files changed (70) hide show

graflo/README.md +18 -0
graflo/__init__.py +70 -0
graflo/architecture/__init__.py +38 -0
graflo/architecture/actor.py +1276 -0
graflo/architecture/actor_util.py +450 -0
graflo/architecture/edge.py +418 -0
graflo/architecture/onto.py +376 -0
graflo/architecture/onto_sql.py +54 -0
graflo/architecture/resource.py +163 -0
graflo/architecture/schema.py +135 -0
graflo/architecture/transform.py +292 -0
graflo/architecture/util.py +89 -0
graflo/architecture/vertex.py +562 -0
graflo/caster.py +736 -0
graflo/cli/__init__.py +14 -0
graflo/cli/ingest.py +203 -0
graflo/cli/manage_dbs.py +197 -0
graflo/cli/plot_schema.py +132 -0
graflo/cli/xml2json.py +93 -0
graflo/data_source/__init__.py +48 -0
graflo/data_source/api.py +339 -0
graflo/data_source/base.py +95 -0
graflo/data_source/factory.py +304 -0
graflo/data_source/file.py +148 -0
graflo/data_source/memory.py +70 -0
graflo/data_source/registry.py +82 -0
graflo/data_source/sql.py +183 -0
graflo/db/__init__.py +44 -0
graflo/db/arango/__init__.py +22 -0
graflo/db/arango/conn.py +1025 -0
graflo/db/arango/query.py +180 -0
graflo/db/arango/util.py +88 -0
graflo/db/conn.py +377 -0
graflo/db/connection/__init__.py +6 -0
graflo/db/connection/config_mapping.py +18 -0
graflo/db/connection/onto.py +717 -0
graflo/db/connection/wsgi.py +29 -0
graflo/db/manager.py +119 -0
graflo/db/neo4j/__init__.py +16 -0
graflo/db/neo4j/conn.py +639 -0
graflo/db/postgres/__init__.py +37 -0
graflo/db/postgres/conn.py +948 -0
graflo/db/postgres/fuzzy_matcher.py +281 -0
graflo/db/postgres/heuristics.py +133 -0
graflo/db/postgres/inference_utils.py +428 -0
graflo/db/postgres/resource_mapping.py +273 -0
graflo/db/postgres/schema_inference.py +372 -0
graflo/db/postgres/types.py +148 -0
graflo/db/postgres/util.py +87 -0
graflo/db/tigergraph/__init__.py +9 -0
graflo/db/tigergraph/conn.py +2365 -0
graflo/db/tigergraph/onto.py +26 -0
graflo/db/util.py +49 -0
graflo/filter/__init__.py +21 -0
graflo/filter/onto.py +525 -0
graflo/logging.conf +22 -0
graflo/onto.py +312 -0
graflo/plot/__init__.py +17 -0
graflo/plot/plotter.py +616 -0
graflo/util/__init__.py +23 -0
graflo/util/chunker.py +807 -0
graflo/util/merge.py +150 -0
graflo/util/misc.py +37 -0
graflo/util/onto.py +422 -0
graflo/util/transform.py +454 -0
graflo-1.3.7.dist-info/METADATA +243 -0
graflo-1.3.7.dist-info/RECORD +70 -0
graflo-1.3.7.dist-info/WHEEL +4 -0
graflo-1.3.7.dist-info/entry_points.txt +5 -0
graflo-1.3.7.dist-info/licenses/LICENSE +126 -0

graflo/architecture/schema.py ADDED Viewed

@@ -0,0 +1,135 @@
+"""Graph database schema management and configuration.
+This module provides the core schema management functionality for graph databases.
+It defines the structure and configuration of vertices, edges, and resources
+that make up the graph database schema.
+Key Components:
+    - Schema: Main schema container with metadata and configurations
+    - SchemaMetadata: Schema versioning and naming information
+    - Resource: Resource definitions for data processing
+    - VertexConfig: Vertex collection configurations
+    - EdgeConfig: Edge collection configurations
+The schema system provides:
+    - Schema versioning and metadata
+    - Resource management and validation
+    - Vertex and edge configuration
+    - Transform registration and management
+Example:
+    >>> schema = Schema(
+    ...     general=SchemaMetadata(name="social_network", version="1.0"),
+    ...     vertex_config=VertexConfig(...),
+    ...     edge_config=EdgeConfig(...),
+    ...     resources=[Resource(...)]
+    ... )
+    >>> resource = schema.fetch_resource("users")
+"""
+import dataclasses
+import logging
+from collections import Counter
+from graflo.architecture.edge import EdgeConfig
+from graflo.architecture.resource import Resource
+from graflo.architecture.transform import ProtoTransform
+from graflo.architecture.vertex import VertexConfig
+from graflo.onto import BaseDataclass
+logger = logging.getLogger(__name__)
+@dataclasses.dataclass
+class SchemaMetadata(BaseDataclass):
+    """Schema metadata and versioning information.
+    This class holds metadata about the schema, including its name and version.
+    It's used for schema identification and versioning.
+    Attributes:
+        name: Name of the schema
+        version: Optional version string of the schema
+    """
+    name: str
+    version: str | None = None
+@dataclasses.dataclass
+class Schema(BaseDataclass):
+    """Graph database schema configuration.
+    This class represents the complete schema configuration for a graph database.
+    It manages resources, vertex configurations, edge configurations, and transforms.
+    Attributes:
+        general: Schema metadata and versioning information
+        vertex_config: Configuration for vertex collections
+        edge_config: Configuration for edge collections
+        resources: List of resource definitions
+        transforms: Dictionary of available transforms
+        _resources: Internal mapping of resource names to resources
+    """
+    general: SchemaMetadata
+    vertex_config: VertexConfig
+    edge_config: EdgeConfig
+    resources: list[Resource]
+    transforms: dict[str, ProtoTransform] = dataclasses.field(default_factory=dict)
+    def __post_init__(self):
+        """Initialize the schema after dataclass initialization.
+        Sets up transforms, initializes edge configuration, and validates
+        resource names for uniqueness.
+        Raises:
+            ValueError: If duplicate resource names are found
+        """
+        for name, t in self.transforms.items():
+            t.name = name
+        self.edge_config.finish_init(self.vertex_config)
+        for r in self.resources:
+            r.finish_init(
+                vertex_config=self.vertex_config,
+                edge_config=self.edge_config,
+                transforms=self.transforms,
+            )
+        names = [r.name for r in self.resources]
+        c = Counter(names)
+        for k, v in c.items():
+            if v > 1:
+                raise ValueError(f"resource name {k} used {v} times")
+        self._resources: dict[str, Resource] = {}
+        for r in self.resources:
+            self._resources[r.name] = r
+    def fetch_resource(self, name: str | None = None) -> Resource:
+        """Fetch a resource by name or get the first available resource.
+        Args:
+            name: Optional name of the resource to fetch
+        Returns:
+            Resource: The requested resource
+        Raises:
+            ValueError: If the requested resource is not found or if no resources exist
+        """
+        _current_resource = None
+        if name is not None:
+            if name in self._resources:
+                _current_resource = self._resources[name]
+            else:
+                raise ValueError(f"Resource {name} not found")
+        else:
+            if self._resources:
+                _current_resource = self.resources[0]
+            else:
+                raise ValueError("Empty resource container 😕")
+        return _current_resource

graflo/architecture/transform.py ADDED Viewed

@@ -0,0 +1,292 @@
+"""Data transformation and mapping system for graph databases.
+This module provides a flexible system for transforming and mapping data in graph
+databases. It supports both functional transformations and declarative mappings,
+with support for field switching and parameter configuration.
+Key Components:
+    - ProtoTransform: Base class for transform definitions
+    - Transform: Concrete transform implementation
+    - TransformException: Custom exception for transform errors
+The transform system supports:
+    - Functional transformations through imported modules
+    - Field mapping and switching
+    - Parameter configuration
+    - Input/output field specification
+    - Transform composition and inheritance
+Example:
+    >>> transform = Transform(
+    ...     module="my_module",
+    ...     foo="process_data",
+    ...     input=("field1", "field2"),
+    ...     output=("result1", "result2")
+    ... )
+    >>> result = transform({"field1": 1, "field2": 2})
+"""
+from __future__ import annotations
+import dataclasses
+import importlib
+import logging
+from copy import deepcopy
+from typing import Any
+from graflo.onto import BaseDataclass
+logger = logging.getLogger(__name__)
+class TransformException(BaseException):
+    """Base exception for transform-related errors."""
+    pass
+@dataclasses.dataclass
+class ProtoTransform(BaseDataclass):
+    """Base class for transform definitions.
+    This class provides the foundation for data transformations, supporting both
+    functional transformations and declarative mappings.
+    Attributes:
+        name: Optional name of the transform
+        module: Optional module containing the transform function
+        params: Dictionary of transform parameters
+        foo: Optional name of the transform function
+        input: Tuple of input field names
+        output: Tuple of output field names
+        _foo: Internal reference to the transform function
+    """
+    name: str | None = None
+    module: str | None = None
+    params: dict[str, Any] = dataclasses.field(default_factory=dict)
+    foo: str | None = None
+    input: str | list[str] | tuple[str, ...] = dataclasses.field(default_factory=tuple)
+    output: str | list[str] | tuple[str, ...] = dataclasses.field(default_factory=tuple)
+    def __post_init__(self):
+        """Initialize the transform after dataclass initialization.
+        Sets up the transform function and input/output field specifications.
+        """
+        self._foo = None
+        self._init_foo()
+        self.input = self._tuple_it(self.input)
+        if not self.output:
+            self.output = self.input
+        self.output = self._tuple_it(self.output)
+    @staticmethod
+    def _tuple_it(x):
+        """Convert input to tuple format.
+        Args:
+            x: Input to convert (string, list, or tuple)
+        Returns:
+            tuple: Converted tuple
+        """
+        if isinstance(x, str):
+            x = [x]
+        if isinstance(x, list):
+            x = tuple(x)
+        return x
+    def _init_foo(self):
+        """Initialize the transform function from module.
+        Imports the specified module and gets the transform function.
+        Raises:
+            TypeError: If module import fails
+            ValueError: If function lookup fails
+        """
+        if self.module is not None and self.foo is not None:
+            try:
+                _module = importlib.import_module(self.module)
+            except Exception as e:
+                raise TypeError(f"Provided module {self.module} is not valid: {e}")
+            try:
+                self._foo = getattr(_module, self.foo)
+            except Exception as e:
+                raise ValueError(
+                    f"Could not instantiate transform function. Exception: {e}"
+                )
+    def __lt__(self, other):
+        """Compare transforms for ordering.
+        Args:
+            other: Other transform to compare with
+        Returns:
+            bool: True if this transform should be ordered before other
+        """
+        if self._foo is None and other._foo is not None:
+            return True
+        return False
+@dataclasses.dataclass(kw_only=True)
+class Transform(ProtoTransform):
+    """Concrete, callable transform.
+    Extends ProtoTransform with a ``fields`` shorthand, field mapping
+    (``map``) and field switching (``switch``), and makes instances callable.
+    Attributes:
+        fields: Field name(s); used as ``input`` when ``input`` is empty
+        map: Dictionary mapping input field names to output field names
+        switch: Dictionary for field switching logic; each value is unpacked
+            as a pair in ``_dress_as_dict``
+        functional_transform: True when a transform function was resolved
+            (set in ``__post_init__``, not a dataclass field)
+    """
+    fields: str | list[str] | tuple[str, ...] = dataclasses.field(default_factory=tuple)
+    map: dict[str, str] = dataclasses.field(default_factory=dict)
+    switch: dict[str, Any] = dataclasses.field(default_factory=dict)
+    def __post_init__(self):
+        """Resolve input/output field names from the available configuration.
+        An explicit ``input`` wins over ``fields``; ``output`` defaults to
+        ``input``. When both are still empty they are derived from ``map``,
+        then from ``switch``; failing that, a ``name`` is required.
+        Raises:
+            ValueError: If input/output cannot be derived and no name is set
+        """
+        super().__post_init__()
+        self.functional_transform = False
+        if self._foo is not None:
+            self.functional_transform = True
+        self.input = self._tuple_it(self.input)
+        self.fields = self._tuple_it(self.fields)
+        # `fields` is only a fallback: a non-empty `input` takes precedence.
+        self.input = self.fields if self.fields and not self.input else self.input
+        # Re-apply the output default, since `input` may have just changed.
+        if not self.output:
+            self.output = self.input
+        self.output = self._tuple_it(self.output)
+        if not self.input and not self.output:
+            if self.map:
+                # map keys become the inputs, map values the outputs
+                items = list(self.map.items())
+                self.input = tuple(x for x, _ in items)
+                self.output = tuple(x for _, x in items)
+            elif self.switch:
+                # switch keys become the inputs; the outputs are taken from
+                # the value stored under the first switch key
+                self.input = tuple([k for k in self.switch])
+                self.output = tuple(self.switch[self.input[0]])
+            elif not self.name:
+                raise ValueError(
+                    "Either input and output, fields, map or name should be"
+                    " provided in Transform constructor."
+                )
+    def __call__(self, *nargs, **kwargs):
+        """Execute the transform.
+        Without a transform function the call acts as a pure mapping: values
+        are read from the first positional argument by ``input`` keys when it
+        is a dict, otherwise the positional arguments themselves are used.
+        With a transform function, that function is called with the values
+        extracted from a dict first argument (or with the positional
+        arguments as given), plus ``kwargs`` and ``params``.
+        Args:
+            *nargs: A single document dict, or positional values
+            **kwargs: Keyword arguments forwarded to the transform function
+        Returns:
+            A dict built by ``_dress_as_dict`` when ``output`` is non-empty,
+            otherwise the raw values
+        """
+        is_mapping = self._foo is None
+        if is_mapping:
+            # NOTE(review): indexes nargs[0] unconditionally, so a mapping
+            # call without positional arguments raises IndexError.
+            input_doc = nargs[0]
+            if isinstance(input_doc, dict):
+                output_values = [input_doc[k] for k in self.input]
+            else:
+                output_values = nargs
+        else:
+            if nargs and isinstance(input_doc := nargs[0], dict):
+                new_args = [input_doc[k] for k in self.input]
+                output_values = self._foo(*new_args, **kwargs, **self.params)
+            else:
+                output_values = self._foo(*nargs, **kwargs, **self.params)
+        if self.output:
+            r = self._dress_as_dict(output_values)
+        else:
+            r = output_values
+        return r
+    def _dress_as_dict(self, transform_result):
+        """Convert a transform result to a dictionary keyed by output names.
+        A list or tuple result (when no ``switch`` is configured) is zipped
+        with ``output``; any other result is stored whole under the last
+        output name. Each switch entry then adds one more key.
+        Args:
+            transform_result: Result of the transform
+        Returns:
+            dict: Dictionary representation of the result
+        """
+        if isinstance(transform_result, (list, tuple)) and not self.switch:
+            upd = {k: v for k, v in zip(self.output, transform_result)}
+        else:
+            # TODO: temporary solution; works only when there is one switch clause
+            upd = {self.output[-1]: transform_result}
+        # For every switch entry, store the switch key under the first
+        # element of its value pair; the second element (qq) is unused here.
+        for k0, (q, qq) in self.switch.items():
+            upd.update({q: k0})
+        return upd
+    @property
+    def is_dummy(self):
+        """Check if this is a dummy transform.
+        Returns:
+            bool: True when the transform has a name but neither a ``map``
+            nor a resolved transform function
+        """
+        return (self.name is not None) and (not self.map and self._foo is None)
+    def update(self, t: Transform):
+        """Return a copy of ``t`` overridden with this transform's settings.
+        Works on a deep copy of ``t``: this transform's non-empty ``input``
+        and ``output`` replace the copy's, its ``params`` are merged into the
+        copy's, and the copy is then re-initialized.
+        Args:
+            t: Transform to copy and override
+        Returns:
+            Transform: The overridden copy of ``t``
+        """
+        t_copy = deepcopy(t)
+        if self.input:
+            t_copy.input = self.input
+        if self.output:
+            t_copy.output = self.output
+        if self.params:
+            t_copy.params.update(self.params)
+        # Re-run initialization so derived state reflects the overrides.
+        t_copy.__post_init__()
+        return t_copy
+    def get_barebone(
+        self, other: Transform | None
+    ) -> tuple[Transform | None, Transform | None]:
+        """Get the barebone transform configuration.
+        Args:
+            other: Optional transform to use as base
+        Returns:
+            tuple[Transform | None, Transform | None]: ``(None, self)`` when
+            this transform names a function itself; ``(new_transform, None)``
+            when ``other`` names a function, where ``new_transform`` is built
+            from ``other``'s settings overridden by this transform's; and
+            ``(None, None)`` otherwise
+        """
+        # NOTE(review): to_dict(skip_defaults=True) is not defined in this
+        # file; presumably inherited from BaseDataclass - confirm.
+        self_param = self.to_dict(skip_defaults=True)
+        if self.foo is not None:
+            # self names a function, so self is the transform to store in the library
+            return None, self
+        elif other is not None and other.foo is not None:
+            # build a new transform from other; self's settings take precedence,
+            # except foo/module which always come from other
+            self_param.pop("foo", None)
+            self_param.pop("module", None)
+            other_param = other.to_dict(skip_defaults=True)
+            other_param.update(self_param)
+            return Transform(**other_param), None
+        else:
+            return None, None

graflo/architecture/util.py ADDED Viewed

@@ -0,0 +1,89 @@
+"""Utility functions for graph architecture operations.
+This module provides utility functions for working with graph data structures
+and transformations. It includes functions for dictionary projection and
+graph entity name formatting.
+Key Functions:
+    - project_dict: Project dictionary fields based on inclusion/exclusion
+    - cast_graph_name_to_triple: Convert graph names to standardized triple format
+Example:
+    >>> data = {"a": 1, "b": 2, "c": 3}
+    >>> project_dict(data, ["a", "b"], how="include")
+    {'a': 1, 'b': 2}
+    >>> cast_graph_name_to_triple("user_post_graph")
+    ('user', 'post', None)
+"""
+from graflo.architecture.onto import GraphEntity
+def project_dict(item, keys, how="include"):
+    """Project dictionary fields based on inclusion or exclusion.
+    This function filters a dictionary based on a list of keys, either including
+    or excluding the specified keys.
+    Args:
+        item: Dictionary to project
+        keys: List of keys to include or exclude
+        how: Projection mode - "include" or "exclude" (default: "include")
+    Returns:
+        dict: Projected dictionary containing only the specified fields
+    Example:
+        >>> data = {"a": 1, "b": 2, "c": 3}
+        >>> project_dict(data, ["a", "b"], how="include")
+        {'a': 1, 'b': 2}
+        >>> project_dict(data, ["a"], how="exclude")
+        {'b': 2, 'c': 3}
+    """
+    if how == "include":
+        return {k: v for k, v in item.items() if k in keys}
+    elif how == "exclude":
+        return {k: v for k, v in item.items() if k not in keys}
+    else:
+        return {}
+def cast_graph_name_to_triple(s: GraphEntity) -> str | tuple:
+    """Convert a graph name string to a triple format.
+    This function parses graph entity names into a standardized triple format
+    (source, target, type). It handles various naming patterns and special
+    suffixes like "graph" or "edges".
+    Args:
+        s: Graph entity name or ID
+    Returns:
+        str | tuple: Either a string for simple names or a tuple
+            representing (source, target, type) for complex names
+    Raises:
+        ValueError: If the graph name cannot be cast to a valid format
+    Example:
+        >>> cast_graph_name_to_triple("user_post_graph")
+        ('user', 'post', None)
+        >>> cast_graph_name_to_triple("simple_vertex")
+        ('simple', None)
+    """
+    if isinstance(s, str):
+        s2 = s.split("_")
+        if len(s2) < 2:
+            return s2[0]
+        elif len(s2) == 2:
+            return *s2[:-1], None
+        elif len(s2) == 3:
+            if s2[-1] in ["graph", "edges"]:
+                return *s2[:-1], None
+            else:
+                return tuple(s2)
+        elif len(s2) == 4 and s2[-1] in ["graph", "edges"]:
+            return tuple(s2[:-1])
+        raise ValueError(f"Invalid graph_name {s} : can not be cast to GraphEntity")
+    else:
+        return s