PyPI - graflo - Versions diffs - 1.1.0__py3-none-any.whl - Mend

graflo 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of graflo might be problematic. Click here for more details.

Files changed (45) hide show

graflo/README.md +18 -0
graflo/__init__.py +39 -0
graflo/architecture/__init__.py +37 -0
graflo/architecture/actor.py +974 -0
graflo/architecture/actor_util.py +425 -0
graflo/architecture/edge.py +295 -0
graflo/architecture/onto.py +374 -0
graflo/architecture/resource.py +161 -0
graflo/architecture/schema.py +136 -0
graflo/architecture/transform.py +292 -0
graflo/architecture/util.py +93 -0
graflo/architecture/vertex.py +277 -0
graflo/caster.py +409 -0
graflo/cli/__init__.py +14 -0
graflo/cli/ingest.py +144 -0
graflo/cli/manage_dbs.py +193 -0
graflo/cli/plot_schema.py +132 -0
graflo/cli/xml2json.py +93 -0
graflo/db/__init__.py +32 -0
graflo/db/arango/__init__.py +16 -0
graflo/db/arango/conn.py +734 -0
graflo/db/arango/query.py +180 -0
graflo/db/arango/util.py +88 -0
graflo/db/connection.py +304 -0
graflo/db/manager.py +104 -0
graflo/db/neo4j/__init__.py +16 -0
graflo/db/neo4j/conn.py +432 -0
graflo/db/util.py +49 -0
graflo/filter/__init__.py +21 -0
graflo/filter/onto.py +400 -0
graflo/logging.conf +22 -0
graflo/onto.py +186 -0
graflo/plot/__init__.py +17 -0
graflo/plot/plotter.py +556 -0
graflo/util/__init__.py +23 -0
graflo/util/chunker.py +739 -0
graflo/util/merge.py +148 -0
graflo/util/misc.py +37 -0
graflo/util/onto.py +63 -0
graflo/util/transform.py +406 -0
graflo-1.1.0.dist-info/METADATA +157 -0
graflo-1.1.0.dist-info/RECORD +45 -0
graflo-1.1.0.dist-info/WHEEL +4 -0
graflo-1.1.0.dist-info/entry_points.txt +5 -0
graflo-1.1.0.dist-info/licenses/LICENSE +126 -0

graflo/architecture/resource.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""Resource management and processing for graph databases.
+This module provides the core resource handling functionality for graph databases.
+It defines how data resources are processed, transformed, and mapped to graph
+structures through a system of actors and transformations.
+Key Components:
+    - Resource: Main class for resource processing and transformation
+    - ActorWrapper: Wrapper for processing actors
+    - ActionContext: Context for processing actions
+The resource system allows for:
+    - Data encoding and transformation
+    - Vertex and edge creation
+    - Weight management
+    - Collection merging
+    - Type casting and validation
+Example:
+    >>> resource = Resource(
+    ...     resource_name="users",
+    ...     apply=[VertexActor("user"), EdgeActor("follows")],
+    ...     encoding=EncodingType.UTF_8
+    ... )
+    >>> result = resource(doc)
+"""
+import dataclasses
+import logging
+from collections import defaultdict
+from typing import Callable
+from dataclass_wizard import JSONWizard
+from graflo.architecture.actor import (
+    ActorWrapper,
+)
+from graflo.architecture.edge import Edge, EdgeConfig
+from graflo.architecture.onto import (
+    ActionContext,
+    EncodingType,
+    GraphEntity,
+)
+from graflo.architecture.transform import ProtoTransform
+from graflo.architecture.vertex import (
+    VertexConfig,
+)
+from graflo.onto import BaseDataclass
+logger = logging.getLogger(__name__)
+@dataclasses.dataclass(kw_only=True)
+class Resource(BaseDataclass, JSONWizard):
+    """A named data resource together with the actor pipeline applied to it.
+    The ``apply`` entries are wrapped into a single root ``ActorWrapper``;
+    calling the resource with a document runs that root actor and returns the
+    normalized result.
+    Attributes:
+        resource_name: Name of the resource (also exposed as ``name``)
+        apply: Actor specifications, unpacked into ``ActorWrapper(*apply)``
+        encoding: Data encoding type (default: UTF_8)
+        merge_collections: List of collections to merge
+        extra_weights: Additional edges, finalized in ``finish_init``
+        types: Mapping of field name to a type expression string
+        root: Root actor wrapper, created in ``__post_init__``
+        vertex_config: Vertex configuration, assigned in ``finish_init``
+        edge_config: Edge configuration, assigned in ``finish_init``
+    """
+    resource_name: str
+    apply: list
+    encoding: EncodingType = EncodingType.UTF_8
+    merge_collections: list[str] = dataclasses.field(default_factory=list)
+    extra_weights: list[Edge] = dataclasses.field(default_factory=list)
+    types: dict[str, str] = dataclasses.field(default_factory=dict)
+    def __post_init__(self):
+        """Build the root actor and resolve the configured type expressions.
+        Each value of ``types`` is evaluated into a callable stored in
+        ``self._types``. A failing expression is logged and skipped; nothing
+        is raised from here for it.
+        """
+        self.root = ActorWrapper(*self.apply)
+        self._types: dict[str, Callable] = {}
+        # Declared only; the values arrive later through finish_init().
+        self.vertex_config: VertexConfig
+        self.edge_config: EdgeConfig
+        for field_name, type_expr in self.types.items():
+            try:
+                # NOTE(review): eval() executes whatever string the config
+                # holds - only safe while resource definitions are trusted.
+                caster = eval(type_expr)
+            except Exception as ex:
+                logger.error(
+                    f"For resource {self.name} for field {field_name} failed to cast type {type_expr} : {ex}"
+                )
+            else:
+                self._types[field_name] = caster
+    @property
+    def name(self):
+        """Alias for ``resource_name``.
+        Returns:
+            str: Name of the resource
+        """
+        return self.resource_name
+    def finish_init(
+        self,
+        vertex_config: VertexConfig,
+        edge_config: EdgeConfig,
+        transforms: dict[str, ProtoTransform],
+    ):
+        """Second initialization stage, run once the schema-wide configs exist.
+        Stores both configurations, forwards them (with the transforms) to the
+        root actor and finalizes every extra weight edge.
+        Args:
+            vertex_config: Configuration for vertices
+            edge_config: Configuration for edges
+            transforms: Dictionary of available transforms
+        """
+        self.vertex_config, self.edge_config = vertex_config, edge_config
+        logger.debug(f"total resource actor count : {self.root.count()}")
+        self.root.finish_init(
+            vertex_config=vertex_config,
+            transforms=transforms,
+            edge_config=edge_config,
+        )
+        logger.debug(f"total resource actor count (after 2 finit): {self.root.count()}")
+        for weight_edge in self.extra_weights:
+            weight_edge.finish_init(vertex_config)
+    def __call__(self, doc: dict) -> defaultdict[GraphEntity, list]:
+        """Run a document through the root actor.
+        Args:
+            doc: Document to process
+        Returns:
+            defaultdict[GraphEntity, list]: Result of ``root.normalize_ctx``
+            applied to the context the root actor produced
+        """
+        filled_ctx = self.root(ActionContext(), doc=doc)
+        return self.root.normalize_ctx(filled_ctx)
+    def count(self):
+        """Report the actor count of the root wrapper.
+        Returns:
+            Whatever ``self.root.count()`` returns
+        """
+        return self.root.count()

graflo/architecture/schema.py ADDED Viewed

@@ -0,0 +1,136 @@
+"""Graph database schema management and configuration.
+This module provides the core schema management functionality for graph databases.
+It defines the structure and configuration of vertices, edges, and resources
+that make up the graph database schema.
+Key Components:
+    - Schema: Main schema container with metadata and configurations
+    - SchemaMetadata: Schema versioning and naming information
+    - Resource: Resource definitions for data processing
+    - VertexConfig: Vertex collection configurations
+    - EdgeConfig: Edge collection configurations
+The schema system provides:
+    - Schema versioning and metadata
+    - Resource management and validation
+    - Vertex and edge configuration
+    - Transform registration and management
+Example:
+    >>> schema = Schema(
+    ...     general=SchemaMetadata(name="social_network", version="1.0"),
+    ...     vertex_config=VertexConfig(...),
+    ...     edge_config=EdgeConfig(...),
+    ...     resources=[Resource(...)]
+    ... )
+    >>> resource = schema.fetch_resource("users")
+"""
+import dataclasses
+import logging
+from collections import Counter
+from typing import Optional
+from graflo.architecture.edge import EdgeConfig
+from graflo.architecture.resource import Resource
+from graflo.architecture.transform import ProtoTransform
+from graflo.architecture.vertex import VertexConfig
+from graflo.onto import BaseDataclass
+logger = logging.getLogger(__name__)
+@dataclasses.dataclass
+class SchemaMetadata(BaseDataclass):
+    """Identifying metadata for a schema.
+    A plain data holder with a required name and an optional version; no
+    validation is performed here.
+    Attributes:
+        name: Name of the schema (required)
+        version: Version string of the schema, ``None`` when not given
+    """
+    name: str
+    version: Optional[str] = None
+@dataclasses.dataclass
+class Schema(BaseDataclass):
+    """Complete schema: metadata, vertex/edge configuration and resources.
+    Attributes:
+        general: Schema metadata and versioning information
+        vertex_config: Configuration for vertex collections
+        edge_config: Configuration for edge collections
+        resources: List of resource definitions
+        transforms: Dictionary of available transforms, keyed by name
+        _resources: Name -> resource index, built in ``__post_init__``
+    """
+    general: SchemaMetadata
+    vertex_config: VertexConfig
+    edge_config: EdgeConfig
+    resources: list[Resource]
+    transforms: dict[str, ProtoTransform] = dataclasses.field(default_factory=dict)
+    def __post_init__(self):
+        """Wire the parts of the schema together and index the resources.
+        Steps, in order: stamp each transform with its dictionary key as
+        ``name``; finalize the edge configuration against the vertex
+        configuration; finalize every resource; reject duplicate resource
+        names; build the name index.
+        Raises:
+            ValueError: If a resource name is used more than once
+        """
+        for transform_name, transform in self.transforms.items():
+            transform.name = transform_name
+        self.edge_config.finish_init(self.vertex_config)
+        for resource in self.resources:
+            resource.finish_init(
+                vertex_config=self.vertex_config,
+                edge_config=self.edge_config,
+                transforms=self.transforms,
+            )
+        usage = Counter([resource.name for resource in self.resources])
+        for resource_name, times_used in usage.items():
+            if times_used > 1:
+                raise ValueError(f"resource name {resource_name} used {times_used} times")
+        self._resources: dict[str, Resource] = {
+            resource.name: resource for resource in self.resources
+        }
+    def fetch_resource(self, name: Optional[str] = None) -> Resource:
+        """Look a resource up by name, or return the first one when no name is given.
+        Args:
+            name: Optional name of the resource to fetch
+        Returns:
+            Resource: The named resource, or ``self.resources[0]`` if ``name`` is None
+        Raises:
+            ValueError: If ``name`` is unknown, or if no name is given and
+                the schema holds no resources
+        """
+        if name is None:
+            if not self._resources:
+                raise ValueError("Empty resource container 😕")
+            return self.resources[0]
+        if name not in self._resources:
+            raise ValueError(f"Resource {name} not found")
+        return self._resources[name]

graflo/architecture/transform.py ADDED Viewed

@@ -0,0 +1,292 @@
+"""Data transformation and mapping system for graph databases.
+This module provides a flexible system for transforming and mapping data in graph
+databases. It supports both functional transformations and declarative mappings,
+with support for field switching and parameter configuration.
+Key Components:
+    - ProtoTransform: Base class for transform definitions
+    - Transform: Concrete transform implementation
+    - TransformException: Custom exception for transform errors
+The transform system supports:
+    - Functional transformations through imported modules
+    - Field mapping and switching
+    - Parameter configuration
+    - Input/output field specification
+    - Transform composition and inheritance
+Example:
+    >>> transform = Transform(
+    ...     module="my_module",
+    ...     foo="process_data",
+    ...     input=("field1", "field2"),
+    ...     output=("result1", "result2")
+    ... )
+    >>> result = transform({"field1": 1, "field2": 2})
+"""
+from __future__ import annotations
+import dataclasses
+import importlib
+import logging
+from copy import deepcopy
+from typing import Optional
+from graflo.onto import BaseDataclass
+logger = logging.getLogger(__name__)
+class TransformException(Exception):
+    """Base exception for transform-related errors.
+    Derives from ``Exception`` rather than ``BaseException`` so that ordinary
+    ``except Exception`` handlers see it; ``BaseException`` is meant for
+    interpreter-level exits such as ``KeyboardInterrupt`` and ``SystemExit``.
+    Handlers written as ``except TransformException`` are unaffected.
+    """
+@dataclasses.dataclass
+class ProtoTransform(BaseDataclass):
+    """Base class for transform definitions.
+    Describes a transform declaratively: an optional function (attribute
+    ``foo`` of the importable ``module``), extra keyword parameters for it,
+    and the names of the input and output fields.
+    Attributes:
+        name: Optional name of the transform
+        module: Optional dotted path of the module holding the function
+        params: Dictionary of transform parameters
+        foo: Optional name of the transform function inside ``module``
+        input: Tuple of input field names
+        output: Tuple of output field names (defaults to ``input``)
+        _foo: Resolved function object, or None when ``module`` is not set
+    """
+    name: Optional[str] = None
+    module: Optional[str] = None
+    params: dict = dataclasses.field(default_factory=dict)
+    foo: Optional[str] = None
+    input: tuple[str, ...] = dataclasses.field(default_factory=tuple)
+    output: tuple[str, ...] = dataclasses.field(default_factory=tuple)
+    def __post_init__(self):
+        """Resolve the function and normalize ``input``/``output`` to tuples.
+        An empty ``output`` falls back to ``input``.
+        Raises:
+            TypeError: If ``module`` cannot be imported
+            ValueError: If ``foo`` cannot be looked up in ``module``
+        """
+        self._foo = None
+        self._init_foo()
+        self.input = self._tuple_it(self.input)
+        if not self.output:
+            self.output = self.input
+        self.output = self._tuple_it(self.output)
+    @staticmethod
+    def _tuple_it(x):
+        """Normalize a field specification to a tuple.
+        Args:
+            x: A single name (str), a list of names, or anything else
+        Returns:
+            A one-element tuple for a str, ``tuple(x)`` for a list; any other
+            value (including an existing tuple) is returned unchanged
+        """
+        if isinstance(x, str):
+            x = [x]
+        if isinstance(x, list):
+            x = tuple(x)
+        return x
+    def _init_foo(self):
+        """Import ``module`` and bind ``_foo`` to its ``foo`` attribute.
+        Does nothing when ``module`` is None. The original exception is
+        chained as ``__cause__`` so the underlying traceback is not lost.
+        Raises:
+            TypeError: If the module import fails
+            ValueError: If the attribute lookup fails (this includes ``foo``
+                being None, since ``getattr`` rejects a non-string name)
+        """
+        if self.module is not None:
+            try:
+                _module = importlib.import_module(self.module)
+            except Exception as e:
+                raise TypeError(
+                    f"Provided module {self.module} is not valid: {e}"
+                ) from e
+            try:
+                self._foo = getattr(_module, self.foo)
+            except Exception as e:
+                raise ValueError(
+                    f"Could not instantiate transform function. Exception: {e}"
+                ) from e
+    def __lt__(self, other):
+        """Order transforms without a function before transforms with one.
+        Args:
+            other: Other transform to compare with
+        Returns:
+            bool: True only if this transform has no resolved function while
+            ``other`` has one
+        """
+        return self._foo is None and other._foo is not None
+@dataclasses.dataclass
+class Transform(ProtoTransform):
+    """Concrete, callable transform.
+    Adds ``fields``, ``map`` and ``switch`` on top of ``ProtoTransform`` and
+    implements the call protocol: either apply the resolved function or, when
+    there is none, pass the selected input values through under the output
+    names.
+    Attributes:
+        fields: Field names used as ``input`` when ``input`` is empty
+        map: Input-name -> output-name mapping
+        switch: Field switching specification
+        functional_transform: True when a function was resolved (``_foo``)
+    """
+    fields: tuple[str, ...] = dataclasses.field(default_factory=tuple)
+    map: dict[str, str] = dataclasses.field(default_factory=dict)
+    # NOTE(review): annotated dict[str, str], but values are unpacked as pairs
+    # in _dress_as_dict and wrapped with tuple() below - confirm intended type.
+    switch: dict[str, str] = dataclasses.field(default_factory=dict)
+    def __post_init__(self):
+        """Derive ``input``/``output`` from whichever specification was given.
+        Precedence: explicit ``input``/``output``, then ``fields``, then
+        ``map`` (keys -> input, values -> output), then ``switch``. A
+        transform with none of these must at least carry a ``name``.
+        Raises:
+            ValueError: If no input/output, fields, map, switch or name is given
+        """
+        super().__post_init__()
+        self.functional_transform = self._foo is not None
+        self.input = self._tuple_it(self.input)
+        self.fields = self._tuple_it(self.fields)
+        if self.fields and not self.input:
+            self.input = self.fields
+        if not self.output:
+            self.output = self.input
+        self.output = self._tuple_it(self.output)
+        if self.input or self.output:
+            return
+        if self.map:
+            pairs = list(self.map.items())
+            self.input = tuple(src for src, _ in pairs)
+            self.output = tuple(dst for _, dst in pairs)
+        elif self.switch:
+            self.input = tuple(self.switch)
+            self.output = tuple(self.switch[self.input[0]])
+        elif not self.name:
+            raise ValueError(
+                "Either input and output, fields, map or name should be"
+                " provided in Transform constructor."
+            )
+    def __call__(self, *nargs, **kwargs):
+        """Execute the transform.
+        When the first positional argument is a dict, the values of the
+        ``input`` fields are pulled out of it; otherwise the positional
+        arguments are used as given. Without a resolved function those values
+        are the result; with one, the function is called on them together
+        with ``kwargs`` and ``params``.
+        Args:
+            *nargs: A document dict, or the raw positional values
+            **kwargs: Extra keyword arguments for the transform function
+        Returns:
+            A dict keyed by ``output`` names when ``output`` is set, otherwise
+            the raw values
+        """
+        if self._foo is None:
+            # Pure mapping: a first positional argument is required here.
+            first = nargs[0]
+            if isinstance(first, dict):
+                values = [first[key] for key in self.input]
+            else:
+                values = nargs
+        else:
+            if nargs and isinstance(nargs[0], dict):
+                call_args = [nargs[0][key] for key in self.input]
+            else:
+                call_args = nargs
+            values = self._foo(*call_args, **kwargs, **self.params)
+        if not self.output:
+            return values
+        return self._dress_as_dict(values)
+    def _dress_as_dict(self, transform_result):
+        """Label a transform result with the output field names.
+        Args:
+            transform_result: Result of the transform
+        Returns:
+            dict: ``output`` names zipped with a list/tuple result when no
+            switch is configured; otherwise the whole result under the last
+            output name, plus one entry per switch clause
+        """
+        is_sequence = isinstance(transform_result, (list, tuple))
+        if is_sequence and not self.switch:
+            dressed = dict(zip(self.output, transform_result))
+        else:
+            # TODO : temporary solution works only there is one switch clause
+            dressed = {self.output[-1]: transform_result}
+        for source_field, (label_key, _) in self.switch.items():
+            dressed[label_key] = source_field
+        return dressed
+    @property
+    def is_dummy(self):
+        """Whether this transform is only a named placeholder.
+        Returns:
+            bool: True when it has a name but neither a map nor a function
+        """
+        if self.name is None:
+            return False
+        return not self.map and self._foo is None
+    def update(self, t: Transform):
+        """Overlay this transform's settings onto a copy of another transform.
+        Args:
+            t: Transform to start from (left untouched; a deep copy is used)
+        Returns:
+            Transform: The copy, with this transform's non-empty ``input``,
+            ``output`` and ``params`` applied and ``__post_init__`` re-run
+        """
+        merged = deepcopy(t)
+        if self.input:
+            merged.input = self.input
+        if self.output:
+            merged.output = self.output
+        if self.params:
+            merged.params.update(self.params)
+        merged.__post_init__()
+        return merged
+    def get_barebone(
+        self, other: Optional[Transform]
+    ) -> tuple[Optional[Transform], Optional[Transform]]:
+        """Split a transform into its usable form and its library form.
+        Args:
+            other: Optional transform to use as base
+        Returns:
+            tuple[Optional[Transform], Optional[Transform]]:
+            ``(None, self)`` when this transform names a function itself;
+            ``(new_transform, None)`` when ``other`` names one, the new
+            transform being built from ``other``'s non-default fields
+            overridden by this transform's (minus ``foo`` and ``module``);
+            ``(None, None)`` otherwise
+        """
+        own_fields = self.to_dict(skip_defaults=True)
+        if self.foo is not None:
+            # self will be the lib transform
+            return None, self
+        if other is None or other.foo is None:
+            return None, None
+        # init self from other
+        for inherited_key in ("foo", "module"):
+            own_fields.pop(inherited_key, None)
+        combined = other.to_dict(skip_defaults=True)
+        combined.update(own_fields)
+        return Transform(**combined), None

graflo/architecture/util.py ADDED Viewed

@@ -0,0 +1,93 @@
+"""Utility functions for graph architecture operations.
+This module provides utility functions for working with graph data structures
+and transformations. It includes functions for dictionary projection and
+graph entity name formatting.
+Key Functions:
+    - project_dict: Project dictionary fields based on inclusion/exclusion
+    - cast_graph_name_to_triple: Convert graph names to standardized triple format
+Example:
+    >>> data = {"a": 1, "b": 2, "c": 3}
+    >>> project_dict(data, ["a", "b"], how="include")
+    {'a': 1, 'b': 2}
+    >>> cast_graph_name_to_triple("user_post_graph")
+    ('user', 'post', None)
+"""
+from __future__ import annotations
+from typing import Union
+from graflo.architecture.onto import GraphEntity
+def project_dict(item, keys, how="include"):
+    """Return a copy of a dictionary restricted by a list of keys.
+    Args:
+        item: Dictionary to project
+        keys: Container of keys to keep or to drop
+        how: "include" keeps only the listed keys, "exclude" drops them
+            (default: "include")
+    Returns:
+        dict: The projected dictionary; an empty dict for any other ``how``
+    Example:
+        >>> data = {"a": 1, "b": 2, "c": 3}
+        >>> project_dict(data, ["a", "b"], how="include")
+        {'a': 1, 'b': 2}
+        >>> project_dict(data, ["a"], how="exclude")
+        {'b': 2, 'c': 3}
+    """
+    if how not in ("include", "exclude"):
+        return {}
+    keep_listed = how == "include"
+    # A pair survives when "is listed" agrees with "listed ones are kept".
+    return {
+        key: value for key, value in item.items() if (key in keys) == keep_listed
+    }
+def cast_graph_name_to_triple(s: GraphEntity) -> Union[str, tuple]:
+    """Parse an underscore-separated graph name into its components.
+    Non-string input is returned untouched. A string is split on "_" and
+    interpreted by the number of parts:
+        - 1 part: the name itself is returned as a string
+        - 2 parts: the last part is dropped and None appended
+        - 3 parts: a trailing "graph"/"edges" is replaced by None, otherwise
+          all three parts are returned
+        - 4 parts: accepted only with a trailing "graph"/"edges", which is
+          dropped
+    Args:
+        s: Graph entity name or ID
+    Returns:
+        Union[str, tuple]: A string for single-part names, otherwise a tuple
+    Raises:
+        ValueError: If the name has four parts without a recognized suffix,
+            or more than four parts
+    Example:
+        >>> cast_graph_name_to_triple("user_post_graph")
+        ('user', 'post', None)
+        >>> cast_graph_name_to_triple("simple_vertex")
+        ('simple', None)
+    """
+    if not isinstance(s, str):
+        return s
+    parts = s.split("_")
+    n_parts = len(parts)
+    has_suffix = parts[-1] in ["graph", "edges"]
+    if n_parts < 2:
+        return parts[0]
+    if n_parts == 2:
+        return (*parts[:-1], None)
+    if n_parts == 3:
+        return (*parts[:-1], None) if has_suffix else tuple(parts)
+    if n_parts == 4 and has_suffix:
+        return tuple(parts[:-1])
+    raise ValueError(f"Invalid graph_name {s} : can not be cast to GraphEntity")