PyPI - graflo - Versions diffs - 1.3.7__py3-none-any.whl - Mend

graflo 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of graflo might be problematic. Click here for more details.

Files changed (70) hide show

graflo/README.md +18 -0
graflo/__init__.py +70 -0
graflo/architecture/__init__.py +38 -0
graflo/architecture/actor.py +1276 -0
graflo/architecture/actor_util.py +450 -0
graflo/architecture/edge.py +418 -0
graflo/architecture/onto.py +376 -0
graflo/architecture/onto_sql.py +54 -0
graflo/architecture/resource.py +163 -0
graflo/architecture/schema.py +135 -0
graflo/architecture/transform.py +292 -0
graflo/architecture/util.py +89 -0
graflo/architecture/vertex.py +562 -0
graflo/caster.py +736 -0
graflo/cli/__init__.py +14 -0
graflo/cli/ingest.py +203 -0
graflo/cli/manage_dbs.py +197 -0
graflo/cli/plot_schema.py +132 -0
graflo/cli/xml2json.py +93 -0
graflo/data_source/__init__.py +48 -0
graflo/data_source/api.py +339 -0
graflo/data_source/base.py +95 -0
graflo/data_source/factory.py +304 -0
graflo/data_source/file.py +148 -0
graflo/data_source/memory.py +70 -0
graflo/data_source/registry.py +82 -0
graflo/data_source/sql.py +183 -0
graflo/db/__init__.py +44 -0
graflo/db/arango/__init__.py +22 -0
graflo/db/arango/conn.py +1025 -0
graflo/db/arango/query.py +180 -0
graflo/db/arango/util.py +88 -0
graflo/db/conn.py +377 -0
graflo/db/connection/__init__.py +6 -0
graflo/db/connection/config_mapping.py +18 -0
graflo/db/connection/onto.py +717 -0
graflo/db/connection/wsgi.py +29 -0
graflo/db/manager.py +119 -0
graflo/db/neo4j/__init__.py +16 -0
graflo/db/neo4j/conn.py +639 -0
graflo/db/postgres/__init__.py +37 -0
graflo/db/postgres/conn.py +948 -0
graflo/db/postgres/fuzzy_matcher.py +281 -0
graflo/db/postgres/heuristics.py +133 -0
graflo/db/postgres/inference_utils.py +428 -0
graflo/db/postgres/resource_mapping.py +273 -0
graflo/db/postgres/schema_inference.py +372 -0
graflo/db/postgres/types.py +148 -0
graflo/db/postgres/util.py +87 -0
graflo/db/tigergraph/__init__.py +9 -0
graflo/db/tigergraph/conn.py +2365 -0
graflo/db/tigergraph/onto.py +26 -0
graflo/db/util.py +49 -0
graflo/filter/__init__.py +21 -0
graflo/filter/onto.py +525 -0
graflo/logging.conf +22 -0
graflo/onto.py +312 -0
graflo/plot/__init__.py +17 -0
graflo/plot/plotter.py +616 -0
graflo/util/__init__.py +23 -0
graflo/util/chunker.py +807 -0
graflo/util/merge.py +150 -0
graflo/util/misc.py +37 -0
graflo/util/onto.py +422 -0
graflo/util/transform.py +454 -0
graflo-1.3.7.dist-info/METADATA +243 -0
graflo-1.3.7.dist-info/RECORD +70 -0
graflo-1.3.7.dist-info/WHEEL +4 -0
graflo-1.3.7.dist-info/entry_points.txt +5 -0
graflo-1.3.7.dist-info/licenses/LICENSE +126 -0

graflo/db/postgres/fuzzy_matcher.py ADDED Viewed

@@ -0,0 +1,281 @@
+"""Fuzzy matching utilities for PostgreSQL schema analysis.
+This module provides improved fuzzy matching strategies for identifying
+vertex names from table and column fragments.
+"""
+from difflib import SequenceMatcher
+class FuzzyMatcher:
+    """Score a name fragment against known vertex names with several heuristics.
+    A case-insensitive exact hit short-circuits with score 1.0. Otherwise four
+    scorers run in a fixed order and the highest score wins (on a tie the
+    earlier scorer is kept):
+    1. Substring containment, scaled by length ratio
+    2. Sequence similarity (difflib)
+    3. Prefix/suffix overlap
+    4. Stripping of common key affixes (id, fk, key, pk, ref, reference)
+    """
+    def __init__(self, vertex_names: list[str], threshold: float = 0.6):
+        """Store the candidates and index them by lowercase form.
+        Args:
+            vertex_names: Vertex table names to match against
+            threshold: Minimum score (0.0 to 1.0) that match() will report
+        """
+        self.vertex_names = vertex_names
+        self.threshold = threshold
+        # Lowercase -> original spelling; names that differ only in case
+        # collapse to the last one listed.
+        lowered: dict[str, str] = {}
+        for name in vertex_names:
+            lowered[name.lower()] = name
+        self._vertex_lower_map = lowered
+        self._vertex_lower_list = list(lowered)
+    @staticmethod
+    def _top_scoring(scored) -> tuple[str | None, float]:
+        """Return the first (name, score) pair with the strictly highest score.
+        Args:
+            scored: Iterable of (vertex_name, score) pairs
+        Returns:
+            Tuple of (best_match, score); (None, 0.0) when nothing scores above 0
+        """
+        winner, top = None, 0.0
+        for name, score in scored:
+            if score > top:
+                winner, top = name, score
+        return (winner, top)
+    def match(self, fragment: str) -> tuple[str | None, float]:
+        """Find the best vertex name for a fragment.
+        Args:
+            fragment: Fragment to match
+        Returns:
+            Tuple of (best_match, score), or (None, 0.0) when there are no
+            vertex names, the fragment is empty, or the best score is below
+            the threshold
+        """
+        if not (self.vertex_names and fragment):
+            return (None, 0.0)
+        needle = fragment.lower()
+        # An exact (case-insensitive) hit needs no scoring.
+        if needle in self._vertex_lower_map:
+            return (self._vertex_lower_map[needle], 1.0)
+        strategies = (
+            self._substring_match,
+            self._sequence_match,
+            self._prefix_suffix_match,
+            self._pattern_match,
+        )
+        winner, top = None, 0.0
+        for strategy in strategies:
+            candidate, score = strategy(needle)
+            # Strict comparison keeps the earlier strategy on equal scores.
+            if score > top:
+                winner, top = candidate, score
+        return (winner, top) if top >= self.threshold else (None, 0.0)
+    def _substring_match(self, fragment_lower: str) -> tuple[str | None, float]:
+        """Score vertex names that contain the fragment or are contained in it.
+        The score is the length of the contained string divided by the length
+        of the containing one; when the contained string has at least 3
+        characters the score is multiplied by 1.2 and capped at 0.95.
+        Args:
+            fragment_lower: Lowercase fragment to match
+        Returns:
+            Tuple of (best_match, score)
+        """
+        def scored():
+            for lowered, name in self._vertex_lower_map.items():
+                if fragment_lower in lowered:
+                    # e.g. "user" inside "users"
+                    inner, outer = len(fragment_lower), len(lowered)
+                elif lowered in fragment_lower:
+                    # e.g. "user" inside "user_id"
+                    inner, outer = len(lowered), len(fragment_lower)
+                else:
+                    continue
+                score = inner / outer
+                if inner >= 3:
+                    score = min(score * 1.2, 0.95)
+                yield name, score
+        return self._top_scoring(scored())
+    def _sequence_match(self, fragment_lower: str) -> tuple[str | None, float]:
+        """Score every vertex name by difflib sequence similarity.
+        Args:
+            fragment_lower: Lowercase fragment to match
+        Returns:
+            Tuple of (best_match, score)
+        """
+        return self._top_scoring(
+            (name, SequenceMatcher(None, fragment_lower, lowered).ratio())
+            for lowered, name in self._vertex_lower_map.items()
+        )
+    def _prefix_suffix_match(self, fragment_lower: str) -> tuple[str | None, float]:
+        """Score vertex names that begin or end the fragment, or that it begins.
+        The score is the shorter length divided by the longer one, unboosted.
+        Args:
+            fragment_lower: Lowercase fragment to match
+        Returns:
+            Tuple of (best_match, score)
+        """
+        def scored():
+            for lowered, name in self._vertex_lower_map.items():
+                if fragment_lower.startswith(lowered) or fragment_lower.endswith(
+                    lowered
+                ):
+                    yield name, len(lowered) / len(fragment_lower)
+                elif lowered.startswith(fragment_lower):
+                    yield name, len(fragment_lower) / len(lowered)
+        return self._top_scoring(scored())
+    def _pattern_match(self, fragment_lower: str) -> tuple[str | None, float]:
+        """Strip a common key affix and look the remainder up exactly.
+        Affixes are tried in a fixed order; each is tested as a suffix first
+        and, only if the fragment does not end with it, as a prefix. The first
+        remainder that is a known vertex name wins with a fixed score of 0.9.
+        Args:
+            fragment_lower: Lowercase fragment to match
+        Returns:
+            Tuple of (best_match, score)
+        """
+        affixes = (
+            "_id",
+            "_fk",
+            "_key",
+            "_pk",
+            "_ref",
+            "_reference",
+            "id_",
+            "fk_",
+            "key_",
+            "pk_",
+            "ref_",
+            "reference_",
+        )
+        for affix in affixes:
+            if fragment_lower.endswith(affix):
+                stem = fragment_lower[: -len(affix)]
+            elif fragment_lower.startswith(affix):
+                stem = fragment_lower[len(affix) :]
+            else:
+                continue
+            # Every hit scores the same, so the first one is final.
+            if stem in self._vertex_lower_map:
+                return (self._vertex_lower_map[stem], 0.9)
+        return (None, 0.0)
+class FuzzyMatchCache:
+    """Cache of fragment -> vertex name lookups backed by a FuzzyMatcher.
+    The cache is seeded with every vertex name and its common key variations
+    (``<name>_id``, ``fk_<name>``, ...). Seeded entries are returned without
+    scoring, so they do not depend on the threshold. Any other fragment is
+    scored once by the matcher and the result (including None) is remembered.
+    """
+    # Tokens combined with each vertex name, as both suffix and prefix.
+    _VARIATION_TOKENS = ("id", "fk", "key", "pk", "ref", "reference")
+    def __init__(self, vertex_names: list[str], threshold: float = 0.6):
+        """Initialize the fuzzy match cache.
+        Args:
+            vertex_names: List of vertex table names to match against
+            threshold: Similarity threshold (0.0 to 1.0) for computed matches
+        """
+        self.vertex_names = vertex_names
+        self.threshold = threshold
+        self._matcher = FuzzyMatcher(vertex_names, threshold)
+        self._cache: dict[str, str | None] = {}
+        self._build_cache()
+    def _build_cache(self) -> None:
+        """Seed the cache with exact names and their common key variations."""
+        # Variations first: when two vertices produce the same variation the
+        # later vertex wins.
+        for vertex_name in self.vertex_names:
+            vertex_lower = vertex_name.lower()
+            for token in self._VARIATION_TOKENS:
+                self._cache[f"{vertex_lower}_{token}"] = vertex_name
+                self._cache[f"{token}_{vertex_lower}"] = vertex_name
+        # Exact names last, so a variation of one vertex (e.g. "user" + "_id")
+        # can never shadow another vertex that is literally named "user_id",
+        # whatever the order of vertex_names.
+        for vertex_name in self.vertex_names:
+            self._cache[vertex_name.lower()] = vertex_name
+    def get_match(self, fragment: str) -> str | None:
+        """Get the match for a fragment, computing and caching it if needed.
+        Args:
+            fragment: Fragment to match (compared case-insensitively)
+        Returns:
+            Matching vertex name, or None if the matcher finds nothing that
+            reaches the threshold
+        """
+        fragment_lower = fragment.lower()
+        if fragment_lower in self._cache:
+            return self._cache[fragment_lower]
+        # Misses are cached too, so a fragment is scored at most once.
+        match, _ = self._matcher.match(fragment)
+        self._cache[fragment_lower] = match
+        return match
+    def batch_match(self, fragments: list[str]) -> dict[str, str | None]:
+        """Match multiple fragments, using the cache when possible.
+        Args:
+            fragments: List of fragments to match
+        Returns:
+            Dictionary mapping each fragment (as given) to its matched vertex
+            name (or None)
+        """
+        return {fragment: self.get_match(fragment) for fragment in fragments}

graflo/db/postgres/heuristics.py ADDED Viewed

@@ -0,0 +1,133 @@
+import logging
+from graflo.util.onto import Patterns, TablePattern
+from graflo.db.postgres.conn import (
+    PostgresConnection,
+)
+from graflo.db.postgres.resource_mapping import PostgresResourceMapper
+from graflo.db.postgres.schema_inference import PostgresSchemaInferencer
+logger = logging.getLogger(__name__)
+def create_patterns_from_postgres(
+    conn: PostgresConnection, schema_name: str | None = None
+) -> Patterns:
+    """Build a Patterns object with one TablePattern per introspected table.
+    Vertex tables are registered first, then edge tables. Entries are keyed by
+    table name, so a later table with the same name replaces an earlier one.
+    Args:
+        conn: PostgresConnection used for introspection; its ``config`` is
+            stored in the returned Patterns
+        schema_name: Schema name to introspect. When falsy, the schema name
+            reported by the introspection result is used for the patterns
+    Returns:
+        Patterns: Patterns object with TablePattern instances for all tables
+    """
+    introspected = conn.introspect_schema(schema_name=schema_name)
+    patterns = Patterns()
+    target_schema = schema_name or introspected.schema_name
+    # Every table shares the one connection config, filed under a fixed key.
+    config_key = "default"
+    patterns.postgres_configs[(config_key, target_schema)] = conn.config
+    def register(table_info) -> None:
+        """Record the pattern and the config lookup entry for one table."""
+        name = table_info.name
+        patterns.patterns[name] = TablePattern(
+            table_name=name,
+            schema_name=target_schema,
+            resource_name=name,
+        )
+        patterns.postgres_table_configs[name] = (config_key, target_schema, name)
+    for vertex_table in introspected.vertex_tables:
+        register(vertex_table)
+    for edge_table in introspected.edge_tables:
+        register(edge_table)
+    return patterns
+def create_resources_from_postgres(
+    conn: PostgresConnection, schema, schema_name: str | None = None
+):
+    """Map introspected PostgreSQL tables to Resources for an existing schema.
+    Args:
+        conn: PostgresConnection instance used for introspection
+        schema: Existing Schema object; its ``vertex_config`` and
+            ``edge_config`` are passed to the resource mapper
+        schema_name: Schema name to introspect
+    Returns:
+        Whatever PostgresResourceMapper.map_tables_to_resources returns
+        (presumably list[Resource] -- confirm against the mapper)
+    """
+    introspected = conn.introspect_schema(schema_name=schema_name)
+    return PostgresResourceMapper().map_tables_to_resources(
+        introspected, schema.vertex_config, schema.edge_config
+    )
+def infer_schema_from_postgres(
+    conn: PostgresConnection, schema_name: str | None = None, db_flavor=None
+):
+    """Infer a graflo Schema, including resources, from a PostgreSQL database.
+    Args:
+        conn: PostgresConnection instance; used for introspection and also
+            handed to the inferencer
+        schema_name: Schema name to introspect; passed unchanged to
+            ``conn.introspect_schema`` and to the inferencer
+        db_flavor: Target database flavor (defaults to DBFlavor.ARANGO)
+    Returns:
+        Schema: Inferred schema with its ``resources`` set and
+        ``__post_init__`` re-run
+    """
+    # Imported at call time rather than at module level, as in the original.
+    from graflo.onto import DBFlavor
+    target_flavor = DBFlavor.ARANGO if db_flavor is None else db_flavor
+    introspected = conn.introspect_schema(schema_name=schema_name)
+    schema = PostgresSchemaInferencer(
+        db_flavor=target_flavor, conn=conn
+    ).infer_schema(introspected, schema_name=schema_name)
+    schema.resources = PostgresResourceMapper().map_tables_to_resources(
+        introspected, schema.vertex_config, schema.edge_config
+    )
+    # Re-run initialization so the newly attached resources are picked up.
+    schema.__post_init__()
+    return schema