cognee 0.3.4.dev3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +16 -7
- cognee/api/health.py +5 -9
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
- cognee/api/v1/search/search.py +3 -0
- cognee/api/v1/ui/__init__.py +1 -1
- cognee/api/v1/ui/ui.py +215 -150
- cognee/api/v1/update/__init__.py +1 -0
- cognee/api/v1/update/routers/__init__.py +1 -0
- cognee/api/v1/update/routers/get_update_router.py +90 -0
- cognee/api/v1/update/update.py +100 -0
- cognee/base_config.py +5 -2
- cognee/cli/_cognee.py +28 -10
- cognee/cli/commands/delete_command.py +34 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
- cognee/eval_framework/modal_eval_dashboard.py +9 -1
- cognee/infrastructure/databases/graph/config.py +9 -9
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
- cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
- cognee/infrastructure/databases/relational/config.py +4 -4
- cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
- cognee/infrastructure/databases/vector/config.py +7 -7
- cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
- cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
- cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
- cognee/infrastructure/files/storage/s3_config.py +1 -0
- cognee/infrastructure/files/utils/open_data_file.py +7 -14
- cognee/infrastructure/llm/LLMGateway.py +19 -117
- cognee/infrastructure/llm/config.py +28 -13
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
- cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
- cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
- cognee/infrastructure/llm/prompts/test.txt +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
- cognee/infrastructure/llm/utils.py +4 -4
- cognee/infrastructure/loaders/LoaderEngine.py +5 -2
- cognee/infrastructure/loaders/external/__init__.py +7 -0
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
- cognee/infrastructure/loaders/supported_loaders.py +7 -0
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/data/methods/get_deletion_counts.py +92 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/ingestion/data_types/TextData.py +0 -1
- cognee/modules/notebooks/methods/create_notebook.py +3 -1
- cognee/modules/notebooks/methods/get_notebooks.py +27 -1
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
- cognee/modules/retrieval/code_retriever.py +2 -1
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
- cognee/modules/retrieval/graph_completion_retriever.py +0 -3
- cognee/modules/retrieval/insights_retriever.py +1 -1
- cognee/modules/retrieval/jaccard_retrival.py +60 -0
- cognee/modules/retrieval/lexical_retriever.py +123 -0
- cognee/modules/retrieval/natural_language_retriever.py +2 -1
- cognee/modules/retrieval/temporal_retriever.py +3 -2
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
- cognee/modules/retrieval/utils/completion.py +4 -7
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/no_access_control_search.py +1 -1
- cognee/modules/search/methods/search.py +32 -13
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/users/methods/create_user.py +0 -2
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +12 -1
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +12 -1
- cognee/modules/visualization/cognee_network_visualization.py +13 -9
- cognee/shared/data_models.py +0 -1
- cognee/shared/utils.py +0 -32
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/codingagents/coding_rule_associations.py +3 -2
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
- cognee/tasks/graph/extract_graph_from_code.py +2 -2
- cognee/tasks/graph/extract_graph_from_data.py +55 -12
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/migrate_relational_database.py +132 -41
- cognee/tasks/ingestion/resolve_data_directories.py +4 -1
- cognee/tasks/schema/ingest_database_schema.py +134 -0
- cognee/tasks/schema/models.py +40 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +3 -1
- cognee/tasks/summarization/summarize_code.py +2 -2
- cognee/tasks/summarization/summarize_text.py +2 -2
- cognee/tasks/temporal_graph/enrich_events.py +2 -2
- cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
- cognee/tests/test_advanced_pdf_loader.py +141 -0
- cognee/tests/test_chromadb.py +40 -0
- cognee/tests/test_cognee_server_start.py +6 -1
- cognee/tests/test_data/Quantum_computers.txt +9 -0
- cognee/tests/test_lancedb.py +211 -0
- cognee/tests/test_pgvector.py +40 -0
- cognee/tests/test_relational_db_migration.py +76 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/METADATA +92 -96
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/RECORD +176 -162
- distributed/pyproject.toml +0 -1
- cognee/infrastructure/data/utils/extract_keywords.py +0 -48
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
- cognee/tasks/graph/infer_data_ontology.py +0 -309
- cognee/tests/test_falkordb.py +0 -174
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/WHEEL +0 -0
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/entry_points.txt +0 -0
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/licenses/NOTICE.md +0 -0
cognee/tasks/codingagents/coding_rule_associations.py

@@ -4,6 +4,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.databases.vector import get_vector_engine

 from cognee.low_level import DataPoint
+from cognee.infrastructure.llm.prompts import render_prompt
 from cognee.infrastructure.llm import LLMGateway
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.engine.models import NodeSet
@@ -104,8 +105,8 @@ async def add_rule_associations(

     user_context = {"chat": data, "rules": existing_rules}

-    user_prompt =
-    system_prompt =
+    user_prompt = render_prompt(user_prompt_location, context=user_context)
+    system_prompt = render_prompt(system_prompt_location, context={})

     rule_list = await LLMGateway.acreate_structured_output(
         text_input=user_prompt, system_prompt=system_prompt, response_model=RuleSet
cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py

@@ -3,6 +3,7 @@ from typing import List

 from pydantic import BaseModel

+from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
 from cognee.infrastructure.entities.BaseEntityExtractor import BaseEntityExtractor
 from cognee.modules.engine.models import Entity
 from cognee.modules.engine.models.EntityType import EntityType
@@ -50,8 +51,8 @@ class LLMEntityExtractor(BaseEntityExtractor):
         try:
             logger.info(f"Extracting entities from text: {text[:100]}...")

-            user_prompt =
-            system_prompt =
+            user_prompt = render_prompt(self.user_prompt_template, {"text": text})
+            system_prompt = read_query_prompt(self.system_prompt_template)

             response = await LLMGateway.acreate_structured_output(
                 text_input=user_prompt,
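The pattern behind all of these prompt call sites is the same: render_prompt fills a template file with a context dict, read_query_prompt loads a prompt file verbatim, and both feed LLMGateway.acreate_structured_output together with a Pydantic response model. A minimal sketch of that pattern; the template names and response model below are illustrative, not taken from the package:

    from pydantic import BaseModel

    from cognee.infrastructure.llm.LLMGateway import LLMGateway
    from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt


    class ExtractedEntities(BaseModel):
        # Hypothetical response model, for illustration only.
        names: list[str]


    async def extract(text: str) -> ExtractedEntities:
        # "entities_user.txt" / "entities_system.txt" are assumed template names.
        user_prompt = render_prompt("entities_user.txt", {"text": text})
        system_prompt = read_query_prompt("entities_system.txt")
        return await LLMGateway.acreate_structured_output(
            text_input=user_prompt, system_prompt=system_prompt, response_model=ExtractedEntities
        )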
cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py

@@ -1,6 +1,7 @@
 from typing import List, Tuple
 from pydantic import BaseModel

+from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.root_dir import get_absolute_path

@@ -32,12 +33,12 @@ async def extract_content_nodes_and_relationship_names(
     }

     base_directory = get_absolute_path("./tasks/graph/cascade_extract/prompts")
-    text_input =
+    text_input = render_prompt(
         "extract_graph_relationship_names_prompt_input.txt",
         context,
         base_directory=base_directory,
     )
-    system_prompt =
+    system_prompt = read_query_prompt(
         "extract_graph_relationship_names_prompt_system.txt", base_directory=base_directory
     )
     response = await LLMGateway.acreate_structured_output(
cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py

@@ -1,5 +1,6 @@
 from typing import List

+from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.root_dir import get_absolute_path
@@ -26,10 +27,10 @@ async def extract_edge_triplets(
     }

     base_directory = get_absolute_path("./tasks/graph/cascade_extract/prompts")
-    text_input =
+    text_input = render_prompt(
         "extract_graph_edge_triplets_prompt_input.txt", context, base_directory=base_directory
     )
-    system_prompt =
+    system_prompt = read_query_prompt(
         "extract_graph_edge_triplets_prompt_system.txt", base_directory=base_directory
     )
     extracted_graph = await LLMGateway.acreate_structured_output(
cognee/tasks/graph/cascade_extract/utils/extract_nodes.py

@@ -1,6 +1,7 @@
 from typing import List
 from pydantic import BaseModel

+from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.root_dir import get_absolute_path

@@ -24,10 +25,10 @@ async def extract_nodes(text: str, n_rounds: int = 2) -> List[str]:
         "text": text,
     }
     base_directory = get_absolute_path("./tasks/graph/cascade_extract/prompts")
-    text_input =
+    text_input = render_prompt(
         "extract_graph_nodes_prompt_input.txt", context, base_directory=base_directory
     )
-    system_prompt =
+    system_prompt = read_query_prompt(
         "extract_graph_nodes_prompt_system.txt", base_directory=base_directory
     )
     response = await LLMGateway.acreate_structured_output(
cognee/tasks/graph/extract_graph_from_code.py

@@ -2,7 +2,7 @@ import asyncio
 from typing import Type, List
 from pydantic import BaseModel

-from cognee.infrastructure.llm.
+from cognee.infrastructure.llm.extraction import extract_content_graph
 from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
 from cognee.tasks.storage import add_data_points

@@ -18,7 +18,7 @@ async def extract_graph_from_code(
         - Graph nodes are stored using the `add_data_points` function for later retrieval or analysis.
     """
     chunk_graphs = await asyncio.gather(
-        *[
+        *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
     )

     for chunk_index, chunk in enumerate(data_chunks):
cognee/tasks/graph/extract_graph_from_data.py

@@ -3,15 +3,21 @@ from typing import Type, List, Optional
 from pydantic import BaseModel

 from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
 from cognee.tasks.storage.add_data_points import add_data_points
-from cognee.modules.ontology.
+from cognee.modules.ontology.ontology_config import Config
+from cognee.modules.ontology.get_default_ontology_resolver import (
+    get_default_ontology_resolver,
+    get_ontology_resolver_from_env,
+)
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
 from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
 from cognee.modules.graph.utils import (
     expand_with_nodes_and_edges,
     retrieve_existing_edges,
 )
 from cognee.shared.data_models import KnowledgeGraph
-from cognee.infrastructure.llm.
+from cognee.infrastructure.llm.extraction import extract_content_graph
 from cognee.tasks.graph.exceptions import (
     InvalidGraphModelError,
     InvalidDataChunksError,
@@ -24,9 +30,28 @@ async def integrate_chunk_graphs(
     data_chunks: list[DocumentChunk],
     chunk_graphs: list,
     graph_model: Type[BaseModel],
-
+    ontology_resolver: BaseOntologyResolver,
 ) -> List[DocumentChunk]:
-    """
+    """Integrate chunk graphs with ontology validation and store in databases.
+
+    This function processes document chunks and their associated knowledge graphs,
+    validates entities against an ontology resolver, and stores the integrated
+    data points and edges in the configured databases.
+
+    Args:
+        data_chunks: List of document chunks containing source data
+        chunk_graphs: List of knowledge graphs corresponding to each chunk
+        graph_model: Pydantic model class for graph data validation
+        ontology_resolver: Resolver for validating entities against ontology
+
+    Returns:
+        List of updated DocumentChunk objects with integrated data
+
+    Raises:
+        InvalidChunkGraphInputError: If input validation fails
+        InvalidGraphModelError: If graph model validation fails
+        InvalidOntologyAdapterError: If ontology resolver validation fails
+    """

     if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list):
         raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.")
@@ -36,9 +61,9 @@ async def integrate_chunk_graphs(
         )
     if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
         raise InvalidGraphModelError(graph_model)
-    if
+    if ontology_resolver is None or not hasattr(ontology_resolver, "get_subgraph"):
         raise InvalidOntologyAdapterError(
-            type(
+            type(ontology_resolver).__name__ if ontology_resolver else "None"
         )

     graph_engine = await get_graph_engine()
@@ -55,7 +80,7 @@ async def integrate_chunk_graphs(
     )

     graph_nodes, graph_edges = expand_with_nodes_and_edges(
-        data_chunks, chunk_graphs,
+        data_chunks, chunk_graphs, ontology_resolver, existing_edges_map
     )

     if len(graph_nodes) > 0:
@@ -70,7 +95,7 @@
 async def extract_graph_from_data(
     data_chunks: List[DocumentChunk],
     graph_model: Type[BaseModel],
-
+    config: Config = None,
     custom_prompt: Optional[str] = None,
 ) -> List[DocumentChunk]:
     """
@@ -86,7 +111,7 @@ async def extract_graph_from_data(

     chunk_graphs = await asyncio.gather(
         *[
-
+            extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt)
             for chunk in data_chunks
         ]
     )
@@ -101,6 +126,24 @@
         if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids
     ]

-
-
-
+    # Extract resolver from config if provided, otherwise get default
+    if config is None:
+        ontology_config = get_ontology_env_config()
+        if (
+            ontology_config.ontology_file_path
+            and ontology_config.ontology_resolver
+            and ontology_config.matching_strategy
+        ):
+            config: Config = {
+                "ontology_config": {
+                    "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
+                }
+            }
+        else:
+            config: Config = {
+                "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
+            }
+
+    ontology_resolver = config["ontology_config"]["ontology_resolver"]
+
+    return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_resolver)
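With this change extract_graph_from_data accepts an optional Config dict; when it is omitted, the function builds one itself from the ontology environment settings or falls back to the default resolver, as the last hunk above shows. A sketch of passing the config explicitly, assuming only what the diff shows about its shape:

    from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
    from cognee.shared.data_models import KnowledgeGraph
    from cognee.tasks.graph.extract_graph_from_data import extract_graph_from_data

    # The Config shape used by the new code path: a dict with an "ontology_config"
    # key holding the resolver to validate extracted entities against.
    config = {"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}}

    # Called from async code, with data_chunks prepared upstream:
    # chunks = await extract_graph_from_data(data_chunks, KnowledgeGraph, config=config)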
cognee/tasks/graph/extract_graph_from_data_v2.py

@@ -3,7 +3,7 @@ from typing import List

 from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
 from cognee.shared.data_models import KnowledgeGraph
-from cognee.modules.ontology.
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
 from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
 from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
     extract_content_nodes_and_relationship_names,
@@ -17,9 +17,21 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs
 async def extract_graph_from_data(
     data_chunks: List[DocumentChunk],
     n_rounds: int = 2,
-    ontology_adapter:
+    ontology_adapter: BaseOntologyResolver = None,
 ) -> List[DocumentChunk]:
-    """Extract and update graph data from document chunks
+    """Extract and update graph data from document chunks using cascade extraction.
+
+    This function performs multi-step graph extraction from document chunks,
+    using cascade extraction techniques to build comprehensive knowledge graphs.
+
+    Args:
+        data_chunks: List of document chunks to process
+        n_rounds: Number of extraction rounds to perform (default: 2)
+        ontology_adapter: Resolver for validating entities against ontology
+
+    Returns:
+        List of updated DocumentChunk objects with extracted graph data
+    """
     chunk_nodes = await asyncio.gather(
         *[extract_nodes(chunk.text, n_rounds) for chunk in data_chunks]
     )
@@ -44,5 +56,5 @@ async def extract_graph_from_data(
         data_chunks=data_chunks,
         chunk_graphs=chunk_graphs,
         graph_model=KnowledgeGraph,
-        ontology_adapter=ontology_adapter
+        ontology_adapter=ontology_adapter,
     )
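Per the rename in the file list, the RDF/XML resolver is now RDFLibOntologyResolver, and any BaseOntologyResolver can be supplied as ontology_adapter. A sketch of wiring one into the cascade extractor; the resolver's constructor argument is an assumption, not taken from this diff:

    import asyncio

    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
    from cognee.tasks.graph.extract_graph_from_data_v2 import extract_graph_from_data


    async def run(data_chunks):
        # Constructor signature assumed for illustration.
        resolver = RDFLibOntologyResolver(ontology_file="ontology.owl")
        return await extract_graph_from_data(data_chunks, n_rounds=2, ontology_adapter=resolver)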
cognee/tasks/ingestion/migrate_relational_database.py

@@ -4,16 +4,20 @@ from sqlalchemy import text
 from cognee.infrastructure.databases.relational.get_migration_relational_engine import (
     get_migration_relational_engine,
 )
+from cognee.infrastructure.databases.relational.config import get_migration_config

 from cognee.tasks.storage.index_data_points import index_data_points
 from cognee.tasks.storage.index_graph_edges import index_graph_edges
+from cognee.tasks.schema.ingest_database_schema import ingest_database_schema

 from cognee.modules.engine.models import TableRow, TableType, ColumnValue

 logger = logging.getLogger(__name__)


-async def migrate_relational_database(
+async def migrate_relational_database(
+    graph_db, schema, migrate_column_data=True, schema_only=False
+):
     """
     Migrates data from a relational database into a graph database.

@@ -26,11 +30,133 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True

     Both TableType and TableRow inherit from DataPoint to maintain consistency with Cognee data model.
     """
-    engine = get_migration_relational_engine()
     # Create a mapping of node_id to node objects for referencing in edge creation
+    if schema_only:
+        node_mapping, edge_mapping = await schema_only_ingestion(schema)
+
+    else:
+        node_mapping, edge_mapping = await complete_database_ingestion(schema, migrate_column_data)
+
+    def _remove_duplicate_edges(edge_mapping):
+        seen = set()
+        unique_original_shape = []
+
+        for tup in edge_mapping:
+            # We go through all the tuples in the edge_mapping and we only add unique tuples to the list
+            # To eliminate duplicate edges.
+            source_id, target_id, rel_name, rel_dict = tup
+            # We need to convert the dictionary to a frozenset to be able to compare values for it
+            rel_dict_hashable = frozenset(sorted(rel_dict.items()))
+            hashable_tup = (source_id, target_id, rel_name, rel_dict_hashable)
+
+            # We use the seen set to keep track of unique edges
+            if hashable_tup not in seen:
+                # A list that has frozensets elements instead of dictionaries is needed to be able to compare values
+                seen.add(hashable_tup)
+                # append the original tuple shape (with the dictionary) if it's the first time we see it
+                unique_original_shape.append(tup)

+        return unique_original_shape
+
+    # Add all nodes and edges to the graph
+    # NOTE: Nodes and edges have to be added in batch for speed optimization, Especially for NetworkX.
+    # If we'd create nodes and add them to graph in real time the process would take too long.
+    # Every node and edge added to NetworkX is saved to file which is very slow when not done in batches.
+    await graph_db.add_nodes(list(node_mapping.values()))
+    await graph_db.add_edges(_remove_duplicate_edges(edge_mapping))
+
+    # In these steps we calculate the vector embeddings of our nodes and edges and save them to vector database
+    # Cognee uses this information to perform searches on the knowledge graph.
+    await index_data_points(list(node_mapping.values()))
+    await index_graph_edges()
+
+    logger.info("Data successfully migrated from relational database to desired graph database.")
+    return await graph_db.get_graph_data()
+
+
+async def schema_only_ingestion(schema):
     node_mapping = {}
     edge_mapping = []

+    # Calling the ingest_database_schema function to return DataPoint subclasses
+    result = await ingest_database_schema(
+        schema=schema,
+        max_sample_rows=5,
+    )
+    database_schema = result["database_schema"]
+    schema_tables = result["schema_tables"]
+    schema_relationships = result["relationships"]
+    database_node_id = database_schema.id
+    node_mapping[database_node_id] = database_schema
+    for table in schema_tables:
+        table_node_id = table.id
+        # Add TableSchema Datapoint as a node.
+        node_mapping[table_node_id] = table
+        edge_mapping.append(
+            (
+                table_node_id,
+                database_node_id,
+                "is_part_of",
+                dict(
+                    source_node_id=table_node_id,
+                    target_node_id=database_node_id,
+                    relationship_name="is_part_of",
+                ),
+            )
+        )
+    table_name_to_id = {t.name: t.id for t in schema_tables}
+    for rel in schema_relationships:
+        source_table_id = table_name_to_id.get(rel.source_table)
+        target_table_id = table_name_to_id.get(rel.target_table)
+
+        relationship_id = rel.id
+
+        # Add RelationshipTable DataPoint as a node.
+        node_mapping[relationship_id] = rel
+        edge_mapping.append(
+            (
+                source_table_id,
+                relationship_id,
+                "has_relationship",
+                dict(
+                    source_node_id=source_table_id,
+                    target_node_id=relationship_id,
+                    relationship_name=rel.relationship_type,
+                ),
+            )
+        )
+        edge_mapping.append(
+            (
+                relationship_id,
+                target_table_id,
+                "has_relationship",
+                dict(
+                    source_node_id=relationship_id,
+                    target_node_id=target_table_id,
+                    relationship_name=rel.relationship_type,
+                ),
+            )
+        )
+        edge_mapping.append(
+            (
+                source_table_id,
+                target_table_id,
+                rel.relationship_type,
+                dict(
+                    source_node_id=source_table_id,
+                    target_node_id=target_table_id,
+                    relationship_name=rel.relationship_type,
+                ),
+            )
+        )
+    return node_mapping, edge_mapping
+
+
+async def complete_database_ingestion(schema, migrate_column_data):
+    engine = get_migration_relational_engine()
+    # Create a mapping of node_id to node objects for referencing in edge creation
+    node_mapping = {}
+    edge_mapping = []
     async with engine.engine.begin() as cursor:
         # First, create table type nodes for all tables
         for table_name, details in schema.items():
@@ -38,7 +164,7 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True
             table_node = TableType(
                 id=uuid5(NAMESPACE_OID, name=table_name),
                 name=table_name,
-                description=f
+                description=f'Relational database table with the following name: "{table_name}".',
             )

             # Add TableType node to mapping ( node will be added to the graph later based on this mapping )
@@ -75,7 +201,7 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True
                     name=node_id,
                     is_a=table_node,
                     properties=str(row_properties),
-                    description=f
+                    description=f'Row in relational database table from the table with the name: "{table_name}" with the following row data {str(row_properties)} where the dictionary key value is the column name and the value is the column value. This row has the id of: {node_id}',
                 )

                 # Store the node object in our mapping
@@ -113,7 +239,7 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True
                         id=uuid5(NAMESPACE_OID, name=column_node_id),
                         name=column_node_id,
                         properties=f"{key} {value} {table_name}",
-                        description=f"Column name={key} and value={value}
+                        description=f"column from relational database table={table_name}. Column name={key} and value={value}. The value of the column is related to the following row with this id: {row_node.id}. This column has the following ID: {column_node_id}",
                     )
                     node_mapping[column_node_id] = column_node

@@ -180,39 +306,4 @@
                     ),
                 )
             )
-
-    def _remove_duplicate_edges(edge_mapping):
-        seen = set()
-        unique_original_shape = []
-
-        for tup in edge_mapping:
-            # We go through all the tuples in the edge_mapping and we only add unique tuples to the list
-            # To eliminate duplicate edges.
-            source_id, target_id, rel_name, rel_dict = tup
-            # We need to convert the dictionary to a frozenset to be able to compare values for it
-            rel_dict_hashable = frozenset(sorted(rel_dict.items()))
-            hashable_tup = (source_id, target_id, rel_name, rel_dict_hashable)
-
-            # We use the seen set to keep track of unique edges
-            if hashable_tup not in seen:
-                # A list that has frozensets elements instead of dictionaries is needed to be able to compare values
-                seen.add(hashable_tup)
-                # append the original tuple shape (with the dictionary) if it's the first time we see it
-                unique_original_shape.append(tup)
-
-        return unique_original_shape
-
-    # Add all nodes and edges to the graph
-    # NOTE: Nodes and edges have to be added in batch for speed optimization, Especially for NetworkX.
-    # If we'd create nodes and add them to graph in real time the process would take too long.
-    # Every node and edge added to NetworkX is saved to file which is very slow when not done in batches.
-    await graph_db.add_nodes(list(node_mapping.values()))
-    await graph_db.add_edges(_remove_duplicate_edges(edge_mapping))
-
-    # In these steps we calculate the vector embeddings of our nodes and edges and save them to vector database
-    # Cognee uses this information to perform searches on the knowledge graph.
-    await index_data_points(list(node_mapping.values()))
-    await index_graph_edges()
-
-    logger.info("Data successfully migrated from relational database to desired graph database.")
-    return await graph_db.get_graph_data()
+    return node_mapping, edge_mapping
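The relocated _remove_duplicate_edges helper works around dicts being unhashable: each edge's property dict is frozen into a frozenset of its items so the full 4-tuple can be stored in a set, while the original tuple shape (with the dict) is what gets kept. A standalone illustration of the same trick:

    edges = [
        ("users", "orders", "foreign_key", {"relationship_name": "foreign_key"}),
        ("users", "orders", "foreign_key", {"relationship_name": "foreign_key"}),  # exact duplicate
    ]

    seen, unique = set(), []
    for source_id, target_id, rel_name, rel_dict in edges:
        # frozenset makes the unhashable dict usable as part of a set member
        key = (source_id, target_id, rel_name, frozenset(rel_dict.items()))
        if key not in seen:
            seen.add(key)
            unique.append((source_id, target_id, rel_name, rel_dict))

    assert len(unique) == 1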
cognee/tasks/ingestion/resolve_data_directories.py

@@ -32,7 +32,10 @@ async def resolve_data_directories(
         import s3fs

         fs = s3fs.S3FileSystem(
-            key=s3_config.aws_access_key_id,
+            key=s3_config.aws_access_key_id,
+            secret=s3_config.aws_secret_access_key,
+            token=s3_config.aws_session_token,
+            anon=False,
         )

         for item in data:
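The fix above matters for temporary credentials: previously only the access key id was forwarded, leaving the secret and the STS session token to ambient credential resolution. A sketch of the complete wiring; the get_s3_config accessor name is an assumption based on the module layout in the file list:

    import s3fs

    from cognee.infrastructure.files.storage.s3_config import get_s3_config  # accessor name assumed

    s3_config = get_s3_config()
    fs = s3fs.S3FileSystem(
        key=s3_config.aws_access_key_id,
        secret=s3_config.aws_secret_access_key,
        token=s3_config.aws_session_token,  # required for temporary STS credentials
        anon=False,  # explicitly disable anonymous access
    )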
cognee/tasks/schema/ingest_database_schema.py (new file)

@@ -0,0 +1,134 @@
+import json
+from typing import List, Dict
+from uuid import uuid5, NAMESPACE_OID
+from cognee.infrastructure.engine.models.DataPoint import DataPoint
+from sqlalchemy import text
+from cognee.tasks.schema.models import DatabaseSchema, SchemaTable, SchemaRelationship
+from cognee.infrastructure.databases.relational.get_migration_relational_engine import (
+    get_migration_relational_engine,
+)
+from cognee.infrastructure.databases.relational.config import get_migration_config
+from datetime import datetime, timezone
+
+
+async def ingest_database_schema(
+    schema,
+    max_sample_rows: int = 0,
+) -> Dict[str, List[DataPoint] | DataPoint]:
+    """
+    Extract database schema metadata (optionally with sample data) and return DataPoint models for graph construction.
+
+    Args:
+        schema: Database schema
+        max_sample_rows: Maximum sample rows per table (0 means no sampling)
+
+    Returns:
+        Dict with keys:
+            "database_schema": DatabaseSchema
+            "schema_tables": List[SchemaTable]
+            "relationships": List[SchemaRelationship]
+    """
+
+    tables = {}
+    sample_data = {}
+    schema_tables = []
+    schema_relationships = []
+
+    migration_config = get_migration_config()
+    engine = get_migration_relational_engine()
+    qi = engine.engine.dialect.identifier_preparer.quote
+    try:
+        max_sample_rows = max(0, int(max_sample_rows))
+    except (TypeError, ValueError):
+        max_sample_rows = 0
+
+    def qname(name: str):
+        split_name = name.split(".")
+        return ".".join(qi(p) for p in split_name)
+
+    async with engine.engine.begin() as cursor:
+        for table_name, details in schema.items():
+            tn = qname(table_name)
+            if max_sample_rows > 0:
+                rows_result = await cursor.execute(
+                    text(f"SELECT * FROM {tn} LIMIT :limit;"),  # noqa: S608 - tn is fully quoted
+                    {"limit": max_sample_rows},
+                )
+                rows = [dict(r) for r in rows_result.mappings().all()]
+            else:
+                rows = []
+
+            if engine.engine.dialect.name == "postgresql":
+                if "." in table_name:
+                    schema_part, table_part = table_name.split(".", 1)
+                else:
+                    schema_part, table_part = "public", table_name
+                estimate = await cursor.execute(
+                    text(
+                        "SELECT reltuples::bigint AS estimate "
+                        "FROM pg_class c "
+                        "JOIN pg_namespace n ON n.oid = c.relnamespace "
+                        "WHERE n.nspname = :schema AND c.relname = :table"
+                    ),
+                    {"schema": schema_part, "table": table_part},
+                )
+                row_count_estimate = estimate.scalar() or 0
+            else:
+                count_result = await cursor.execute(text(f"SELECT COUNT(*) FROM {tn};"))  # noqa: S608 - tn is fully quoted
+                row_count_estimate = count_result.scalar()
+
+            schema_table = SchemaTable(
+                id=uuid5(NAMESPACE_OID, name=f"{table_name}"),
+                name=table_name,
+                columns=json.dumps(details["columns"], default=str),
+                primary_key=details.get("primary_key"),
+                foreign_keys=json.dumps(details.get("foreign_keys", []), default=str),
+                sample_rows=json.dumps(rows, default=str),
+                row_count_estimate=row_count_estimate,
+                description=f"Relational database table with '{table_name}' with {len(details['columns'])} columns and approx. {row_count_estimate} rows."
+                f"Here are the columns this table contains: {details['columns']}"
+                f"Here are a few sample_rows to show the contents of the table: {rows}"
+                f"Table is part of the database: {migration_config.migration_db_name}",
+            )
+            schema_tables.append(schema_table)
+            tables[table_name] = details
+            sample_data[table_name] = rows
+
+            for fk in details.get("foreign_keys", []):
+                ref_table_fq = fk["ref_table"]
+                if "." not in ref_table_fq and "." in table_name:
+                    ref_table_fq = f"{table_name.split('.', 1)[0]}.{ref_table_fq}"
+
+                relationship_name = (
+                    f"{table_name}:{fk['column']}->{ref_table_fq}:{fk['ref_column']}"
+                )
+                relationship = SchemaRelationship(
+                    id=uuid5(NAMESPACE_OID, name=relationship_name),
+                    name=relationship_name,
+                    source_table=table_name,
+                    target_table=ref_table_fq,
+                    relationship_type="foreign_key",
+                    source_column=fk["column"],
+                    target_column=fk["ref_column"],
+                    description=f"Relational database table foreign key relationship between: {table_name}.{fk['column']} → {ref_table_fq}.{fk['ref_column']}"
+                    f"This foreing key relationship between table columns is a part of the following database: {migration_config.migration_db_name}",
+                )
+                schema_relationships.append(relationship)
+
+    id_str = f"{migration_config.migration_db_provider}:{migration_config.migration_db_name}"
+    database_schema = DatabaseSchema(
+        id=uuid5(NAMESPACE_OID, name=id_str),
+        name=migration_config.migration_db_name,
+        database_type=migration_config.migration_db_provider,
+        tables=json.dumps(tables, default=str),
+        sample_data=json.dumps(sample_data, default=str),
+        description=f"Database schema containing {len(schema_tables)} tables and {len(schema_relationships)} relationships. "
+        f"The database type is {migration_config.migration_db_provider}."
+        f"The database contains the following tables: {tables}",
+    )
+
+    return {
+        "database_schema": database_schema,
+        "schema_tables": schema_tables,
+        "relationships": schema_relationships,
+    }
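A usage sketch for the new task, assuming only what the diff shows: `schema` is the mapping the migration engine extracts (table name to column and foreign-key details), and the returned DataPoints are what schema_only_ingestion above turns into graph nodes and edges:

    from cognee.tasks.schema.ingest_database_schema import ingest_database_schema


    async def build_schema_graph(schema):
        # max_sample_rows=5 mirrors the call made by schema_only_ingestion.
        result = await ingest_database_schema(schema=schema, max_sample_rows=5)
        return (
            result["database_schema"],   # one DatabaseSchema DataPoint for the whole DB
            result["schema_tables"],     # a SchemaTable DataPoint per table
            result["relationships"],     # a SchemaRelationship DataPoint per foreign key
        )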