PyPI - cognee - Versions diffs - 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

cognee 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (214) hide show

cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py CHANGED Viewed

@@ -7,7 +7,7 @@ from pydantic import BaseModel
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.infrastructure.engine.models import DataPoint
-from cognee.modules.data.extraction.extract_categories import extract_categories
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
@@ -40,7 +40,7 @@ async def chunk_naive_llm_classifier(
         return data_chunks
     chunk_classifications = await asyncio.gather(
-        *[extract_categories(chunk.text, classification_model) for chunk in data_chunks],
+        *[LLMGateway.extract_categories(chunk.text, classification_model) for chunk in data_chunks],
     )
     classification_data_points = []

cognee/tasks/documents/extract_chunks_from_documents.py CHANGED Viewed

@@ -8,7 +8,6 @@ from cognee.modules.data.models import Data
 from cognee.infrastructure.databases.relational import get_relational_engine
 from cognee.modules.chunking.TextChunker import TextChunker
 from cognee.modules.chunking.Chunker import Chunker
-from cognee.modules.data.processing.document_types.exceptions.exceptions import PyPdfInternalError
 async def update_document_token_count(document_id: UUID, token_count: int) -> None:
@@ -40,15 +39,14 @@ async def extract_chunks_from_documents(
     """
     for document in documents:
         document_token_count = 0
-        try:
-            async for document_chunk in document.read(
-                max_chunk_size=max_chunk_size, chunker_cls=chunker
-            ):
-                document_token_count += document_chunk.chunk_size
-                document_chunk.belongs_to_set = document.belongs_to_set
-                yield document_chunk
-            await update_document_token_count(document.id, document_token_count)
-        except PyPdfInternalError:
-            pass
+        async for document_chunk in document.read(
+            max_chunk_size=max_chunk_size, chunker_cls=chunker
+        ):
+            document_token_count += document_chunk.chunk_size
+            document_chunk.belongs_to_set = document.belongs_to_set
+            yield document_chunk
+        await update_document_token_count(document.id, document_token_count)
         # todo rita

cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py CHANGED Viewed

@@ -6,8 +6,7 @@ from pydantic import BaseModel
 from cognee.infrastructure.entities.BaseEntityExtractor import BaseEntityExtractor
 from cognee.modules.engine.models import Entity
 from cognee.modules.engine.models.EntityType import EntityType
-from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 logger = get_logger("llm_entity_extractor")
@@ -51,11 +50,10 @@ class LLMEntityExtractor(BaseEntityExtractor):
         try:
             logger.info(f"Extracting entities from text: {text[:100]}...")
-            llm_client = get_llm_client()
-            user_prompt = render_prompt(self.user_prompt_template, {"text": text})
-            system_prompt = read_query_prompt(self.system_prompt_template)
+            user_prompt = LLMGateway.render_prompt(self.user_prompt_template, {"text": text})
+            system_prompt = LLMGateway.read_query_prompt(self.system_prompt_template)
-            response = await llm_client.acreate_structured_output(
+            response = await LLMGateway.acreate_structured_output(
                 text_input=user_prompt,
                 system_prompt=system_prompt,
                 response_model=EntityList,

cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py CHANGED Viewed

@@ -1,8 +1,7 @@
 from typing import List, Tuple
 from pydantic import BaseModel
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
-from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.root_dir import get_absolute_path
@@ -17,7 +16,6 @@ async def extract_content_nodes_and_relationship_names(
     content: str, existing_nodes: List[str], n_rounds: int = 2
 ) -> Tuple[List[str], List[str]]:
     """Extracts node names and relationship_names from content through multiple rounds of analysis."""
-    llm_client = get_llm_client()
     all_nodes: List[str] = existing_nodes.copy()
     all_relationship_names: List[str] = []
     existing_node_set = {node.lower() for node in all_nodes}
@@ -34,15 +32,15 @@ async def extract_content_nodes_and_relationship_names(
         }
         base_directory = get_absolute_path("./tasks/graph/cascade_extract/prompts")
-        text_input = render_prompt(
+        text_input = LLMGateway.render_prompt(
             "extract_graph_relationship_names_prompt_input.txt",
             context,
             base_directory=base_directory,
         )
-        system_prompt = read_query_prompt(
+        system_prompt = LLMGateway.read_query_prompt(
             "extract_graph_relationship_names_prompt_system.txt", base_directory=base_directory
         )
-        response = await llm_client.acreate_structured_output(
+        response = await LLMGateway.acreate_structured_output(
             text_input=text_input,
             system_prompt=system_prompt,
             response_model=PotentialNodesAndRelationshipNames,

cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py CHANGED Viewed

@@ -1,6 +1,6 @@
-from typing import List, Tuple
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
-from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
+from typing import List
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.root_dir import get_absolute_path
@@ -9,7 +9,6 @@ async def extract_edge_triplets(
     content: str, nodes: List[str], relationship_names: List[str], n_rounds: int = 2
 ) -> KnowledgeGraph:
     """Creates a knowledge graph by identifying relationships between the provided nodes."""
-    llm_client = get_llm_client()
     final_graph = KnowledgeGraph(nodes=[], edges=[])
     existing_nodes = set()
     existing_node_ids = set()
@@ -27,13 +26,13 @@ async def extract_edge_triplets(
         }
         base_directory = get_absolute_path("./tasks/graph/cascade_extract/prompts")
-        text_input = render_prompt(
+        text_input = LLMGateway.render_prompt(
             "extract_graph_edge_triplets_prompt_input.txt", context, base_directory=base_directory
         )
-        system_prompt = read_query_prompt(
+        system_prompt = LLMGateway.read_query_prompt(
             "extract_graph_edge_triplets_prompt_system.txt", base_directory=base_directory
         )
-        extracted_graph = await llm_client.acreate_structured_output(
+        extracted_graph = await LLMGateway.acreate_structured_output(
             text_input=text_input, system_prompt=system_prompt, response_model=KnowledgeGraph
         )

cognee/tasks/graph/cascade_extract/utils/extract_nodes.py CHANGED Viewed

@@ -1,9 +1,7 @@
 from typing import List
 from pydantic import BaseModel
-from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
-from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.root_dir import get_absolute_path
@@ -15,7 +13,6 @@ class PotentialNodes(BaseModel):
 async def extract_nodes(text: str, n_rounds: int = 2) -> List[str]:
     """Extracts node names from content through multiple rounds of analysis."""
-    llm_client = get_llm_client()
     all_nodes: List[str] = []
     existing_nodes = set()
@@ -27,13 +24,13 @@ async def extract_nodes(text: str, n_rounds: int = 2) -> List[str]:
             "text": text,
         }
         base_directory = get_absolute_path("./tasks/graph/cascade_extract/prompts")
-        text_input = render_prompt(
+        text_input = LLMGateway.render_prompt(
             "extract_graph_nodes_prompt_input.txt", context, base_directory=base_directory
         )
-        system_prompt = read_query_prompt(
+        system_prompt = LLMGateway.read_query_prompt(
             "extract_graph_nodes_prompt_system.txt", base_directory=base_directory
         )
-        response = await llm_client.acreate_structured_output(
+        response = await LLMGateway.acreate_structured_output(
             text_input=text_input, system_prompt=system_prompt, response_model=PotentialNodes
         )

cognee/tasks/graph/extract_graph_from_code.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import asyncio
 from typing import Type, List
 from pydantic import BaseModel
-from cognee.modules.data.extraction.knowledge_graph import extract_content_graph
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
 from cognee.tasks.storage import add_data_points
@@ -17,7 +18,7 @@ async def extract_graph_from_code(
         - Graph nodes are stored using the `add_data_points` function for later retrieval or analysis.
     """
     chunk_graphs = await asyncio.gather(
-        *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
+        *[LLMGateway.extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
     )
     for chunk_index, chunk in enumerate(data_chunks):

cognee/tasks/graph/extract_graph_from_data.py CHANGED Viewed

@@ -3,15 +3,15 @@ from typing import Type, List
 from pydantic import BaseModel
 from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.tasks.storage.add_data_points import add_data_points
 from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
 from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
-from cognee.modules.data.extraction.knowledge_graph import extract_content_graph
 from cognee.modules.graph.utils import (
     expand_with_nodes_and_edges,
     retrieve_existing_edges,
 )
 from cognee.shared.data_models import KnowledgeGraph
-from cognee.tasks.storage.add_data_points import add_data_points
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 async def integrate_chunk_graphs(
@@ -40,6 +40,7 @@ async def integrate_chunk_graphs(
     if len(graph_nodes) > 0:
         await add_data_points(graph_nodes)
     if len(graph_edges) > 0:
         await graph_engine.add_edges(graph_edges)
@@ -55,7 +56,7 @@ async def extract_graph_from_data(
     Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
     """
     chunk_graphs = await asyncio.gather(
-        *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
+        *[LLMGateway.extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
     )
     # Note: Filter edges with missing source or target nodes

cognee/tasks/graph/infer_data_ontology.py CHANGED Viewed

@@ -15,19 +15,19 @@ from pydantic import BaseModel
 from cognee.modules.graph.exceptions import EntityNotFoundError
 from cognee.modules.ingestion.exceptions import IngestionError
-from cognee.infrastructure.llm.prompts import read_query_prompt
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.infrastructure.data.chunking.config import get_chunk_config
 from cognee.infrastructure.data.chunking.get_chunking_engine import get_chunk_engine
 from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
 from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file
 from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException
-from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import (
+from cognee.modules.data.methods.add_model_class_to_graph import (
     add_model_class_to_graph,
 )
 from cognee.tasks.graph.models import NodeModel, GraphOntology
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.modules.engine.utils import generate_node_id, generate_node_name
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 logger = get_logger("task:infer_data_ontology")
@@ -52,11 +52,10 @@ async def extract_ontology(content: str, response_model: Type[BaseModel]):
         The structured ontology extracted from the content.
     """
-    llm_client = get_llm_client()
-    system_prompt = read_query_prompt("extract_ontology.txt")
+    system_prompt = LLMGateway.read_query_prompt("extract_ontology.txt")
-    ontology = await llm_client.acreate_structured_output(content, system_prompt, response_model)
+    ontology = await LLMGateway.acreate_structured_output(content, system_prompt, response_model)
     return ontology

cognee/tasks/ingestion/data_item_to_text_file.py ADDED Viewed

@@ -0,0 +1,79 @@
+import os
+from urllib.parse import urlparse
+from typing import List, Tuple
+from pathlib import Path
+import tempfile
+from cognee.infrastructure.loaders.LoaderInterface import LoaderInterface
+from cognee.modules.ingestion.exceptions import IngestionError
+from cognee.infrastructure.loaders import get_loader_engine
+from cognee.shared.logging_utils import get_logger
+from cognee.infrastructure.files.utils.open_data_file import open_data_file
+from pydantic_settings import BaseSettings, SettingsConfigDict
+logger = get_logger(__name__)
+class SaveDataSettings(BaseSettings):
+    accept_local_file_path: bool = True
+    model_config = SettingsConfigDict(env_file=".env", extra="allow")
+settings = SaveDataSettings()
+async def pull_from_s3(file_path, destination_file) -> None:
+    async with open_data_file(file_path) as file:
+        while True:
+            chunk = file.read(8192)
+            if not chunk:
+                break
+            destination_file.write(chunk)
+async def data_item_to_text_file(
+    data_item_path: str, preferred_loaders: List[str]
+) -> Tuple[str, LoaderInterface]:
+    if isinstance(data_item_path, str):
+        parsed_url = urlparse(data_item_path)
+        # data is s3 file path
+        if parsed_url.scheme == "s3":
+            # TODO: Rework this to work with file streams and not saving data to temp storage
+            # Note: proper suffix information is needed for OpenAI to handle mp3 files
+            path_info = Path(parsed_url.path)
+            with tempfile.NamedTemporaryFile(mode="wb", suffix=path_info.suffix) as temp_file:
+                await pull_from_s3(data_item_path, temp_file)
+                temp_file.flush()  # Data needs to be saved to local storage
+                loader = get_loader_engine()
+                return await loader.load_file(temp_file.name, preferred_loaders), loader.get_loader(
+                    temp_file.name, preferred_loaders
+                )
+        # data is local file path
+        elif parsed_url.scheme == "file":
+            if settings.accept_local_file_path:
+                loader = get_loader_engine()
+                return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader(
+                    data_item_path, preferred_loaders
+                )
+            else:
+                raise IngestionError(message="Local files are not accepted.")
+        # data is an absolute file path
+        elif data_item_path.startswith("/") or (
+            os.name == "nt" and len(data_item_path) > 1 and data_item_path[1] == ":"
+        ):
+            # Handle both Unix absolute paths (/path) and Windows absolute paths (C:\path)
+            if settings.accept_local_file_path:
+                loader = get_loader_engine()
+                return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader(
+                    data_item_path, preferred_loaders
+                )
+            else:
+                raise IngestionError(message="Local files are not accepted.")
+    # data is not a supported type
+    raise IngestionError(message=f"Data type not supported: {type(data_item_path)}")

cognee/tasks/ingestion/ingest_data.py CHANGED Viewed

@@ -1,16 +1,16 @@
 import json
 import inspect
-from os import path
 from uuid import UUID
 from typing import Union, BinaryIO, Any, List, Optional
 import cognee.modules.ingestion as ingestion
-from cognee.infrastructure.files.utils.open_data_file import open_data_file
 from cognee.infrastructure.databases.relational import get_relational_engine
 from cognee.modules.data.models import Data
 from cognee.modules.users.models import User
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
+from cognee.infrastructure.files.utils.open_data_file import open_data_file
+from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
 from cognee.modules.data.methods import (
     get_authorized_existing_datasets,
     get_dataset_data,
@@ -18,6 +18,7 @@ from cognee.modules.data.methods import (
 )
 from .save_data_item_to_storage import save_data_item_to_storage
+from .data_item_to_text_file import data_item_to_text_file
 async def ingest_data(
@@ -26,6 +27,7 @@ async def ingest_data(
     user: User,
     node_set: Optional[List[str]] = None,
     dataset_id: UUID = None,
+    preferred_loaders: List[str] = None,
 ):
     if not user:
         user = await get_default_user()
@@ -42,6 +44,7 @@ async def ingest_data(
         user: User,
         node_set: Optional[List[str]] = None,
         dataset_id: UUID = None,
+        preferred_loaders: List[str] = None,
     ):
         new_datapoints = []
         existing_data_points = []
@@ -74,71 +77,96 @@ async def ingest_data(
         dataset_data_map = {str(data.id): True for data in dataset_data}
         for data_item in data:
-            file_path = await save_data_item_to_storage(data_item)
+            # Get file path of data item or create a file it doesn't exist
+            original_file_path = await save_data_item_to_storage(data_item)
+            # Transform file path to be OS usable
+            actual_file_path = get_data_file_path(original_file_path)
-            # Ingest data and add metadata
-            async with open_data_file(file_path) as file:
+            # Store all input data as text files in Cognee data storage
+            cognee_storage_file_path, loader_engine = await data_item_to_text_file(
+                actual_file_path, preferred_loaders
+            )
+            # Find metadata from original file
+            async with open_data_file(original_file_path) as file:
                 classified_data = ingestion.classify(file)
-                # data_id is the hash of file contents + owner id to avoid duplicate data
+                # data_id is the hash of original file contents + owner id to avoid duplicate data
                 data_id = ingestion.identify(classified_data, user)
+                original_file_metadata = classified_data.get_metadata()
-                file_metadata = classified_data.get_metadata()
-                from sqlalchemy import select
-                db_engine = get_relational_engine()
-                # Check to see if data should be updated
-                async with db_engine.get_async_session() as session:
-                    data_point = (
-                        await session.execute(select(Data).filter(Data.id == data_id))
-                    ).scalar_one_or_none()
-                ext_metadata = get_external_metadata_dict(data_item)
-                if node_set:
-                    ext_metadata["node_set"] = node_set
-                if data_point is not None:
-                    data_point.name = file_metadata["name"]
-                    data_point.raw_data_location = file_metadata["file_path"]
-                    data_point.extension = file_metadata["extension"]
-                    data_point.mime_type = file_metadata["mime_type"]
-                    data_point.owner_id = user.id
-                    data_point.content_hash = file_metadata["content_hash"]
-                    data_point.file_size = file_metadata["file_size"]
-                    data_point.external_metadata = ext_metadata
-                    data_point.node_set = json.dumps(node_set) if node_set else None
-                    data_point.tenant_id = user.tenant_id if user.tenant_id else None
-                    # Check if data is already in dataset
-                    if str(data_point.id) in dataset_data_map:
-                        existing_data_points.append(data_point)
-                    else:
-                        dataset_new_data_points.append(data_point)
-                        dataset_data_map[str(data_point.id)] = True
+            # Find metadata from Cognee data storage text file
+            async with open_data_file(cognee_storage_file_path) as file:
+                classified_data = ingestion.classify(file)
+                storage_file_metadata = classified_data.get_metadata()
+            from sqlalchemy import select
+            db_engine = get_relational_engine()
+            # Check to see if data should be updated
+            async with db_engine.get_async_session() as session:
+                data_point = (
+                    await session.execute(select(Data).filter(Data.id == data_id))
+                ).scalar_one_or_none()
+            # TODO: Maybe allow getting of external metadata through ingestion loader?
+            ext_metadata = get_external_metadata_dict(data_item)
+            if node_set:
+                ext_metadata["node_set"] = node_set
+            if data_point is not None:
+                data_point.name = original_file_metadata["name"]
+                data_point.raw_data_location = cognee_storage_file_path
+                data_point.original_data_location = original_file_metadata["file_path"]
+                data_point.extension = storage_file_metadata["extension"]
+                data_point.mime_type = storage_file_metadata["mime_type"]
+                data_point.original_extension = original_file_metadata["extension"]
+                data_point.original_mime_type = original_file_metadata["mime_type"]
+                data_point.loader_engine = loader_engine.loader_name
+                data_point.owner_id = user.id
+                data_point.content_hash = original_file_metadata["content_hash"]
+                data_point.raw_content_hash = storage_file_metadata["content_hash"]
+                data_point.file_size = original_file_metadata["file_size"]
+                data_point.external_metadata = ext_metadata
+                data_point.node_set = json.dumps(node_set) if node_set else None
+                data_point.tenant_id = user.tenant_id if user.tenant_id else None
+                # Check if data is already in dataset
+                if str(data_point.id) in dataset_data_map:
+                    existing_data_points.append(data_point)
                 else:
-                    if str(data_id) in dataset_data_map:
-                        continue
-                    data_point = Data(
-                        id=data_id,
-                        name=file_metadata["name"],
-                        raw_data_location=file_metadata["file_path"],
-                        extension=file_metadata["extension"],
-                        mime_type=file_metadata["mime_type"],
-                        owner_id=user.id,
-                        content_hash=file_metadata["content_hash"],
-                        external_metadata=ext_metadata,
-                        node_set=json.dumps(node_set) if node_set else None,
-                        data_size=file_metadata["file_size"],
-                        tenant_id=user.tenant_id if user.tenant_id else None,
-                        token_count=-1,
-                    )
-                    new_datapoints.append(data_point)
+                    dataset_new_data_points.append(data_point)
                     dataset_data_map[str(data_point.id)] = True
+            else:
+                if str(data_id) in dataset_data_map:
+                    continue
+                data_point = Data(
+                    id=data_id,
+                    name=original_file_metadata["name"],
+                    raw_data_location=cognee_storage_file_path,
+                    original_data_location=original_file_metadata["file_path"],
+                    extension=storage_file_metadata["extension"],
+                    mime_type=storage_file_metadata["mime_type"],
+                    original_extension=original_file_metadata["extension"],
+                    original_mime_type=original_file_metadata["mime_type"],
+                    loader_engine=loader_engine.loader_name,
+                    owner_id=user.id,
+                    content_hash=original_file_metadata["content_hash"],
+                    raw_content_hash=storage_file_metadata["content_hash"],
+                    external_metadata=ext_metadata,
+                    node_set=json.dumps(node_set) if node_set else None,
+                    data_size=original_file_metadata["file_size"],
+                    tenant_id=user.tenant_id if user.tenant_id else None,
+                    pipeline_status={},
+                    token_count=-1,
+                )
+                new_datapoints.append(data_point)
+                dataset_data_map[str(data_point.id)] = True
         async with db_engine.get_async_session() as session:
             if dataset not in session:
@@ -160,4 +188,6 @@ async def ingest_data(
         return existing_data_points + dataset_new_data_points + new_datapoints
-    return await store_data_to_dataset(data, dataset_name, user, node_set, dataset_id)
+    return await store_data_to_dataset(
+        data, dataset_name, user, node_set, dataset_id, preferred_loaders
+    )

cognee/tasks/ingestion/resolve_data_directories.py CHANGED Viewed

@@ -40,6 +40,9 @@ async def resolve_data_directories(
                     if include_subdirectories:
                         base_path = item if item.endswith("/") else item + "/"
                         s3_keys = fs.glob(base_path + "**")
+                        # If path is not directory attempt to add item directly
+                        if not s3_keys:
+                            s3_keys = fs.ls(item)
                     else:
                         s3_keys = fs.ls(item)
                     # Filter out keys that represent directories using fs.isdir

cognee/tasks/repo_processor/get_repo_file_dependencies.py CHANGED Viewed

@@ -103,6 +103,9 @@ async def get_repo_file_dependencies(
           extraction of dependencies (default is False). (default False)
     """
+    if isinstance(repo_path, list) and len(repo_path) == 1:
+        repo_path = repo_path[0]
     if not os.path.exists(repo_path):
         raise FileNotFoundError(f"Repository path {repo_path} does not exist.")

cognee/tasks/storage/index_data_points.py CHANGED Viewed

@@ -38,7 +38,7 @@ async def index_data_points(data_points: list[DataPoint]):
         index_name = index_name_and_field[:first_occurence]
         field_name = index_name_and_field[first_occurence + 1 :]
         try:
-            # In case the ammount if indexable points is too large we need to send them in batches
+            # In case the amount of indexable points is too large we need to send them in batches
             batch_size = 100
             for i in range(0, len(indexable_points), batch_size):
                 batch = indexable_points[i : i + batch_size]

cognee/tasks/storage/index_graph_edges.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from cognee.modules.engine.utils.generate_edge_id import generate_edge_id
 from cognee.shared.logging_utils import get_logger, ERROR
 from collections import Counter
@@ -49,7 +50,9 @@ async def index_graph_edges(batch_size: int = 1024):
     )
     for text, count in edge_types.items():
-        edge = EdgeType(relationship_name=text, number_of_edges=count)
+        edge = EdgeType(
+            id=generate_edge_id(edge_id=text), relationship_name=text, number_of_edges=count
+        )
         data_point_type = type(edge)
         for field_name in edge.metadata["index_fields"]:

cognee/tasks/summarization/summarize_code.py CHANGED Viewed

@@ -3,8 +3,7 @@ from typing import AsyncGenerator, Union
 from uuid import uuid5
 from cognee.infrastructure.engine import DataPoint
-from cognee.modules.data.extraction.extract_summary import extract_code_summary
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from .models import CodeSummary
@@ -17,7 +16,7 @@ async def summarize_code(
     code_data_points = [file for file in code_graph_nodes if hasattr(file, "source_code")]
     file_summaries = await asyncio.gather(
-        *[extract_code_summary(file.source_code) for file in code_data_points]
+        *[LLMGateway.extract_code_summary(file.source_code) for file in code_data_points]
     )
     file_summaries_map = {

cognee/tasks/summarization/summarize_text.py CHANGED Viewed

@@ -2,8 +2,9 @@ import asyncio
 from typing import Type
 from uuid import uuid5
 from pydantic import BaseModel
-from cognee.modules.data.extraction.extract_summary import extract_summary
 from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.modules.cognify.config import get_cognify_config
 from .models import TextSummary
@@ -42,7 +43,7 @@ async def summarize_text(
         summarization_model = cognee_config.summarization_model
     chunk_summaries = await asyncio.gather(
-        *[extract_summary(chunk.text, summarization_model) for chunk in data_chunks]
+        *[LLMGateway.extract_summary(chunk.text, summarization_model) for chunk in data_chunks]
     )
     summaries = [

cognee 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl

cognee 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl