cognee 0.5.0.dev0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +1 -5
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/cognify/cognify.py +24 -16
- cognee/api/v1/cognify/routers/__init__.py +0 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +12 -37
- cognee/api/v1/ontologies/routers/get_ontology_router.py +27 -25
- cognee/api/v1/search/search.py +8 -0
- cognee/api/v1/ui/node_setup.py +360 -0
- cognee/api/v1/ui/npm_utils.py +50 -0
- cognee/api/v1/ui/ui.py +38 -68
- cognee/context_global_variables.py +61 -16
- cognee/eval_framework/Dockerfile +29 -0
- cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +16 -28
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/graph/config.py +3 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +1 -0
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
- cognee/infrastructure/databases/utils/__init__.py +3 -0
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +62 -48
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
- cognee/infrastructure/databases/vector/config.py +2 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +1 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
- cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
- cognee/infrastructure/files/storage/s3_config.py +2 -0
- cognee/infrastructure/llm/LLMGateway.py +5 -2
- cognee/infrastructure/llm/config.py +35 -0
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -16
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +40 -37
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +39 -36
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +19 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +11 -9
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +23 -21
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +42 -34
- cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/deletion/prune_system.py +52 -2
- cognee/modules/data/methods/delete_dataset.py +26 -0
- cognee/modules/engine/models/Triplet.py +9 -0
- cognee/modules/engine/models/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +85 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
- cognee/modules/memify/memify.py +1 -7
- cognee/modules/pipelines/operations/pipeline.py +18 -2
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +4 -0
- cognee/modules/retrieval/graph_completion_cot_retriever.py +4 -0
- cognee/modules/retrieval/graph_completion_retriever.py +10 -0
- cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
- cognee/modules/retrieval/register_retriever.py +10 -0
- cognee/modules/retrieval/registered_community_retrievers.py +1 -0
- cognee/modules/retrieval/temporal_retriever.py +4 -0
- cognee/modules/retrieval/triplet_retriever.py +182 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +42 -10
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +8 -1
- cognee/modules/search/methods/get_search_type_tools.py +54 -8
- cognee/modules/search/methods/no_access_control_search.py +4 -0
- cognee/modules/search/methods/search.py +46 -18
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +19 -0
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +15 -3
- cognee/shared/logging_utils.py +4 -0
- cognee/shared/rate_limiting.py +30 -0
- cognee/tasks/documents/__init__.py +0 -1
- cognee/tasks/graph/extract_graph_from_data.py +9 -10
- cognee/tasks/memify/get_triplet_datapoints.py +289 -0
- cognee/tasks/storage/add_data_points.py +142 -2
- cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
- cognee/tests/integration/tasks/test_add_data_points.py +139 -0
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
- cognee/tests/test_cognee_server_start.py +2 -4
- cognee/tests/test_conversation_history.py +23 -1
- cognee/tests/test_dataset_database_handler.py +137 -0
- cognee/tests/test_dataset_delete.py +76 -0
- cognee/tests/test_edge_centered_payload.py +170 -0
- cognee/tests/test_pipeline_cache.py +164 -0
- cognee/tests/test_search_db.py +37 -1
- cognee/tests/unit/api/test_ontology_endpoint.py +77 -89
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
- cognee/tests/unit/modules/search/test_search.py +100 -0
- cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/METADATA +76 -89
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/RECORD +119 -97
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/WHEEL +1 -1
- cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
- cognee/modules/retrieval/code_retriever.py +0 -232
- cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
- cognee/tasks/code/get_local_dependencies_checker.py +0 -20
- cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
- cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
- cognee/tasks/repo_processor/__init__.py +0 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
- cognee/tasks/repo_processor/get_non_code_files.py +0 -158
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
- cognee/tests/test_delete_bmw_example.py +0 -60
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import pathlib
|
|
3
|
-
import asyncio
|
|
4
|
-
from typing import Optional
|
|
5
|
-
from cognee.shared.logging_utils import get_logger, setup_logging
|
|
6
|
-
from cognee.modules.observability.get_observe import get_observe
|
|
7
|
-
|
|
8
|
-
from cognee.api.v1.search import SearchType, search
|
|
9
|
-
from cognee.api.v1.visualize.visualize import visualize_graph
|
|
10
|
-
from cognee.modules.cognify.config import get_cognify_config
|
|
11
|
-
from cognee.modules.pipelines import run_tasks
|
|
12
|
-
from cognee.modules.pipelines.tasks.task import Task
|
|
13
|
-
from cognee.modules.users.methods import get_default_user
|
|
14
|
-
from cognee.shared.data_models import KnowledgeGraph
|
|
15
|
-
from cognee.modules.data.methods import create_dataset
|
|
16
|
-
from cognee.tasks.documents import classify_documents, extract_chunks_from_documents
|
|
17
|
-
from cognee.tasks.graph import extract_graph_from_data
|
|
18
|
-
from cognee.tasks.ingestion import ingest_data
|
|
19
|
-
from cognee.tasks.repo_processor import get_non_py_files, get_repo_file_dependencies
|
|
20
|
-
|
|
21
|
-
from cognee.tasks.storage import add_data_points
|
|
22
|
-
from cognee.tasks.summarization import summarize_text
|
|
23
|
-
from cognee.infrastructure.llm import get_max_chunk_tokens
|
|
24
|
-
from cognee.infrastructure.databases.relational import get_relational_engine
|
|
25
|
-
|
|
26
|
-
observe = get_observe()
|
|
27
|
-
|
|
28
|
-
logger = get_logger("code_graph_pipeline")
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
@observe
|
|
32
|
-
async def run_code_graph_pipeline(
|
|
33
|
-
repo_path,
|
|
34
|
-
include_docs=False,
|
|
35
|
-
excluded_paths: Optional[list[str]] = None,
|
|
36
|
-
supported_languages: Optional[list[str]] = None,
|
|
37
|
-
):
|
|
38
|
-
import cognee
|
|
39
|
-
from cognee.low_level import setup
|
|
40
|
-
|
|
41
|
-
await cognee.prune.prune_data()
|
|
42
|
-
await cognee.prune.prune_system(metadata=True)
|
|
43
|
-
await setup()
|
|
44
|
-
|
|
45
|
-
cognee_config = get_cognify_config()
|
|
46
|
-
user = await get_default_user()
|
|
47
|
-
detailed_extraction = True
|
|
48
|
-
|
|
49
|
-
tasks = [
|
|
50
|
-
Task(
|
|
51
|
-
get_repo_file_dependencies,
|
|
52
|
-
detailed_extraction=detailed_extraction,
|
|
53
|
-
supported_languages=supported_languages,
|
|
54
|
-
excluded_paths=excluded_paths,
|
|
55
|
-
),
|
|
56
|
-
# Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
|
|
57
|
-
Task(add_data_points, task_config={"batch_size": 30}),
|
|
58
|
-
]
|
|
59
|
-
|
|
60
|
-
if include_docs:
|
|
61
|
-
# This tasks take a long time to complete
|
|
62
|
-
non_code_tasks = [
|
|
63
|
-
Task(get_non_py_files, task_config={"batch_size": 50}),
|
|
64
|
-
Task(ingest_data, dataset_name="repo_docs", user=user),
|
|
65
|
-
Task(classify_documents),
|
|
66
|
-
Task(extract_chunks_from_documents, max_chunk_size=get_max_chunk_tokens()),
|
|
67
|
-
Task(
|
|
68
|
-
extract_graph_from_data,
|
|
69
|
-
graph_model=KnowledgeGraph,
|
|
70
|
-
task_config={"batch_size": 50},
|
|
71
|
-
),
|
|
72
|
-
Task(
|
|
73
|
-
summarize_text,
|
|
74
|
-
summarization_model=cognee_config.summarization_model,
|
|
75
|
-
task_config={"batch_size": 50},
|
|
76
|
-
),
|
|
77
|
-
]
|
|
78
|
-
|
|
79
|
-
dataset_name = "codebase"
|
|
80
|
-
|
|
81
|
-
# Save dataset to database
|
|
82
|
-
db_engine = get_relational_engine()
|
|
83
|
-
async with db_engine.get_async_session() as session:
|
|
84
|
-
dataset = await create_dataset(dataset_name, user, session)
|
|
85
|
-
|
|
86
|
-
if include_docs:
|
|
87
|
-
non_code_pipeline_run = run_tasks(
|
|
88
|
-
non_code_tasks, dataset.id, repo_path, user, "cognify_pipeline"
|
|
89
|
-
)
|
|
90
|
-
async for run_status in non_code_pipeline_run:
|
|
91
|
-
yield run_status
|
|
92
|
-
|
|
93
|
-
async for run_status in run_tasks(
|
|
94
|
-
tasks, dataset.id, repo_path, user, "cognify_code_pipeline", incremental_loading=False
|
|
95
|
-
):
|
|
96
|
-
yield run_status
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
if __name__ == "__main__":
|
|
100
|
-
|
|
101
|
-
async def main():
|
|
102
|
-
async for run_status in run_code_graph_pipeline("REPO_PATH"):
|
|
103
|
-
print(f"{run_status.pipeline_run_id}: {run_status.status}")
|
|
104
|
-
|
|
105
|
-
file_path = os.path.join(
|
|
106
|
-
pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html"
|
|
107
|
-
)
|
|
108
|
-
await visualize_graph(file_path)
|
|
109
|
-
|
|
110
|
-
search_results = await search(
|
|
111
|
-
query_type=SearchType.CODE,
|
|
112
|
-
query_text="How is Relationship weight calculated?",
|
|
113
|
-
)
|
|
114
|
-
|
|
115
|
-
for file in search_results:
|
|
116
|
-
print(file["name"])
|
|
117
|
-
|
|
118
|
-
logger = setup_logging(name="code_graph_pipeline")
|
|
119
|
-
asyncio.run(main())
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from cognee.shared.logging_utils import get_logger
|
|
3
|
-
from fastapi import APIRouter
|
|
4
|
-
from fastapi.responses import JSONResponse
|
|
5
|
-
from cognee.api.DTO import InDTO
|
|
6
|
-
from cognee.modules.retrieval.code_retriever import CodeRetriever
|
|
7
|
-
from cognee.modules.storage.utils import JSONEncoder
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
logger = get_logger()
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class CodePipelineIndexPayloadDTO(InDTO):
|
|
14
|
-
repo_path: str
|
|
15
|
-
include_docs: bool = False
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class CodePipelineRetrievePayloadDTO(InDTO):
|
|
19
|
-
query: str
|
|
20
|
-
full_input: str
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def get_code_pipeline_router() -> APIRouter:
|
|
24
|
-
try:
|
|
25
|
-
import cognee.api.v1.cognify.code_graph_pipeline
|
|
26
|
-
except ModuleNotFoundError:
|
|
27
|
-
logger.error("codegraph dependencies not found. Skipping codegraph API routes.")
|
|
28
|
-
return None
|
|
29
|
-
|
|
30
|
-
router = APIRouter()
|
|
31
|
-
|
|
32
|
-
@router.post("/index", response_model=None)
|
|
33
|
-
async def code_pipeline_index(payload: CodePipelineIndexPayloadDTO):
|
|
34
|
-
"""
|
|
35
|
-
Run indexation on a code repository.
|
|
36
|
-
|
|
37
|
-
This endpoint processes a code repository to create a knowledge graph
|
|
38
|
-
of the codebase structure, dependencies, and relationships.
|
|
39
|
-
|
|
40
|
-
## Request Parameters
|
|
41
|
-
- **repo_path** (str): Path to the code repository
|
|
42
|
-
- **include_docs** (bool): Whether to include documentation files (default: false)
|
|
43
|
-
|
|
44
|
-
## Response
|
|
45
|
-
No content returned. Processing results are logged.
|
|
46
|
-
|
|
47
|
-
## Error Codes
|
|
48
|
-
- **409 Conflict**: Error during indexation process
|
|
49
|
-
"""
|
|
50
|
-
from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
|
|
51
|
-
|
|
52
|
-
try:
|
|
53
|
-
async for result in run_code_graph_pipeline(payload.repo_path, payload.include_docs):
|
|
54
|
-
logger.info(result)
|
|
55
|
-
except Exception as error:
|
|
56
|
-
return JSONResponse(status_code=409, content={"error": str(error)})
|
|
57
|
-
|
|
58
|
-
@router.post("/retrieve", response_model=list[dict])
|
|
59
|
-
async def code_pipeline_retrieve(payload: CodePipelineRetrievePayloadDTO):
|
|
60
|
-
"""
|
|
61
|
-
Retrieve context from the code knowledge graph.
|
|
62
|
-
|
|
63
|
-
This endpoint searches the indexed code repository to find relevant
|
|
64
|
-
context based on the provided query.
|
|
65
|
-
|
|
66
|
-
## Request Parameters
|
|
67
|
-
- **query** (str): Search query for code context
|
|
68
|
-
- **full_input** (str): Full input text for processing
|
|
69
|
-
|
|
70
|
-
## Response
|
|
71
|
-
Returns a list of relevant code files and context as JSON.
|
|
72
|
-
|
|
73
|
-
## Error Codes
|
|
74
|
-
- **409 Conflict**: Error during retrieval process
|
|
75
|
-
"""
|
|
76
|
-
try:
|
|
77
|
-
query = (
|
|
78
|
-
payload.full_input.replace("cognee ", "")
|
|
79
|
-
if payload.full_input.startswith("cognee ")
|
|
80
|
-
else payload.full_input
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
retriever = CodeRetriever()
|
|
84
|
-
retrieved_files = await retriever.get_context(query)
|
|
85
|
-
|
|
86
|
-
return json.dumps(retrieved_files, cls=JSONEncoder)
|
|
87
|
-
except Exception as error:
|
|
88
|
-
return JSONResponse(status_code=409, content={"error": str(error)})
|
|
89
|
-
|
|
90
|
-
return router
|