cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
- cognee/api/v1/memify/routers/get_memify_router.py +2 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +25 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +1 -0
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +31 -32
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -215
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/chunks/__init__.py +9 -0
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/graph/__init__.py +7 -0
- cognee/tasks/memify/__init__.py +8 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
- cognee/tests/integration/retrieval/test_structured_output.py +62 -18
- cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
- cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
- cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +97 -110
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +176 -0
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
Assistant Guidelines
|
|
2
|
+
These rules are absolutely imperative to adhere to. Comply with them precisely as they are outlined.
|
|
3
|
+
|
|
4
|
+
The agent must use the sequential thinking MCP tool to work out problems.
|
|
5
|
+
|
|
6
|
+
Core Behavior Guidelines
|
|
7
|
+
|
|
8
|
+
Respond only to explicit requests. Do not add files, code, tests, or comments unless asked.
|
|
9
|
+
|
|
10
|
+
Follow instructions precisely. No assumptions or speculative additions.
|
|
11
|
+
|
|
12
|
+
Use provided context accurately.
|
|
13
|
+
|
|
14
|
+
Avoid extra output. No debugging logs or test harnesses unless requested.
|
|
15
|
+
|
|
16
|
+
Produce clean, optimized code when code is requested. Respect existing style.
|
|
17
|
+
|
|
18
|
+
Deliver complete, standalone solutions. No placeholders.
|
|
19
|
+
|
|
20
|
+
Limit file creation. Only create new files when necessary.
|
|
21
|
+
|
|
22
|
+
If you modify the model in a user's code, you must confirm with the user and never be sneaky. Always tell the user exactly what you are doing.
|
|
23
|
+
|
|
24
|
+
Communication & Delivery
|
|
25
|
+
|
|
26
|
+
9. Don't explain unless asked. Do not expose reasoning in outputs.
|
|
27
|
+
10. If unsure, say "I don't know." Avoid hallucinated content.
|
|
28
|
+
11. Maintain consistency across sessions. Refer to project memory and documentation.
|
|
29
|
+
12. Respect privacy and permissions. Never leak or infer secure data.
|
|
30
|
+
13. Prioritize targeted edits over full rewrites.
|
|
31
|
+
14. Optimize incrementally. Avoid unnecessary overhauls.
|
|
32
|
+
|
|
33
|
+
Spec.md Requirement
|
|
34
|
+
|
|
35
|
+
You must maintain a file named Spec.md. This file acts as the single source of truth for the project.
|
|
36
|
+
|
|
37
|
+
Rules:
|
|
38
|
+
|
|
39
|
+
Before starting any implementation, check if Spec.md already exists.
|
|
40
|
+
|
|
41
|
+
If it does not exist, create one using the template provided below.
|
|
42
|
+
|
|
43
|
+
Always update Spec.md before and after any major change.
|
|
44
|
+
|
|
45
|
+
Use the contents of Spec.md to guide logic, structure, and implementation decisions.
|
|
46
|
+
|
|
47
|
+
When updating a section, condense previous content to keep the document concise.
|
|
48
|
+
|
|
49
|
+
Spec.md Starter Template (Plain Text Format)
|
|
50
|
+
|
|
51
|
+
Title: Spec.md – Project Specification
|
|
52
|
+
|
|
53
|
+
Section: Purpose
|
|
54
|
+
Describe the main goal of this feature, tool, or system.
|
|
55
|
+
|
|
56
|
+
Section: Core Functionality
|
|
57
|
+
List the key features, expected behaviors, and common use cases.
|
|
58
|
+
|
|
59
|
+
Section: Architecture Overview
|
|
60
|
+
Summarize the technical setup, frameworks used, and main modules or services.
|
|
61
|
+
|
|
62
|
+
Section: Input and Output Contracts
|
|
63
|
+
List all inputs and outputs in a table-like format:
|
|
64
|
+
|
|
65
|
+
Input: describe the input data, its format, and where it comes from.
|
|
66
|
+
|
|
67
|
+
Output: describe the output data, its format, and its destination.
|
|
68
|
+
|
|
69
|
+
Section: Edge Cases and Constraints
|
|
70
|
+
List known limitations, special scenarios, and fallback behaviors.
|
|
71
|
+
|
|
72
|
+
Section: File and Module Map
|
|
73
|
+
List all important files or modules and describe what each one is responsible for.
|
|
74
|
+
|
|
75
|
+
Section: Open Questions or TODOs
|
|
76
|
+
Create a checklist of unresolved decisions, logic that needs clarification, or tasks that are still pending.
|
|
77
|
+
|
|
78
|
+
Section: Last Updated
|
|
79
|
+
Include the most recent update date and who made the update.
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# PEP 8 Style Guide: Essentials
|
|
2
|
+
|
|
3
|
+
## Code Layout
|
|
4
|
+
- Indentation: 4 spaces per level
|
|
5
|
+
- Line length: 79 for code (88/100 acceptable by team), 72 for comments/docstrings
|
|
6
|
+
- Blank lines: 2 around top-level defs/classes, 1 between methods
|
|
7
|
+
|
|
8
|
+
```python
|
|
9
|
+
# Hanging indent for long calls
|
|
10
|
+
foo = long_function_name(
|
|
11
|
+
var_one, var_two,
|
|
12
|
+
var_three, var_four,
|
|
13
|
+
)
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Imports
|
|
17
|
+
- One import per line
|
|
18
|
+
- Group: stdlib, third-party, local
|
|
19
|
+
- Prefer absolute imports; avoid wildcard imports
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
import os
|
|
23
|
+
import sys
|
|
24
|
+
from subprocess import Popen, PIPE
|
|
25
|
+
|
|
26
|
+
import requests
|
|
27
|
+
|
|
28
|
+
from myproject.models import User
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Whitespace
|
|
32
|
+
- No space inside brackets or before commas/semicolons
|
|
33
|
+
- Spaces around binary operators
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
x = 1
|
|
37
|
+
hypot2 = x * x + y * y
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Naming
|
|
41
|
+
- snake_case: functions, variables
|
|
42
|
+
- PascalCase: classes
|
|
43
|
+
- SCREAMING_SNAKE_CASE: constants
|
|
44
|
+
|
|
45
|
+
## Comments & Docstrings
|
|
46
|
+
- Use complete sentences; keep up to date
|
|
47
|
+
- Triple-double quotes for public modules, classes, functions
|
|
48
|
+
```python
|
|
49
|
+
def f(x: int) -> int:
|
|
50
|
+
"""Return x doubled."""
|
|
51
|
+
return x * 2
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Type Hints
|
|
55
|
+
- Space after colon; arrow for returns
|
|
56
|
+
```python
|
|
57
|
+
def munge(s: str) -> str: ...
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Tooling
|
|
61
|
+
- Black, isort, Flake8 (or Ruff) to automate style
|
|
62
|
+
- Example pyproject.toml excerpt:
|
|
63
|
+
```toml
|
|
64
|
+
[tool.black]
|
|
65
|
+
line-length = 88
|
|
66
|
+
|
|
67
|
+
[tool.isort]
|
|
68
|
+
profile = "black"
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Common Violations
|
|
72
|
+
- E501: line too long -> break with parentheses
|
|
73
|
+
- E225: missing whitespace around operator
|
|
74
|
+
- E402: module import not at top of file
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# The Zen of Python: Practical Guide
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
The Zen of Python (Tim Peters, import this) captures Python's philosophy. Use these principles as a checklist during design, coding, and reviews.
|
|
5
|
+
|
|
6
|
+
## Key Principles With Guidance
|
|
7
|
+
|
|
8
|
+
### 1. Beautiful is better than ugly
|
|
9
|
+
Prefer descriptive names, clear structure, and consistent formatting.
|
|
10
|
+
|
|
11
|
+
### 2. Explicit is better than implicit
|
|
12
|
+
Be clear about behavior, imports, and types.
|
|
13
|
+
```python
|
|
14
|
+
from datetime import datetime, timedelta
|
|
15
|
+
|
|
16
|
+
def get_future_date(days_ahead: int) -> datetime:
|
|
17
|
+
return datetime.now() + timedelta(days=days_ahead)
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### 3. Simple is better than complex
|
|
21
|
+
Choose straightforward solutions first.
|
|
22
|
+
|
|
23
|
+
### 4. Complex is better than complicated
|
|
24
|
+
When complexity is needed, organize it with clear abstractions.
|
|
25
|
+
|
|
26
|
+
### 5. Flat is better than nested
|
|
27
|
+
Use early returns to reduce indentation.
|
|
28
|
+
|
|
29
|
+
### 6. Sparse is better than dense
|
|
30
|
+
Give code room to breathe with whitespace.
|
|
31
|
+
|
|
32
|
+
### 7. Readability counts
|
|
33
|
+
Optimize for human readers; add docstrings for nontrivial code.
|
|
34
|
+
|
|
35
|
+
### 8. Special cases aren't special enough to break the rules
|
|
36
|
+
Stay consistent; exceptions should be rare and justified.
|
|
37
|
+
|
|
38
|
+
### 9. Although practicality beats purity
|
|
39
|
+
Prefer practical solutions that teams can maintain.
|
|
40
|
+
|
|
41
|
+
### 10. Errors should never pass silently
|
|
42
|
+
Handle exceptions explicitly; log with context.
|
|
43
|
+
|
|
44
|
+
### 11. Unless explicitly silenced
|
|
45
|
+
Silence only specific, acceptable errors and document why.
|
|
46
|
+
|
|
47
|
+
### 12. In the face of ambiguity, refuse the temptation to guess
|
|
48
|
+
Require explicit inputs and behavior.
|
|
49
|
+
|
|
50
|
+
### 13. There should be one obvious way to do it
|
|
51
|
+
Prefer standard library patterns and idioms.
|
|
52
|
+
|
|
53
|
+
### 14. Although that way may not be obvious at first
|
|
54
|
+
Learn Python idioms; embrace clarity over novelty.
|
|
55
|
+
|
|
56
|
+
### 15. Now is better than never; 16. Never is often better than right now
|
|
57
|
+
Iterate, but don't rush broken code.
|
|
58
|
+
|
|
59
|
+
### 17/18. Hard to explain is bad; easy to explain is good
|
|
60
|
+
Prefer designs you can explain simply.
|
|
61
|
+
|
|
62
|
+
### 19. Namespaces are one honking great idea
|
|
63
|
+
Use modules/packages to separate concerns; avoid wildcard imports.
|
|
64
|
+
|
|
65
|
+
## Modern Python Tie-ins
|
|
66
|
+
- Type hints reinforce explicitness
|
|
67
|
+
- Context managers enforce safe resource handling
|
|
68
|
+
- Dataclasses improve readability for data containers
|
|
69
|
+
|
|
70
|
+
## Quick Review Checklist
|
|
71
|
+
- Is it readable and explicit?
|
|
72
|
+
- Is this the simplest working solution?
|
|
73
|
+
- Are errors explicit and logged?
|
|
74
|
+
- Are modules/namespaces used appropriately?
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import Any, Optional, List, Type
|
|
2
|
+
from typing import Any, Optional, List, Type, Union
|
|
3
3
|
from cognee.shared.logging_utils import get_logger
|
|
4
4
|
|
|
5
5
|
from cognee.infrastructure.entities.BaseEntityExtractor import BaseEntityExtractor
|
|
@@ -40,19 +40,21 @@ class EntityCompletionRetriever(BaseRetriever):
|
|
|
40
40
|
context_provider: BaseContextProvider,
|
|
41
41
|
user_prompt_path: str = "context_for_question.txt",
|
|
42
42
|
system_prompt_path: str = "answer_simple_question.txt",
|
|
43
|
+
session_id: Optional[str] = None,
|
|
44
|
+
response_model: Type = str,
|
|
43
45
|
):
|
|
44
46
|
self.extractor = extractor
|
|
45
47
|
self.context_provider = context_provider
|
|
46
48
|
self.user_prompt_path = user_prompt_path
|
|
47
49
|
self.system_prompt_path = system_prompt_path
|
|
50
|
+
self.session_id = session_id
|
|
51
|
+
self.response_model = response_model
|
|
48
52
|
|
|
49
|
-
async def
|
|
53
|
+
async def get_retrieved_objects(self, query: str) -> Any:
|
|
50
54
|
"""
|
|
51
|
-
Get
|
|
55
|
+
Get relevant objects from the provided query.
|
|
52
56
|
|
|
53
|
-
|
|
54
|
-
attempts to retrieve the corresponding context using the context provider. Returns None
|
|
55
|
-
if no entities or context are found, or logs the error if an exception occurs.
|
|
57
|
+
Extracts and returns entities from the provided query, returning None if no entities are found.
|
|
56
58
|
|
|
57
59
|
Parameters:
|
|
58
60
|
-----------
|
|
@@ -62,8 +64,8 @@ class EntityCompletionRetriever(BaseRetriever):
|
|
|
62
64
|
Returns:
|
|
63
65
|
--------
|
|
64
66
|
|
|
65
|
-
- Any: The
|
|
66
|
-
|
|
67
|
+
- Any: The extracted entities, or None if no entities are found.
|
|
68
|
+
|
|
67
69
|
"""
|
|
68
70
|
try:
|
|
69
71
|
logger.info(f"Processing query: {query[:100]}")
|
|
@@ -73,40 +75,57 @@ class EntityCompletionRetriever(BaseRetriever):
|
|
|
73
75
|
logger.info("No entities extracted")
|
|
74
76
|
return None
|
|
75
77
|
|
|
76
|
-
|
|
78
|
+
return entities
|
|
79
|
+
|
|
80
|
+
except Exception as e:
|
|
81
|
+
logger.error(f"Context retrieval failed: {str(e)}")
|
|
82
|
+
return None
|
|
83
|
+
|
|
84
|
+
async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> str:
|
|
85
|
+
"""
|
|
86
|
+
Get context using the extracted entities and a context provider.
|
|
87
|
+
|
|
88
|
+
Retrieves the context corresponding to the retrieved entities in retrieved_objects.
|
|
89
|
+
Returns an empty string if no context is retrieved.
|
|
90
|
+
|
|
91
|
+
Parameters:
|
|
92
|
+
-----------
|
|
93
|
+
|
|
94
|
+
- query (str): The query string for which context is being retrieved.
|
|
95
|
+
- retrieved_objects (Any): The retrieved entities extracted from the query.
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
--------
|
|
99
|
+
|
|
100
|
+
- str: The context retrieved from the context provider or an empty string
|
|
101
|
+
if not found or an error occurred.
|
|
102
|
+
"""
|
|
103
|
+
try:
|
|
104
|
+
logger.info(f"Processing query: {query[:100]}")
|
|
105
|
+
|
|
106
|
+
context = await self.context_provider.get_context(retrieved_objects, query)
|
|
77
107
|
if not context:
|
|
78
108
|
logger.info("No context retrieved")
|
|
79
|
-
return
|
|
109
|
+
return ""
|
|
80
110
|
|
|
81
111
|
return context
|
|
82
112
|
|
|
83
113
|
except Exception as e:
|
|
84
114
|
logger.error(f"Context retrieval failed: {str(e)}")
|
|
85
|
-
return
|
|
115
|
+
return ""
|
|
86
116
|
|
|
87
|
-
async def
|
|
88
|
-
self,
|
|
89
|
-
|
|
90
|
-
context: Optional[Any] = None,
|
|
91
|
-
session_id: Optional[str] = None,
|
|
92
|
-
response_model: Type = str,
|
|
93
|
-
) -> List[Any]:
|
|
117
|
+
async def get_completion_from_context(
|
|
118
|
+
self, query: str, retrieved_objects: Any, context: Any
|
|
119
|
+
) -> Union[List[str], List[dict]]:
|
|
94
120
|
"""
|
|
95
|
-
Generate completion using provided context
|
|
96
|
-
|
|
97
|
-
If context is not provided, it fetches context using the query. If no context is
|
|
98
|
-
available, it returns an error message. Logs an error if completion generation fails due
|
|
99
|
-
to an exception.
|
|
121
|
+
Generate completion using provided context.
|
|
100
122
|
|
|
101
123
|
Parameters:
|
|
102
124
|
-----------
|
|
103
125
|
|
|
104
126
|
- query (str): The query string for which completion is being generated.
|
|
105
|
-
-
|
|
106
|
-
|
|
107
|
-
- session_id (Optional[str]): Optional session identifier for caching. If None,
|
|
108
|
-
defaults to 'default_session'. (default None)
|
|
109
|
-
- response_model (Type): The Pydantic model type for structured output. (default str)
|
|
127
|
+
- retrieved_objects (Any): The retrieved objects extracted from the query.
|
|
128
|
+
- context (Any): The context to be used for generating completion.
|
|
110
129
|
|
|
111
130
|
Returns:
|
|
112
131
|
--------
|
|
@@ -115,12 +134,6 @@ class EntityCompletionRetriever(BaseRetriever):
|
|
|
115
134
|
relevant entities were found.
|
|
116
135
|
"""
|
|
117
136
|
try:
|
|
118
|
-
if context is None:
|
|
119
|
-
context = await self.get_context(query)
|
|
120
|
-
|
|
121
|
-
if context is None:
|
|
122
|
-
return ["No relevant entities found for the query."]
|
|
123
|
-
|
|
124
137
|
# Check if we need to generate context summary for caching
|
|
125
138
|
cache_config = CacheConfig()
|
|
126
139
|
user = session_user.get()
|
|
@@ -128,7 +141,7 @@ class EntityCompletionRetriever(BaseRetriever):
|
|
|
128
141
|
session_save = user_id and cache_config.caching
|
|
129
142
|
|
|
130
143
|
if session_save:
|
|
131
|
-
conversation_history = await get_conversation_history(session_id=session_id)
|
|
144
|
+
conversation_history = await get_conversation_history(session_id=self.session_id)
|
|
132
145
|
|
|
133
146
|
context_summary, completion = await asyncio.gather(
|
|
134
147
|
summarize_text(str(context)),
|
|
@@ -138,7 +151,7 @@ class EntityCompletionRetriever(BaseRetriever):
|
|
|
138
151
|
user_prompt_path=self.user_prompt_path,
|
|
139
152
|
system_prompt_path=self.system_prompt_path,
|
|
140
153
|
conversation_history=conversation_history,
|
|
141
|
-
response_model=response_model,
|
|
154
|
+
response_model=self.response_model,
|
|
142
155
|
),
|
|
143
156
|
)
|
|
144
157
|
else:
|
|
@@ -147,7 +160,7 @@ class EntityCompletionRetriever(BaseRetriever):
|
|
|
147
160
|
context=context,
|
|
148
161
|
user_prompt_path=self.user_prompt_path,
|
|
149
162
|
system_prompt_path=self.system_prompt_path,
|
|
150
|
-
response_model=response_model,
|
|
163
|
+
response_model=self.response_model,
|
|
151
164
|
)
|
|
152
165
|
|
|
153
166
|
if session_save:
|
|
@@ -155,7 +168,7 @@ class EntityCompletionRetriever(BaseRetriever):
|
|
|
155
168
|
query=query,
|
|
156
169
|
context_summary=context_summary,
|
|
157
170
|
answer=completion,
|
|
158
|
-
session_id=session_id,
|
|
171
|
+
session_id=self.session_id,
|
|
159
172
|
)
|
|
160
173
|
|
|
161
174
|
return [completion]
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
|
@@ -1,22 +1,78 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import Any, Optional, Type, List
|
|
2
|
+
from typing import Any, Optional, Type, List, Union
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class BaseRetriever(ABC):
|
|
6
|
-
"""
|
|
6
|
+
"""
|
|
7
|
+
Base class for all retrieval operations.
|
|
8
|
+
|
|
9
|
+
The retrieval workflow follows a three-step pipeline:
|
|
10
|
+
1. get_retrieved_objects: Fetch raw data (e.g., Graph Edges, Vector chunks).
|
|
11
|
+
2. get_context_from_objects: Process raw data into a format suitable for an LLM (e.g., a text string).
|
|
12
|
+
3. get_completion_from_context: Generate a final response with the help of an LLM using the context and the original query.
|
|
13
|
+
"""
|
|
7
14
|
|
|
8
15
|
@abstractmethod
|
|
9
|
-
async def
|
|
10
|
-
"""
|
|
16
|
+
async def get_retrieved_objects(self, query: str) -> Any:
|
|
17
|
+
"""
|
|
18
|
+
Retrieves the raw data points from the underlying storage (Graph or Vector DB).
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
query (str): The search query or input string.
|
|
22
|
+
|
|
23
|
+
Returns:
|
|
24
|
+
List[Any]: A list of raw objects (e.g., Edge objects, Document chunks)
|
|
25
|
+
relevant to the query.
|
|
26
|
+
"""
|
|
11
27
|
pass
|
|
12
28
|
|
|
13
29
|
@abstractmethod
|
|
14
|
-
async def
|
|
30
|
+
async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> str:
|
|
31
|
+
"""
|
|
32
|
+
Transforms raw retrieved objects into a structured context for the LLM.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
query (str): The search query or input string.
|
|
36
|
+
retrieved_objects (List[Any]): The output from get_retrieved_objects.
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
Any: The formatted context (typically a string or a list of strings)
|
|
40
|
+
to be injected into a prompt.
|
|
41
|
+
"""
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
@abstractmethod
|
|
45
|
+
async def get_completion_from_context(
|
|
15
46
|
self,
|
|
16
47
|
query: str,
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
48
|
+
retrieved_objects: Any,
|
|
49
|
+
context: Any,
|
|
50
|
+
) -> Union[List[str], List[dict]]:
|
|
51
|
+
"""
|
|
52
|
+
Generates a final output or answer based on the query and retrieved context.
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
query (str): The original user query.
|
|
56
|
+
retrieved_objects (List[Any]): The output from get_retrieved_objects.
|
|
57
|
+
context (Optional[Any]): The formatted context string/data used to
|
|
58
|
+
augment the generation. Output from get_context_from_objects.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
List[Any]: A list containing the generated completions or response objects.
|
|
62
|
+
"""
|
|
22
63
|
pass
|
|
64
|
+
|
|
65
|
+
async def get_completion(self, query: str) -> Union[List[str], List[dict]]:
|
|
66
|
+
"""
|
|
67
|
+
Runs the full retrieval pipeline for the query: retrieves the raw objects, builds the context from them, and generates the final output or answer.
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
query (str): The original user query.
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
List[Any]: A list containing the generated completions or response objects.
|
|
74
|
+
"""
|
|
75
|
+
retrieved_objects = await self.get_retrieved_objects(query)
|
|
76
|
+
context = await self.get_context_from_objects(query, retrieved_objects)
|
|
77
|
+
completion = await self.get_completion_from_context(query, retrieved_objects, context)
|
|
78
|
+
return completion
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
2
|
-
|
|
1
|
+
from typing import Any, Optional, List, Union
|
|
2
|
+
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
|
|
3
3
|
from cognee.shared.logging_utils import get_logger
|
|
4
4
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
5
5
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
|
6
6
|
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
|
7
7
|
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
|
|
8
|
+
from datetime import datetime, timezone
|
|
8
9
|
|
|
9
10
|
logger = get_logger("ChunksRetriever")
|
|
10
11
|
|
|
@@ -27,75 +28,82 @@ class ChunksRetriever(BaseRetriever):
|
|
|
27
28
|
):
|
|
28
29
|
self.top_k = top_k
|
|
29
30
|
|
|
30
|
-
async def
|
|
31
|
+
async def get_completion_from_context(
|
|
32
|
+
self, query: str, retrieved_objects: Any, context: Any
|
|
33
|
+
) -> Union[List[str], List[dict]]:
|
|
31
34
|
"""
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
Raises a NoDataError if no data is found in the system.
|
|
35
|
+
Generates a completion using document chunks context.
|
|
36
|
+
In case of the Chunks Retriever, we do not generate a completion, we just return
|
|
37
|
+
the payloads of found chunks.
|
|
36
38
|
|
|
37
39
|
Parameters:
|
|
38
40
|
-----------
|
|
39
41
|
|
|
40
|
-
- query (str): The query string to
|
|
42
|
+
- query (str): The query string to be used for generating a completion.
|
|
43
|
+
- retrieved_objects (Any): The retrieved objects to be used for generating a completion.
|
|
44
|
+
- context (Any): The context to be used for generating a completion.
|
|
41
45
|
|
|
42
46
|
Returns:
|
|
43
47
|
--------
|
|
44
48
|
|
|
45
|
-
-
|
|
49
|
+
- List[dict]: A list of payloads of found chunks.
|
|
46
50
|
"""
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
try:
|
|
54
|
-
found_chunks = await vector_engine.search("DocumentChunk_text", query, limit=self.top_k)
|
|
55
|
-
logger.info(f"Found {len(found_chunks)} chunks from vector search")
|
|
56
|
-
except CollectionNotFoundError as error:
|
|
57
|
-
logger.error("DocumentChunk_text collection not found in vector database")
|
|
58
|
-
raise NoDataError("No data found in the system, please add data first.") from error
|
|
59
|
-
|
|
60
|
-
chunk_payloads = [result.payload for result in found_chunks]
|
|
61
|
-
logger.info(f"Returning {len(chunk_payloads)} chunk payloads")
|
|
62
|
-
return chunk_payloads
|
|
51
|
+
# TODO: Do we want to generate a completion using LLM here?
|
|
52
|
+
if retrieved_objects:
|
|
53
|
+
chunk_payloads = [found_chunk.payload for found_chunk in retrieved_objects]
|
|
54
|
+
return chunk_payloads
|
|
55
|
+
else:
|
|
56
|
+
return []
|
|
63
57
|
|
|
64
|
-
async def
|
|
65
|
-
self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
|
|
66
|
-
) -> Any:
|
|
58
|
+
async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> str:
|
|
67
59
|
"""
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
If the context is not provided, it retrieves the context based on the query. Returns the
|
|
71
|
-
context, which can be used for further processing or generation of outputs.
|
|
60
|
+
Retrieves context from retrieved chunks, in text form.
|
|
72
61
|
|
|
73
62
|
Parameters:
|
|
74
63
|
-----------
|
|
75
64
|
|
|
76
|
-
- query (str): The query string to
|
|
77
|
-
-
|
|
78
|
-
completion; if None, it retrieves the context for the query. (default None)
|
|
79
|
-
- session_id (Optional[str]): Optional session identifier for caching. If None,
|
|
80
|
-
defaults to 'default_session'. (default None)
|
|
65
|
+
- query (str): The query string used to search for relevant document chunks.
|
|
66
|
+
- retrieved_objects (Any): The retrieved objects to be used for generating textual context.
|
|
81
67
|
|
|
82
68
|
Returns:
|
|
83
69
|
--------
|
|
84
70
|
|
|
85
|
-
-
|
|
86
|
-
|
|
71
|
+
- str: A string containing the combined text of the retrieved chunks, or an
|
|
72
|
+
empty string if none are found.
|
|
87
73
|
"""
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
if context is None:
|
|
93
|
-
logger.debug("No context provided, retrieving context from vector database")
|
|
94
|
-
context = await self.get_context(query)
|
|
74
|
+
if retrieved_objects:
|
|
75
|
+
chunk_payload_texts = [found_chunk.payload["text"] for found_chunk in retrieved_objects]
|
|
76
|
+
return "\n".join(chunk_payload_texts)
|
|
95
77
|
else:
|
|
96
|
-
|
|
78
|
+
return ""
|
|
97
79
|
|
|
80
|
+
async def get_retrieved_objects(self, query: str) -> Any:
|
|
81
|
+
"""
|
|
82
|
+
Retrieves document chunks context based on the query.
|
|
83
|
+
Searches for document chunks relevant to the specified query using a vector engine.
|
|
84
|
+
Raises a NoDataError if no data is found in the system.
|
|
85
|
+
Parameters:
|
|
86
|
+
-----------
|
|
87
|
+
- query (str): The query string to search for relevant document chunks.
|
|
88
|
+
Returns:
|
|
89
|
+
--------
|
|
90
|
+
- Any: A list of document chunks retrieved from the search.
|
|
91
|
+
"""
|
|
98
92
|
logger.info(
|
|
99
|
-
f"
|
|
93
|
+
f"Starting chunk retrieval for query: '{query[:100]}{'...' if len(query) > 100 else ''}'"
|
|
100
94
|
)
|
|
101
|
-
|
|
95
|
+
|
|
96
|
+
vector_engine = get_vector_engine()
|
|
97
|
+
|
|
98
|
+
try:
|
|
99
|
+
found_chunks = await vector_engine.search(
|
|
100
|
+
"DocumentChunk_text", query, limit=self.top_k, include_payload=True
|
|
101
|
+
)
|
|
102
|
+
logger.info(f"Found {len(found_chunks)} chunks from vector search")
|
|
103
|
+
await update_node_access_timestamps(found_chunks)
|
|
104
|
+
|
|
105
|
+
return found_chunks
|
|
106
|
+
|
|
107
|
+
except CollectionNotFoundError as error:
|
|
108
|
+
logger.error("DocumentChunk_text collection not found in vector database")
|
|
109
|
+
raise NoDataError("No data found in the system, please add data first.") from error
|
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from functools import reduce
|
|
3
|
-
from typing import List, Optional
|
|
3
|
+
from typing import List, Optional, Any
|
|
4
4
|
from cognee.shared.logging_utils import get_logger
|
|
5
|
+
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
|
5
6
|
from cognee.tasks.codingagents.coding_rule_associations import get_existing_rules
|
|
6
7
|
|
|
7
8
|
logger = get_logger("CodingRulesRetriever")
|
|
8
9
|
|
|
9
10
|
|
|
10
|
-
class CodingRulesRetriever:
|
|
11
|
+
class CodingRulesRetriever(BaseRetriever):
|
|
11
12
|
"""Retriever for handling coding rule based searches."""
|
|
12
13
|
|
|
13
14
|
def __init__(self, rules_nodeset_name: Optional[List[str]] = None):
|
|
14
|
-
if isinstance(rules_nodeset_name, list):
|
|
15
|
+
if isinstance(rules_nodeset_name, list) or rules_nodeset_name is None:
|
|
15
16
|
if not rules_nodeset_name:
|
|
16
17
|
# If no nodeset is provided, default to coding_agent_rules
|
|
17
18
|
rules_nodeset_name = ["coding_agent_rules"]
|
|
@@ -19,7 +20,7 @@ class CodingRulesRetriever:
|
|
|
19
20
|
self.rules_nodeset_name = rules_nodeset_name
|
|
20
21
|
"""Initialize retriever with search parameters."""
|
|
21
22
|
|
|
22
|
-
async def
|
|
23
|
+
async def get_retrieved_objects(self, query: str) -> Any:
|
|
23
24
|
if self.rules_nodeset_name:
|
|
24
25
|
rules_list = await asyncio.gather(
|
|
25
26
|
*[
|
|
@@ -27,5 +28,11 @@ class CodingRulesRetriever:
|
|
|
27
28
|
for nodeset in self.rules_nodeset_name
|
|
28
29
|
]
|
|
29
30
|
)
|
|
30
|
-
|
|
31
31
|
return reduce(lambda x, y: x + y, rules_list, [])
|
|
32
|
+
|
|
33
|
+
async def get_context_from_objects(self, query, retrieved_objects):
|
|
34
|
+
return retrieved_objects
|
|
35
|
+
|
|
36
|
+
async def get_completion_from_context(self, query, retrieved_objects, context):
|
|
37
|
+
# TODO: Add completion generation logic if needed
|
|
38
|
+
return context
|