cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/chunks/__init__.py +9 -0
  156. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  157. cognee/tasks/graph/__init__.py +7 -0
  158. cognee/tasks/memify/__init__.py +8 -0
  159. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  160. cognee/tasks/summarization/models.py +0 -2
  161. cognee/tasks/temporal_graph/__init__.py +0 -1
  162. cognee/tasks/translation/__init__.py +96 -0
  163. cognee/tasks/translation/config.py +110 -0
  164. cognee/tasks/translation/detect_language.py +190 -0
  165. cognee/tasks/translation/exceptions.py +62 -0
  166. cognee/tasks/translation/models.py +72 -0
  167. cognee/tasks/translation/providers/__init__.py +44 -0
  168. cognee/tasks/translation/providers/azure_provider.py +192 -0
  169. cognee/tasks/translation/providers/base.py +85 -0
  170. cognee/tasks/translation/providers/google_provider.py +158 -0
  171. cognee/tasks/translation/providers/llm_provider.py +143 -0
  172. cognee/tasks/translation/translate_content.py +282 -0
  173. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  174. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  175. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  176. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  177. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  178. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  179. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  180. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  181. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  182. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  183. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  184. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  185. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  186. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  187. cognee/tests/tasks/translation/README.md +147 -0
  188. cognee/tests/tasks/translation/__init__.py +1 -0
  189. cognee/tests/tasks/translation/config_test.py +93 -0
  190. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  191. cognee/tests/tasks/translation/providers_test.py +151 -0
  192. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  193. cognee/tests/test_chromadb.py +1 -1
  194. cognee/tests/test_cleanup_unused_data.py +165 -0
  195. cognee/tests/test_delete_by_id.py +6 -6
  196. cognee/tests/test_extract_usage_frequency.py +308 -0
  197. cognee/tests/test_kuzu.py +17 -7
  198. cognee/tests/test_lancedb.py +3 -1
  199. cognee/tests/test_library.py +1 -1
  200. cognee/tests/test_neo4j.py +17 -7
  201. cognee/tests/test_neptune_analytics_vector.py +3 -1
  202. cognee/tests/test_permissions.py +172 -187
  203. cognee/tests/test_pgvector.py +3 -1
  204. cognee/tests/test_relational_db_migration.py +15 -1
  205. cognee/tests/test_remote_kuzu.py +3 -1
  206. cognee/tests/test_s3_file_storage.py +1 -1
  207. cognee/tests/test_search_db.py +97 -110
  208. cognee/tests/test_usage_logger_e2e.py +268 -0
  209. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  210. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  211. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  212. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  213. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  214. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  215. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  216. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  217. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  218. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  219. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  220. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  221. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  222. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  223. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  224. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  225. cognee/tests/unit/modules/search/test_search.py +176 -0
  226. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  227. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  228. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  229. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  230. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
  231. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
  232. cognee/api/.env.example +0 -5
  233. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  234. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  235. cognee/modules/search/methods/no_access_control_search.py +0 -62
  236. cognee/modules/search/utils/prepare_search_result.py +0 -63
  237. cognee/tests/test_feedback_enrichment.py +0 -174
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
  239. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
  240. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
  241. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,79 @@
1
+ Assistant Guidelines
2
+ These rules are absolutely imperative to adhere to. Comply with them precisely as they are outlined.
3
+
4
+ The agent must use sequential thinking MCP tool to work out problems.
5
+
6
+ Core Behavior Guidelines
7
+
8
+ Respond only to explicit requests. Do not add files, code, tests, or comments unless asked.
9
+
10
+ Follow instructions precisely. No assumptions or speculative additions.
11
+
12
+ Use provided context accurately.
13
+
14
+ Avoid extra output. No debugging logs or test harnesses unless requested.
15
+
16
+ Produce clean, optimized code when code is requested. Respect existing style.
17
+
18
+ Deliver complete, standalone solutions. No placeholders.
19
+
20
+ Limit file creation. Only create new files when necessary.
21
+
22
+ If you modify the model in a user's code, you must confirm with the user and never be sneaky. Always tell the user exactly what you are doing.
23
+
24
+ Communication & Delivery
25
+
26
+ 9. Don't explain unless asked. Do not expose reasoning in outputs.
27
+ 10. If unsure, say "I don't know." Avoid hallucinated content.
28
+ 11. Maintain consistency across sessions. Refer to project memory and documentation.
29
+ 12. Respect privacy and permissions. Never leak or infer secure data.
30
+ 13. Prioritize targeted edits over full rewrites.
31
+ 14. Optimize incrementally. Avoid unnecessary overhauls.
32
+
33
+ Spec.md Requirement
34
+
35
+ You must maintain a file named Spec.md. This file acts as the single source of truth for the project.
36
+
37
+ Rules:
38
+
39
+ Before starting any implementation, check if Spec.md already exists.
40
+
41
+ If it does not exist, create one using the template provided below.
42
+
43
+ Always update Spec.md before and after any major change.
44
+
45
+ Use the contents of Spec.md to guide logic, structure, and implementation decisions.
46
+
47
+ When updating a section, condense previous content to keep the document concise.
48
+
49
+ Spec.md Starter Template (Plain Text Format)
50
+
51
+ Title: Spec.md – Project Specification
52
+
53
+ Section: Purpose
54
+ Describe the main goal of this feature, tool, or system.
55
+
56
+ Section: Core Functionality
57
+ List the key features, expected behaviors, and common use cases.
58
+
59
+ Section: Architecture Overview
60
+ Summarize the technical setup, frameworks used, and main modules or services.
61
+
62
+ Section: Input and Output Contracts
63
+ List all inputs and outputs in a table-like format:
64
+
65
+ Input: describe the input data, its format, and where it comes from.
66
+
67
+ Output: describe the output data, its format, and its destination.
68
+
69
+ Section: Edge Cases and Constraints
70
+ List known limitations, special scenarios, and fallback behaviors.
71
+
72
+ Section: File and Module Map
73
+ List all important files or modules and describe what each one is responsible for.
74
+
75
+ Section: Open Questions or TODOs
76
+ Create a checklist of unresolved decisions, logic that needs clarification, or tasks that are still pending.
77
+
78
+ Section: Last Updated
79
+ Include the most recent update date and who made the update.
@@ -0,0 +1,74 @@
1
+ # PEP 8 Style Guide: Essentials
2
+
3
+ ## Code Layout
4
+ - Indentation: 4 spaces per level
5
+ - Line length: 79 for code (88/100 acceptable by team), 72 for comments/docstrings
6
+ - Blank lines: 2 around top-level defs/classes, 1 between methods
7
+
8
+ ```python
9
+ # Hanging indent for long calls
10
+ foo = long_function_name(
11
+ var_one, var_two,
12
+ var_three, var_four,
13
+ )
14
+ ```
15
+
16
+ ## Imports
17
+ - One import per line
18
+ - Group: stdlib, third-party, local
19
+ - Prefer absolute imports; avoid wildcard imports
20
+
21
+ ```python
22
+ import os
23
+ import sys
24
+ from subprocess import Popen, PIPE
25
+
26
+ import requests
27
+
28
+ from myproject.models import User
29
+ ```
30
+
31
+ ## Whitespace
32
+ - No space inside brackets or before commas/semicolons
33
+ - Spaces around binary operators
34
+
35
+ ```python
36
+ x = 1
37
+ hypot2 = x * x + y * y
38
+ ```
39
+
40
+ ## Naming
41
+ - snake_case: functions, variables
42
+ - PascalCase: classes
43
+ - SCREAMING_SNAKE_CASE: constants
44
+
45
+ ## Comments & Docstrings
46
+ - Use complete sentences; keep up to date
47
+ - Triple-double quotes for public modules, classes, functions
48
+ ```python
49
+ def f(x: int) -> int:
50
+ """Return x doubled."""
51
+ return x * 2
52
+ ```
53
+
54
+ ## Type Hints
55
+ - Space after colon; arrow for returns
56
+ ```python
57
+ def munge(s: str) -> str: ...
58
+ ```
59
+
60
+ ## Tooling
61
+ - Black, isort, Flake8 (or Ruff) to automate style
62
+ - Example pyproject.toml excerpt:
63
+ ```toml
64
+ [tool.black]
65
+ line-length = 88
66
+
67
+ [tool.isort]
68
+ profile = "black"
69
+ ```
70
+
71
+ ## Common Violations
72
+ - E501: line too long -> break with parentheses
73
+ - E225: missing whitespace around operator
74
+ - E402: module import not at top of file
@@ -0,0 +1,74 @@
1
+ # The Zen of Python: Practical Guide
2
+
3
+ ## Overview
4
+ The Zen of Python (Tim Peters, import this) captures Python's philosophy. Use these principles as a checklist during design, coding, and reviews.
5
+
6
+ ## Key Principles With Guidance
7
+
8
+ ### 1. Beautiful is better than ugly
9
+ Prefer descriptive names, clear structure, and consistent formatting.
10
+
11
+ ### 2. Explicit is better than implicit
12
+ Be clear about behavior, imports, and types.
13
+ ```python
14
+ from datetime import datetime, timedelta
15
+
16
+ def get_future_date(days_ahead: int) -> datetime:
17
+ return datetime.now() + timedelta(days=days_ahead)
18
+ ```
19
+
20
+ ### 3. Simple is better than complex
21
+ Choose straightforward solutions first.
22
+
23
+ ### 4. Complex is better than complicated
24
+ When complexity is needed, organize it with clear abstractions.
25
+
26
+ ### 5. Flat is better than nested
27
+ Use early returns to reduce indentation.
28
+
29
+ ### 6. Sparse is better than dense
30
+ Give code room to breathe with whitespace.
31
+
32
+ ### 7. Readability counts
33
+ Optimize for human readers; add docstrings for nontrivial code.
34
+
35
+ ### 8. Special cases aren't special enough to break the rules
36
+ Stay consistent; exceptions should be rare and justified.
37
+
38
+ ### 9. Although practicality beats purity
39
+ Prefer practical solutions that teams can maintain.
40
+
41
+ ### 10. Errors should never pass silently
42
+ Handle exceptions explicitly; log with context.
43
+
44
+ ### 11. Unless explicitly silenced
45
+ Silence only specific, acceptable errors and document why.
46
+
47
+ ### 12. In the face of ambiguity, refuse the temptation to guess
48
+ Require explicit inputs and behavior.
49
+
50
+ ### 13. There should be one obvious way to do it
51
+ Prefer standard library patterns and idioms.
52
+
53
+ ### 14. Although that way may not be obvious at first
54
+ Learn Python idioms; embrace clarity over novelty.
55
+
56
+ ### 15. Now is better than never; 16. Never is often better than right now
57
+ Iterate, but don't rush broken code.
58
+
59
+ ### 17/18. Hard to explain is bad; easy to explain is good
60
+ Prefer designs you can explain simply.
61
+
62
+ ### 19. Namespaces are one honking great idea
63
+ Use modules/packages to separate concerns; avoid wildcard imports.
64
+
65
+ ## Modern Python Tie-ins
66
+ - Type hints reinforce explicitness
67
+ - Context managers enforce safe resource handling
68
+ - Dataclasses improve readability for data containers
69
+
70
+ ## Quick Review Checklist
71
+ - Is it readable and explicit?
72
+ - Is this the simplest working solution?
73
+ - Are errors explicit and logged?
74
+ - Are modules/namespaces used appropriately?
@@ -1,5 +1,5 @@
1
1
  import asyncio
2
- from typing import Any, Optional, List, Type
2
+ from typing import Any, Optional, List, Type, Union
3
3
  from cognee.shared.logging_utils import get_logger
4
4
 
5
5
  from cognee.infrastructure.entities.BaseEntityExtractor import BaseEntityExtractor
@@ -40,19 +40,21 @@ class EntityCompletionRetriever(BaseRetriever):
40
40
  context_provider: BaseContextProvider,
41
41
  user_prompt_path: str = "context_for_question.txt",
42
42
  system_prompt_path: str = "answer_simple_question.txt",
43
+ session_id: Optional[str] = None,
44
+ response_model: Type = str,
43
45
  ):
44
46
  self.extractor = extractor
45
47
  self.context_provider = context_provider
46
48
  self.user_prompt_path = user_prompt_path
47
49
  self.system_prompt_path = system_prompt_path
50
+ self.session_id = session_id
51
+ self.response_model = response_model
48
52
 
49
- async def get_context(self, query: str) -> Any:
53
+ async def get_retrieved_objects(self, query: str) -> Any:
50
54
  """
51
- Get context using entity extraction and context provider.
55
+ Get relevant objects from the provided query.
52
56
 
53
- Logs the processing of the query and retrieves entities. If entities are extracted, it
54
- attempts to retrieve the corresponding context using the context provider. Returns None
55
- if no entities or context are found, or logs the error if an exception occurs.
57
+ Extracts and returns entities from the provided query, returning None if no entities are found.
56
58
 
57
59
  Parameters:
58
60
  -----------
@@ -62,8 +64,8 @@ class EntityCompletionRetriever(BaseRetriever):
62
64
  Returns:
63
65
  --------
64
66
 
65
- - Any: The context retrieved from the context provider or None if not found or an
66
- error occurred.
67
+ - Any: The extracted entities, or None if no entities are found.
68
+
67
69
  """
68
70
  try:
69
71
  logger.info(f"Processing query: {query[:100]}")
@@ -73,40 +75,57 @@ class EntityCompletionRetriever(BaseRetriever):
73
75
  logger.info("No entities extracted")
74
76
  return None
75
77
 
76
- context = await self.context_provider.get_context(entities, query)
78
+ return entities
79
+
80
+ except Exception as e:
81
+ logger.error(f"Context retrieval failed: {str(e)}")
82
+ return None
83
+
84
+ async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> str:
85
+ """
86
+ Get context using the extracted entities and a context provider.
87
+
88
+ Retrieves the context corresponding to the retrieved entities in retrieved_objects.
89
+ Returns an empty string if no context is retrieved.
90
+
91
+ Parameters:
92
+ -----------
93
+
94
+ - query (str): The query string for which context is being retrieved.
95
+ - retrieved_objects (Any): The retrieved entities extracted from the query.
96
+
97
+ Returns:
98
+ --------
99
+
100
+ - str: The context retrieved from the context provider or an empty string
101
+ if not found or an error occurred.
102
+ """
103
+ try:
104
+ logger.info(f"Processing query: {query[:100]}")
105
+
106
+ context = await self.context_provider.get_context(retrieved_objects, query)
77
107
  if not context:
78
108
  logger.info("No context retrieved")
79
- return None
109
+ return ""
80
110
 
81
111
  return context
82
112
 
83
113
  except Exception as e:
84
114
  logger.error(f"Context retrieval failed: {str(e)}")
85
- return None
115
+ return ""
86
116
 
87
- async def get_completion(
88
- self,
89
- query: str,
90
- context: Optional[Any] = None,
91
- session_id: Optional[str] = None,
92
- response_model: Type = str,
93
- ) -> List[Any]:
117
+ async def get_completion_from_context(
118
+ self, query: str, retrieved_objects: Any, context: Any
119
+ ) -> Union[List[str], List[dict]]:
94
120
  """
95
- Generate completion using provided context or fetch new context.
96
-
97
- If context is not provided, it fetches context using the query. If no context is
98
- available, it returns an error message. Logs an error if completion generation fails due
99
- to an exception.
121
+ Generate completion using provided context.
100
122
 
101
123
  Parameters:
102
124
  -----------
103
125
 
104
126
  - query (str): The query string for which completion is being generated.
105
- - context (Optional[Any]): Optional context to be used for generating completion;
106
- fetched if not provided. (default None)
107
- - session_id (Optional[str]): Optional session identifier for caching. If None,
108
- defaults to 'default_session'. (default None)
109
- - response_model (Type): The Pydantic model type for structured output. (default str)
127
+ - retrieved_objects (Any): The retrieved objects extracted from the query.
128
+ - context (Any): The context to be used for generating completion.
110
129
 
111
130
  Returns:
112
131
  --------
@@ -115,12 +134,6 @@ class EntityCompletionRetriever(BaseRetriever):
115
134
  relevant entities were found.
116
135
  """
117
136
  try:
118
- if context is None:
119
- context = await self.get_context(query)
120
-
121
- if context is None:
122
- return ["No relevant entities found for the query."]
123
-
124
137
  # Check if we need to generate context summary for caching
125
138
  cache_config = CacheConfig()
126
139
  user = session_user.get()
@@ -128,7 +141,7 @@ class EntityCompletionRetriever(BaseRetriever):
128
141
  session_save = user_id and cache_config.caching
129
142
 
130
143
  if session_save:
131
- conversation_history = await get_conversation_history(session_id=session_id)
144
+ conversation_history = await get_conversation_history(session_id=self.session_id)
132
145
 
133
146
  context_summary, completion = await asyncio.gather(
134
147
  summarize_text(str(context)),
@@ -138,7 +151,7 @@ class EntityCompletionRetriever(BaseRetriever):
138
151
  user_prompt_path=self.user_prompt_path,
139
152
  system_prompt_path=self.system_prompt_path,
140
153
  conversation_history=conversation_history,
141
- response_model=response_model,
154
+ response_model=self.response_model,
142
155
  ),
143
156
  )
144
157
  else:
@@ -147,7 +160,7 @@ class EntityCompletionRetriever(BaseRetriever):
147
160
  context=context,
148
161
  user_prompt_path=self.user_prompt_path,
149
162
  system_prompt_path=self.system_prompt_path,
150
- response_model=response_model,
163
+ response_model=self.response_model,
151
164
  )
152
165
 
153
166
  if session_save:
@@ -155,7 +168,7 @@ class EntityCompletionRetriever(BaseRetriever):
155
168
  query=query,
156
169
  context_summary=context_summary,
157
170
  answer=completion,
158
- session_id=session_id,
171
+ session_id=self.session_id,
159
172
  )
160
173
 
161
174
  return [completion]
@@ -1 +0,0 @@
1
-
@@ -1,22 +1,78 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Any, Optional, Type, List
2
+ from typing import Any, Optional, Type, List, Union
3
3
 
4
4
 
5
5
  class BaseRetriever(ABC):
6
- """Base class for all retrieval operations."""
6
+ """
7
+ Base class for all retrieval operations.
8
+
9
+ The retrieval workflow follows a three-step pipeline:
10
+ 1. get_retrieved_objects: Fetch raw data (e.g., Graph Edges, Vector chunks).
11
+ 2. get_context_from_objects: Process raw data into a format suitable for an LLM (e.g., text string).
12
+ 3. get_completion_from_context: Generate a final response with the help of an LLM using the context and original query.
13
+ """
7
14
 
8
15
  @abstractmethod
9
- async def get_context(self, query: str) -> Any:
10
- """Retrieves context based on the query."""
16
+ async def get_retrieved_objects(self, query: str) -> Any:
17
+ """
18
+ Retrieves the raw data points from the underlying storage (Graph or Vector DB).
19
+
20
+ Args:
21
+ query (str): The search query or input string.
22
+
23
+ Returns:
24
+ List[Any]: A list of raw objects (e.g., Edge objects, Document chunks)
25
+ relevant to the query.
26
+ """
11
27
  pass
12
28
 
13
29
  @abstractmethod
14
- async def get_completion(
30
+ async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> str:
31
+ """
32
+ Transforms raw retrieved objects into a structured context for the LLM.
33
+
34
+ Args:
35
+ query (str): The search query or input string.
36
+ retrieved_objects (List[Any]): The output from get_retrieved_objects.
37
+
38
+ Returns:
39
+ Any: The formatted context (typically a string or a list of strings)
40
+ to be injected into a prompt.
41
+ """
42
+ pass
43
+
44
+ @abstractmethod
45
+ async def get_completion_from_context(
15
46
  self,
16
47
  query: str,
17
- context: Optional[Any] = None,
18
- session_id: Optional[str] = None,
19
- response_model: Type = str,
20
- ) -> List[Any]:
21
- """Generates a response using the query and optional context."""
48
+ retrieved_objects: Any,
49
+ context: Any,
50
+ ) -> Union[List[str], List[dict]]:
51
+ """
52
+ Generates a final output or answer based on the query and retrieved context.
53
+
54
+ Args:
55
+ query (str): The original user query.
56
+ retrieved_objects (List[Any]): The output from get_retrieved_objects.
57
+ context (Any): The formatted context string/data used to
58
+ augment the generation. Output from get_context_from_objects.
59
+
60
+ Returns:
61
+ List[Any]: A list containing the generated completions or response objects.
62
+ """
22
63
  pass
64
+
65
+ async def get_completion(self, query: str) -> Union[List[str], List[dict]]:
66
+ """
67
+ Runs the full retrieval pipeline for the query (retrieve objects, build context, generate completion) and returns the result.
68
+
69
+ Args:
70
+ query (str): The original user query.
71
+
72
+ Returns:
73
+ List[Any]: A list containing the generated completions or response objects.
74
+ """
75
+ retrieved_objects = await self.get_retrieved_objects(query)
76
+ context = await self.get_context_from_objects(query, retrieved_objects)
77
+ completion = await self.get_completion_from_context(query, retrieved_objects, context)
78
+ return completion
@@ -1,10 +1,11 @@
1
- from typing import Any, Optional
2
-
1
+ from typing import Any, Optional, List, Union
2
+ from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
3
3
  from cognee.shared.logging_utils import get_logger
4
4
  from cognee.infrastructure.databases.vector import get_vector_engine
5
5
  from cognee.modules.retrieval.base_retriever import BaseRetriever
6
6
  from cognee.modules.retrieval.exceptions.exceptions import NoDataError
7
7
  from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
8
+ from datetime import datetime, timezone
8
9
 
9
10
  logger = get_logger("ChunksRetriever")
10
11
 
@@ -27,75 +28,82 @@ class ChunksRetriever(BaseRetriever):
27
28
  ):
28
29
  self.top_k = top_k
29
30
 
30
- async def get_context(self, query: str) -> Any:
31
+ async def get_completion_from_context(
32
+ self, query: str, retrieved_objects: Any, context: Any
33
+ ) -> Union[List[str], List[dict]]:
31
34
  """
32
- Retrieves document chunks context based on the query.
33
-
34
- Searches for document chunks relevant to the specified query using a vector engine.
35
- Raises a NoDataError if no data is found in the system.
35
+ Generates a completion using document chunks context.
36
+ In case of the Chunks Retriever, we do not generate a completion, we just return
37
+ the payloads of found chunks.
36
38
 
37
39
  Parameters:
38
40
  -----------
39
41
 
40
- - query (str): The query string to search for relevant document chunks.
42
+ - query (str): The query string to be used for generating a completion.
43
+ - retrieved_objects (Any): The retrieved objects to be used for generating a completion.
44
+ - context (Any): The context to be used for generating a completion.
41
45
 
42
46
  Returns:
43
47
  --------
44
48
 
45
- - Any: A list of document chunk payloads retrieved from the search.
49
+ - List[dict]: A list of payloads of found chunks.
46
50
  """
47
- logger.info(
48
- f"Starting chunk retrieval for query: '{query[:100]}{'...' if len(query) > 100 else ''}'"
49
- )
50
-
51
- vector_engine = get_vector_engine()
52
-
53
- try:
54
- found_chunks = await vector_engine.search("DocumentChunk_text", query, limit=self.top_k)
55
- logger.info(f"Found {len(found_chunks)} chunks from vector search")
56
- except CollectionNotFoundError as error:
57
- logger.error("DocumentChunk_text collection not found in vector database")
58
- raise NoDataError("No data found in the system, please add data first.") from error
59
-
60
- chunk_payloads = [result.payload for result in found_chunks]
61
- logger.info(f"Returning {len(chunk_payloads)} chunk payloads")
62
- return chunk_payloads
51
+ # TODO: Do we want to generate a completion using LLM here?
52
+ if retrieved_objects:
53
+ chunk_payloads = [found_chunk.payload for found_chunk in retrieved_objects]
54
+ return chunk_payloads
55
+ else:
56
+ return []
63
57
 
64
- async def get_completion(
65
- self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
66
- ) -> Any:
58
+ async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> str:
67
59
  """
68
- Generates a completion using document chunks context.
69
-
70
- If the context is not provided, it retrieves the context based on the query. Returns the
71
- context, which can be used for further processing or generation of outputs.
60
+ Retrieves context from retrieved chunks, in text form.
72
61
 
73
62
  Parameters:
74
63
  -----------
75
64
 
76
- - query (str): The query string to be used for generating a completion.
77
- - context (Optional[Any]): Optional pre-fetched context to use for generating the
78
- completion; if None, it retrieves the context for the query. (default None)
79
- - session_id (Optional[str]): Optional session identifier for caching. If None,
80
- defaults to 'default_session'. (default None)
65
+ - query (str): The query string used to search for relevant document chunks.
66
+ - retrieved_objects (Any): The retrieved objects to be used for generating textual context.
81
67
 
82
68
  Returns:
83
69
  --------
84
70
 
85
- - Any: The context used for the completion or the retrieved context if none was
86
- provided.
71
+ - str: A string containing the combined text of the retrieved chunks, or an
72
+ empty string if none are found.
87
73
  """
88
- logger.info(
89
- f"Starting completion generation for query: '{query[:100]}{'...' if len(query) > 100 else ''}'"
90
- )
91
-
92
- if context is None:
93
- logger.debug("No context provided, retrieving context from vector database")
94
- context = await self.get_context(query)
74
+ if retrieved_objects:
75
+ chunk_payload_texts = [found_chunk.payload["text"] for found_chunk in retrieved_objects]
76
+ return "\n".join(chunk_payload_texts)
95
77
  else:
96
- logger.debug("Using provided context")
78
+ return ""
97
79
 
80
+ async def get_retrieved_objects(self, query: str) -> Any:
81
+ """
82
+ Retrieves document chunks context based on the query.
83
+ Searches for document chunks relevant to the specified query using a vector engine.
84
+ Raises a NoDataError if no data is found in the system.
85
+ Parameters:
86
+ -----------
87
+ - query (str): The query string to search for relevant document chunks.
88
+ Returns:
89
+ --------
90
+ - Any: A list of document chunks retrieved from the search.
91
+ """
98
92
  logger.info(
99
- f"Returning context with {len(context) if isinstance(context, list) else 1} item(s)"
93
+ f"Starting chunk retrieval for query: '{query[:100]}{'...' if len(query) > 100 else ''}'"
100
94
  )
101
- return context
95
+
96
+ vector_engine = get_vector_engine()
97
+
98
+ try:
99
+ found_chunks = await vector_engine.search(
100
+ "DocumentChunk_text", query, limit=self.top_k, include_payload=True
101
+ )
102
+ logger.info(f"Found {len(found_chunks)} chunks from vector search")
103
+ await update_node_access_timestamps(found_chunks)
104
+
105
+ return found_chunks
106
+
107
+ except CollectionNotFoundError as error:
108
+ logger.error("DocumentChunk_text collection not found in vector database")
109
+ raise NoDataError("No data found in the system, please add data first.") from error
@@ -1,17 +1,18 @@
1
1
  import asyncio
2
2
  from functools import reduce
3
- from typing import List, Optional
3
+ from typing import List, Optional, Any
4
4
  from cognee.shared.logging_utils import get_logger
5
+ from cognee.modules.retrieval.base_retriever import BaseRetriever
5
6
  from cognee.tasks.codingagents.coding_rule_associations import get_existing_rules
6
7
 
7
8
  logger = get_logger("CodingRulesRetriever")
8
9
 
9
10
 
10
- class CodingRulesRetriever:
11
+ class CodingRulesRetriever(BaseRetriever):
11
12
  """Retriever for handling codeing rule based searches."""
12
13
 
13
14
  def __init__(self, rules_nodeset_name: Optional[List[str]] = None):
14
- if isinstance(rules_nodeset_name, list):
15
+ if isinstance(rules_nodeset_name, list) or rules_nodeset_name is None:
15
16
  if not rules_nodeset_name:
16
17
  # If there is no provided nodeset set to coding_agent_rules
17
18
  rules_nodeset_name = ["coding_agent_rules"]
@@ -19,7 +20,7 @@ class CodingRulesRetriever:
19
20
  self.rules_nodeset_name = rules_nodeset_name
20
21
  """Initialize retriever with search parameters."""
21
22
 
22
- async def get_existing_rules(self, query_text):
23
+ async def get_retrieved_objects(self, query: str) -> Any:
23
24
  if self.rules_nodeset_name:
24
25
  rules_list = await asyncio.gather(
25
26
  *[
@@ -27,5 +28,11 @@ class CodingRulesRetriever:
27
28
  for nodeset in self.rules_nodeset_name
28
29
  ]
29
30
  )
30
-
31
31
  return reduce(lambda x, y: x + y, rules_list, [])
32
+
33
+ async def get_context_from_objects(self, query, retrieved_objects):
34
+ return retrieved_objects
35
+
36
+ async def get_completion_from_context(self, query, retrieved_objects, context):
37
+ # TODO: Add completion generation logic if needed
38
+ return context