cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/chunks/__init__.py +9 -0
  156. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  157. cognee/tasks/graph/__init__.py +7 -0
  158. cognee/tasks/memify/__init__.py +8 -0
  159. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  160. cognee/tasks/summarization/models.py +0 -2
  161. cognee/tasks/temporal_graph/__init__.py +0 -1
  162. cognee/tasks/translation/__init__.py +96 -0
  163. cognee/tasks/translation/config.py +110 -0
  164. cognee/tasks/translation/detect_language.py +190 -0
  165. cognee/tasks/translation/exceptions.py +62 -0
  166. cognee/tasks/translation/models.py +72 -0
  167. cognee/tasks/translation/providers/__init__.py +44 -0
  168. cognee/tasks/translation/providers/azure_provider.py +192 -0
  169. cognee/tasks/translation/providers/base.py +85 -0
  170. cognee/tasks/translation/providers/google_provider.py +158 -0
  171. cognee/tasks/translation/providers/llm_provider.py +143 -0
  172. cognee/tasks/translation/translate_content.py +282 -0
  173. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  174. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  175. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  176. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  177. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  178. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  179. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  180. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  181. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  182. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  183. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  184. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  185. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  186. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  187. cognee/tests/tasks/translation/README.md +147 -0
  188. cognee/tests/tasks/translation/__init__.py +1 -0
  189. cognee/tests/tasks/translation/config_test.py +93 -0
  190. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  191. cognee/tests/tasks/translation/providers_test.py +151 -0
  192. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  193. cognee/tests/test_chromadb.py +1 -1
  194. cognee/tests/test_cleanup_unused_data.py +165 -0
  195. cognee/tests/test_delete_by_id.py +6 -6
  196. cognee/tests/test_extract_usage_frequency.py +308 -0
  197. cognee/tests/test_kuzu.py +17 -7
  198. cognee/tests/test_lancedb.py +3 -1
  199. cognee/tests/test_library.py +1 -1
  200. cognee/tests/test_neo4j.py +17 -7
  201. cognee/tests/test_neptune_analytics_vector.py +3 -1
  202. cognee/tests/test_permissions.py +172 -187
  203. cognee/tests/test_pgvector.py +3 -1
  204. cognee/tests/test_relational_db_migration.py +15 -1
  205. cognee/tests/test_remote_kuzu.py +3 -1
  206. cognee/tests/test_s3_file_storage.py +1 -1
  207. cognee/tests/test_search_db.py +97 -110
  208. cognee/tests/test_usage_logger_e2e.py +268 -0
  209. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  210. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  211. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  212. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  213. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  214. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  215. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  216. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  217. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  218. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  219. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  220. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  221. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  222. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  223. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  224. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  225. cognee/tests/unit/modules/search/test_search.py +176 -0
  226. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  227. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  228. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  229. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  230. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
  231. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
  232. cognee/api/.env.example +0 -5
  233. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  234. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  235. cognee/modules/search/methods/no_access_control_search.py +0 -62
  236. cognee/modules/search/utils/prepare_search_result.py +0 -63
  237. cognee/tests/test_feedback_enrichment.py +0 -174
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
  239. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
  240. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
  241. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,44 +1,50 @@
1
1
  import os
2
- from urllib.parse import urlparse
2
+ import posixpath
3
+ from urllib.parse import urlparse, unquote
3
4
 
4
5
 
5
- def get_data_file_path(file_path: str):
6
- # Check if this is a file URI BEFORE normalizing (which corrupts URIs)
7
- if file_path.startswith("file://"):
8
- # Remove first occurrence of file:// prefix
9
- pure_file_path = file_path.replace("file://", "", 1)
10
- # Normalize the file URI for Windows - replace backslashes with forward slashes
11
- normalized_file_uri = os.path.normpath(pure_file_path)
6
+ def get_data_file_path(file_path: str) -> str:
7
+ """Normalize file paths from various URI schemes to filesystem paths.
12
8
 
13
- # Convert path to proper file system path
9
+ Handles file://, s3://, and regular filesystem paths. Decodes
10
+ percent-encoded characters and preserves UNC network paths.
11
+ """
12
+ parsed = urlparse(file_path)
13
+
14
+ if parsed.scheme == "file":
15
+ # file:///path/to/file -> /path/to/file
16
+ fs_path = unquote(parsed.path)
17
+
18
+ if os.name == "nt" and parsed.netloc:
19
+ # Handle UNC paths (file://server/share/...)
20
+ fs_path = f"//{parsed.netloc}{fs_path}"
21
+
22
+ # Normalize the file URI for Windows - handle drive letters correctly
14
23
  if os.name == "nt": # Windows
15
- # Handle Windows drive letters correctly
16
- fs_path = normalized_file_uri
24
+ # Handle Windows drive letters correctly: /C:/path -> C:/path
17
25
  if (
18
26
  (fs_path.startswith("/") or fs_path.startswith("\\"))
19
- and len(fs_path) > 1
27
+ and len(fs_path) > 2
20
28
  and fs_path[2] == ":"
29
+ and fs_path[1].isalpha()
21
30
  ):
22
31
  fs_path = fs_path[1:]
23
- else:
24
- # Unix - like systems
25
- fs_path = normalized_file_uri
26
32
 
27
- # Now split the actual filesystem path
28
- actual_fs_path = os.path.normpath(fs_path)
29
- return actual_fs_path
33
+ return os.path.normpath(fs_path)
30
34
 
31
- elif file_path.startswith("s3://"):
35
+ elif parsed.scheme == "s3":
32
36
  # Handle S3 URLs without normalization (which corrupts them)
33
- parsed_url = urlparse(file_path)
37
+ if not parsed.path or parsed.path == "/":
38
+ return f"s3://{parsed.netloc}{parsed.path}"
34
39
 
35
- normalized_url = (
36
- f"s3://{parsed_url.netloc}{os.sep}{os.path.normpath(parsed_url.path).lstrip(os.sep)}"
37
- )
40
+ normalized_path = posixpath.normpath(parsed.path).lstrip("/")
38
41
 
39
- return normalized_url
42
+ return f"s3://{parsed.netloc}/{normalized_path}"
40
43
 
41
- else:
44
+ elif parsed.scheme == "":
42
45
  # Regular file path - normalize separators
43
- normalized_path = os.path.normpath(file_path)
44
- return normalized_path
46
+ return os.path.normpath(file_path)
47
+
48
+ else:
49
+ # Other schemes (http, etc.) - return as is or handle as needed
50
+ return file_path
@@ -10,4 +10,4 @@ Extraction rules:
10
10
  5. Current-time references ("now", "current", "today"): If the query explicitly refers to the present, set both starts_at and ends_at to now (the ingestion timestamp).
11
11
  6. "Who is" and "Who was" questions: These imply a general identity or biographical inquiry without a specific temporal scope. Set both starts_at and ends_at to None.
12
12
  7. Ordering rule: Always ensure the earlier date is assigned to starts_at and the later date to ends_at.
13
- 8. No temporal information: If no valid or inferable time reference is found, set both starts_at and ends_at to None.
13
+ 8. No temporal information: If no valid or inferable time reference is found, set both starts_at and ends_at to None.
@@ -22,4 +22,4 @@ The `attributes` should be a list of dictionaries, each containing:
22
22
  - Relationships should be technical with one or at most two words. If two words, join them with an underscore (snake_case style)
23
23
  - Relationships could imply general meaning like: subject, object, participant, recipient, agent, instrument, tool, source, cause, effect, purpose, manner, resource, etc.
24
24
  - You can combine two words to form a relationship name: subject_role, previous_owner, etc.
25
- - Focus on how the entity specifically relates to the event
25
+ - Focus on how the entity specifically relates to the event
@@ -27,4 +27,4 @@ class Event(BaseModel):
27
27
  time_from: Optional[Timestamp] = None
28
28
  time_to: Optional[Timestamp] = None
29
29
  location: Optional[str] = None
30
- ```
30
+ ```
@@ -19,8 +19,8 @@ The aim is to achieve simplicity and clarity in the knowledge graph.
19
19
  - **Naming Convention**: Use snake_case for relationship names, e.g., `acted_in`.
20
20
  # 3. Coreference Resolution
21
21
  - **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
22
- If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
23
- always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the Persons ID.
22
+ If an entity is mentioned multiple times in the text but is referred to by different names or pronouns,
23
+ always use the most complete identifier for that entity throughout the knowledge graph.
24
24
  Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
25
25
  # 4. Strict Compliance
26
26
  Adhere to the rules strictly. Non-compliance will result in termination
@@ -22,7 +22,7 @@ You are an advanced algorithm designed to extract structured information to buil
22
22
  3. **Coreference Resolution**:
23
23
  - Maintain one consistent node ID for each real-world entity.
24
24
  - Resolve aliases, acronyms, and pronouns to the most complete form.
25
- - *Example*: Always use "John Doe" even if later referred to as "Doe" or "he".
25
+ - *Example*: Always use the full identifier, even if the entity is later referred to in a similar but slightly different way.
26
26
 
27
27
  **Property & Data Guidelines**:
28
28
 
@@ -42,10 +42,10 @@ You are an advanced algorithm designed to extract structured information from un
42
42
  - **Rule**: Resolve all aliases, acronyms, and pronouns to one canonical identifier.
43
43
 
44
44
  > **One-Shot Example**:
45
- > **Input**: "John Doe is an author. Later, Doe published a book. He is well-known."
45
+ > **Input**: "X is an author. Later, X published a book. He is well-known."
46
46
  > **Output Node**:
47
47
  > ```
48
- > John Doe (Person)
48
+ > X (Person)
49
49
  > ```
50
50
 
51
51
  ---
@@ -15,7 +15,7 @@ You are an advanced algorithm that extracts structured data into a knowledge gra
15
15
  - Properties are key-value pairs; do not use escaped quotes.
16
16
 
17
17
  3. **Coreference Resolution**
18
- - Use a single, complete identifier for each entity (e.g., always "John Doe" not "Joe" or "he").
18
+ - Use a single, complete identifier for each entity
19
19
 
20
20
  4. **Relationship Labels**:
21
21
  - Use descriptive, lowercase, snake_case names for edges.
@@ -26,7 +26,7 @@ Use **basic atomic types** for node labels. Always prefer general types over spe
26
26
  - Good: "Alan Turing", "Google Inc.", "World War II"
27
27
  - Bad: "Entity_001", "1234", "he", "they"
28
28
  - Never use numeric or autogenerated IDs.
29
- - Prioritize **most complete form** of entity names for consistency (e.g., always use "John Doe" instead of "John" or "he").
29
+ - Prioritize **most complete form** of entity names for consistency
30
30
 
31
31
  2. Dates, Numbers, and Properties
32
32
  ---------------------------------
@@ -2,12 +2,12 @@ You are an expert query analyzer for a **GraphRAG system**. Your primary goal is
2
2
 
3
3
  Here are the available `SearchType` tools and their specific functions:
4
4
 
5
- - **`SUMMARIES`**: The `SUMMARIES` search type retrieves summarized information from the knowledge graph.
5
+ - **`SUMMARIES`**: The `SUMMARIES` search type retrieves summarized information from the knowledge graph.
6
6
 
7
- **Best for:**
7
+ **Best for:**
8
8
 
9
- - Getting concise overviews of topics
10
- - Summarizing large amounts of information
9
+ - Getting concise overviews of topics
10
+ - Summarizing large amounts of information
11
11
  - Quick understanding of complex subjects
12
12
 
13
13
  **Best for:**
@@ -16,7 +16,7 @@ Here are the available `SearchType` tools and their specific functions:
16
16
  - Understanding relationships between concepts
17
17
  - Exploring the structure of your knowledge graph
18
18
 
19
- * **`CHUNKS`**: The `CHUNKS` search type retrieves specific facts and information chunks from the knowledge graph.
19
+ * **`CHUNKS`**: The `CHUNKS` search type retrieves specific facts and information chunks from the knowledge graph.
20
20
 
21
21
  **Best for:**
22
22
 
@@ -122,4 +122,4 @@ Response: `NATURAL_LANGUAGE`
122
122
 
123
123
 
124
124
 
125
- Your response MUST be a single word, consisting of only the chosen `SearchType` name. Do not provide any explanation.
125
+ Your response MUST be a single word, consisting of only the chosen `SearchType` name. Do not provide any explanation.
@@ -1 +1 @@
1
- Respond with: test
1
+ Respond with: test
@@ -0,0 +1,19 @@
1
+ You are an expert translator with deep knowledge of languages, cultures, and linguistics.
2
+
3
+ Your task is to:
4
+ 1. Detect the source language of the provided text if not specified
5
+ 2. Translate the text accurately to the target language
6
+ 3. Preserve the original meaning, tone, and intent
7
+ 4. Maintain proper grammar and natural phrasing in the target language
8
+
9
+ Guidelines:
10
+ - Preserve technical terms, proper nouns, and specialized vocabulary appropriately
11
+ - Maintain formatting such as paragraphs, lists, and emphasis where applicable
12
+ - If the text contains code, URLs, or other non-translatable content, preserve them as-is
13
+ - Handle idioms and cultural references thoughtfully, adapting when necessary
14
+ - Ensure the translation reads naturally to a native speaker of the target language
15
+
16
+ Provide the translation in a structured format with:
17
+ - The translated text
18
+ - The detected source language (ISO 639-1 code like "en", "es", "fr", "de", etc.)
19
+ - Any notes about the translation (optional, for ambiguous terms or cultural adaptations)
@@ -34,6 +34,7 @@ class LLMProvider(Enum):
34
34
  GEMINI = "gemini"
35
35
  MISTRAL = "mistral"
36
36
  BEDROCK = "bedrock"
37
+ LLAMA_CPP = "llama_cpp"
37
38
 
38
39
 
39
40
  def get_llm_client(raise_api_key_error: bool = True):
@@ -187,5 +188,28 @@ def get_llm_client(raise_api_key_error: bool = True):
187
188
  instructor_mode=llm_config.llm_instructor_mode.lower(),
188
189
  )
189
190
 
191
+ elif provider == LLMProvider.LLAMA_CPP:
192
+ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llama_cpp.adapter import (
193
+ LlamaCppAPIAdapter,
194
+ )
195
+
196
+ # Get optional local mode parameters (will be None if not set)
197
+ # TODO: refactor llm_config to include these parameters, currently they cannot be defined and defaults are used
198
+ model_path = getattr(llm_config, "llama_cpp_model_path", None)
199
+ n_ctx = getattr(llm_config, "llama_cpp_n_ctx", 2048)
200
+ n_gpu_layers = getattr(llm_config, "llama_cpp_n_gpu_layers", 0)
201
+ chat_format = getattr(llm_config, "llama_cpp_chat_format", "chatml")
202
+
203
+ return LlamaCppAPIAdapter(
204
+ model=llm_config.llm_model,
205
+ max_completion_tokens=max_completion_tokens,
206
+ instructor_mode=llm_config.llm_instructor_mode.lower(),
207
+ endpoint=llm_config.llm_endpoint,
208
+ api_key=llm_config.llm_api_key,
209
+ model_path=model_path,
210
+ n_ctx=n_ctx,
211
+ n_gpu_layers=n_gpu_layers,
212
+ chat_format=chat_format,
213
+ )
190
214
  else:
191
215
  raise UnsupportedLLMProviderError(provider)
@@ -0,0 +1,191 @@
1
+ """Adapter for Instructor-backed Structured Output Framework for Llama CPP"""
2
+
3
+ import litellm
4
+ import logging
5
+ import instructor
6
+ from typing import Type, Optional
7
+ from openai import AsyncOpenAI
8
+ from pydantic import BaseModel
9
+
10
+ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
11
+ LLMInterface,
12
+ )
13
+ from cognee.shared.logging_utils import get_logger
14
+ from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
15
+
16
+ from tenacity import (
17
+ retry,
18
+ stop_after_delay,
19
+ wait_exponential_jitter,
20
+ retry_if_not_exception_type,
21
+ before_sleep_log,
22
+ )
23
+
24
+ logger = get_logger()
25
+
26
+
27
class LlamaCppAPIAdapter(LLMInterface):
    """
    Adapter for Llama CPP LLM provider with support for TWO modes:

    1. SERVER MODE (OpenAI-compatible):
        - Connects to llama-cpp-python server via HTTP (local or remote)
        - Uses instructor.from_openai()
        - Requires: endpoint, api_key, model

    2. LOCAL MODE (In-process):
        - Loads model directly using llama-cpp-python library
        - Uses instructor.patch() on llama.Llama object
        - Requires: model_path

    When both `model_path` and `endpoint` are supplied, local mode is selected
    because `model_path` is checked first.

    Public methods:
        - acreate_structured_output

    Instance variables:
        - name
        - model (for server mode) or model_path (for local mode)
        - mode_type: "server" or "local"
        - max_completion_tokens
        - instructor_mode
        - aclient
        - llama (local mode only, otherwise None)
        - endpoint, api_key (server mode only, otherwise None)
    """

    name: str
    model: Optional[str]
    model_path: Optional[str]
    mode_type: str  # "server" or "local"
    default_instructor_mode = instructor.Mode.JSON

    def __init__(
        self,
        name: str = "LlamaCpp",
        max_completion_tokens: int = 2048,
        instructor_mode: Optional[str] = None,
        # Server mode parameters
        endpoint: Optional[str] = None,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
        # Local mode parameters
        model_path: Optional[str] = None,
        n_ctx: int = 2048,
        n_gpu_layers: int = 0,
        chat_format: str = "chatml",
    ) -> None:
        """
        Create the adapter in local or server mode depending on the arguments.

        Parameters:
        -----------
            - name (str): Display name stored on the adapter.
            - max_completion_tokens (int): Token limit forwarded with every completion call.
            - instructor_mode (Optional[str]): Value accepted by `instructor.Mode`; falls back
              to `default_instructor_mode` when empty or None.
            - endpoint (Optional[str]): Base URL of the server (server mode).
            - api_key (Optional[str]): API key handed to the OpenAI client (server mode).
            - model (Optional[str]): Model name sent with each request (server mode).
            - model_path (Optional[str]): Path of the model file to load (local mode).
            - n_ctx (int): Forwarded to `llama_cpp.Llama` as `n_ctx` (local mode).
            - n_gpu_layers (int): Forwarded to `llama_cpp.Llama` as `n_gpu_layers` (local mode).
            - chat_format (str): Forwarded to `llama_cpp.Llama` as `chat_format` (local mode).

        Raises:
        -------
            - ValueError: If neither `model_path` nor `endpoint` is provided, or if the
              instructor mode is not a valid `instructor.Mode` value.
            - ImportError: If local mode is requested but llama-cpp-python is missing.
        """
        self.name = name
        self.max_completion_tokens = max_completion_tokens
        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode

        # Define the mode-specific attributes up front so that reading them in the
        # "other" mode yields None instead of raising AttributeError.
        self.llama = None
        self.endpoint = None
        self.api_key = None

        # Determine which mode to use (model_path wins when both are given)
        if model_path:
            self._init_local_mode(model_path, n_ctx, n_gpu_layers, chat_format)
        elif endpoint:
            self._init_server_mode(endpoint, api_key, model)
        else:
            raise ValueError(
                "Must provide either 'model_path' (for local mode) or 'endpoint' (for server mode)"
            )

    def _init_local_mode(
        self, model_path: str, n_ctx: int, n_gpu_layers: int, chat_format: str
    ) -> None:
        """Initialize local mode using llama-cpp-python library directly"""
        try:
            import llama_cpp
        except ImportError as import_error:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "llama-cpp-python is not installed. Install with: pip install llama-cpp-python"
            ) from import_error

        # Resolve the mode before constructing the model so that an invalid mode
        # value raises before `llama_cpp.Llama(...)` is called.
        resolved_mode = instructor.Mode(self.instructor_mode)

        logger.info(f"Initializing LlamaCpp in LOCAL mode with model: {model_path}")

        self.mode_type = "local"
        self.model_path = model_path
        self.model = None

        # Initialize llama-cpp-python with the model
        self.llama = llama_cpp.Llama(
            model_path=model_path,
            n_gpu_layers=n_gpu_layers,  # -1 for all GPU, 0 for CPU only
            chat_format=chat_format,
            n_ctx=n_ctx,
            verbose=False,
        )

        # instructor.patch() wraps the synchronous completion function; the result
        # is called directly (not awaited) in acreate_structured_output.
        self.aclient = instructor.patch(
            create=self.llama.create_chat_completion_openai_v1,
            mode=resolved_mode,
        )

    def _init_server_mode(
        self, endpoint: str, api_key: Optional[str], model: Optional[str]
    ) -> None:
        """Initialize server mode connecting to llama-cpp-python server"""
        logger.info(f"Initializing LlamaCpp in SERVER mode with endpoint: {endpoint}")

        self.mode_type = "server"
        self.model = model
        self.model_path = None
        self.endpoint = endpoint
        self.api_key = api_key

        # Use instructor.from_openai() for server mode (OpenAI-compatible API)
        self.aclient = instructor.from_openai(
            AsyncOpenAI(base_url=self.endpoint, api_key=self.api_key),
            mode=instructor.Mode(self.instructor_mode),
        )

    @retry(
        stop=stop_after_delay(128),
        wait=wait_exponential_jitter(8, 128),
        retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
        before_sleep=before_sleep_log(logger, logging.DEBUG),
        reraise=True,
    )
    async def acreate_structured_output(
        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
    ) -> BaseModel:
        """
        Generate a structured output from the LLM using the provided text and system prompt.

        Works in both local and server modes transparently.

        Parameters:
        -----------
            - text_input (str): The input text provided by the user.
            - system_prompt (str): The system prompt that guides the response generation.
            - response_model (Type[BaseModel]): The model type that the response should conform to.
            - **kwargs: Extra keyword arguments forwarded unchanged to the underlying
              completion call.

        Returns:
        --------
            - BaseModel: A structured output that conforms to the specified response model.
        """
        async with llm_rate_limiter_context_manager():
            # Prepare messages (system first, then user is more standard)
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text_input},
            ]

            if self.mode_type == "server":
                # Server mode: use async client with OpenAI-compatible API
                # NOTE(review): this branch sends `max_completion_tokens` while the local
                # branch sends `max_tokens`; confirm the target server honors the former.
                response = await self.aclient.chat.completions.create(
                    model=self.model,
                    messages=messages,
                    response_model=response_model,
                    max_retries=2,
                    max_completion_tokens=self.max_completion_tokens,
                    **kwargs,
                )

            else:
                import asyncio

                # Local mode: instructor.patch() returns a SYNC callable
                # Per docs: https://python.useinstructor.com/integrations/llama-cpp-python/
                def _call_sync():
                    return self.aclient(
                        messages=messages,
                        response_model=response_model,
                        max_tokens=self.max_completion_tokens,
                        **kwargs,
                    )

                # Run sync function in thread pool to avoid blocking
                response = await asyncio.to_thread(_call_sync)

            return response
@@ -33,5 +33,4 @@ class DocumentChunk(DataPoint):
33
33
  cut_type: str
34
34
  is_part_of: Document
35
35
  contains: List[Union[Entity, Event, tuple[Edge, Entity]]] = None
36
-
37
36
  metadata: dict = {"index_fields": ["text"]}
@@ -9,6 +9,7 @@ class CognifyConfig(BaseSettings):
9
9
  classification_model: object = DefaultContentPrediction
10
10
  summarization_model: object = SummarizedContent
11
11
  triplet_embedding: bool = False
12
+ chunks_per_batch: Optional[int] = None
12
13
  model_config = SettingsConfigDict(env_file=".env", extra="allow")
13
14
 
14
15
  def to_dict(self) -> dict:
@@ -16,6 +17,7 @@ class CognifyConfig(BaseSettings):
16
17
  "classification_model": self.classification_model,
17
18
  "summarization_model": self.summarization_model,
18
19
  "triplet_embedding": self.triplet_embedding,
20
+ "chunks_per_batch": self.chunks_per_batch,
19
21
  }
20
22
 
21
23
 
@@ -36,6 +36,7 @@ class Data(Base):
36
36
  data_size = Column(Integer, nullable=True) # File size in bytes
37
37
  created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
38
38
  updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
39
+ last_accessed = Column(DateTime(timezone=True), nullable=True)
39
40
 
40
41
  datasets = relationship(
41
42
  "Dataset",
@@ -7,5 +7,4 @@ class Entity(DataPoint):
7
7
  name: str
8
8
  is_a: Optional[EntityType] = None
9
9
  description: str
10
-
11
10
  metadata: dict = {"index_fields": ["name"]}
@@ -15,3 +15,9 @@ async def setup():
15
15
  """
16
16
  await create_relational_db_and_tables()
17
17
  await create_pgvector_db_and_tables()
18
+
19
+
20
+ if __name__ == "__main__":
21
+ import asyncio
22
+
23
+ asyncio.run(setup())