cognee-0.5.1.dev0-py3-none-any.whl → cognee-0.5.2.dev0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
- cognee/api/v1/memify/routers/get_memify_router.py +2 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +25 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +1 -0
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +31 -32
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -215
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
- cognee/tests/integration/retrieval/test_structured_output.py +62 -18
- cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
- cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
- cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +97 -110
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +176 -0
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/METADATA +17 -10
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/RECORD +232 -144
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/infrastructure/files/utils/get_data_file_path.py
CHANGED

@@ -1,44 +1,50 @@
 import os
-
+import posixpath
+from urllib.parse import urlparse, unquote
 
 
-def get_data_file_path(file_path: str):
-
-    if file_path.startswith("file://"):
-        # Remove first occurrence of file:// prefix
-        pure_file_path = file_path.replace("file://", "", 1)
-        # Normalize the file URI for Windows - replace backslashes with forward slashes
-        normalized_file_uri = os.path.normpath(pure_file_path)
+def get_data_file_path(file_path: str) -> str:
+    """Normalize file paths from various URI schemes to filesystem paths.
 
-
+    Handles file://, s3://, and regular filesystem paths. Decodes
+    percent-encoded characters and preserves UNC network paths.
+    """
+    parsed = urlparse(file_path)
+
+    if parsed.scheme == "file":
+        # file:///path/to/file -> /path/to/file
+        fs_path = unquote(parsed.path)
+
+        if os.name == "nt" and parsed.netloc:
+            # Handle UNC paths (file://server/share/...)
+            fs_path = f"//{parsed.netloc}{fs_path}"
+
+        # Normalize the file URI for Windows - handle drive letters correctly
         if os.name == "nt": # Windows
-            # Handle Windows drive letters correctly
-            fs_path = normalized_file_uri
             if (
                 (fs_path.startswith("/") or fs_path.startswith("\\"))
-                and len(fs_path) >
+                and len(fs_path) > 2
                 and fs_path[2] == ":"
+                and fs_path[1].isalpha()
             ):
                 fs_path = fs_path[1:]
-        else:
-            # Unix - like systems
-            fs_path = normalized_file_uri
 
-
-        actual_fs_path = os.path.normpath(fs_path)
-        return actual_fs_path
+        return os.path.normpath(fs_path)
 
-    elif
+    elif parsed.scheme == "s3":
         # Handle S3 URLs without normalization (which corrupts them)
-
+        if not parsed.path or parsed.path == "/":
+            return f"s3://{parsed.netloc}{parsed.path}"
 
-
-            f"s3://{parsed_url.netloc}{os.sep}{os.path.normpath(parsed_url.path).lstrip(os.sep)}"
-        )
+        normalized_path = posixpath.normpath(parsed.path).lstrip("/")
 
-        return
+        return f"s3://{parsed.netloc}/{normalized_path}"
 
-
+    elif parsed.scheme == "":
         # Regular file path - normalize separators
-
-
+        return os.path.normpath(file_path)
+
+    else:
+        # Other schemes (http, etc.) - return as is or handle as needed
+        return file_path
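For orientation, a minimal usage sketch of the reworked helper, based only on the new implementation shown above; the sample paths and the expected results in the comments are illustrative and assume a POSIX system:

from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path

# file:// URIs are decoded and normalized to a plain filesystem path
print(get_data_file_path("file:///tmp/my%20data/file.txt"))        # /tmp/my data/file.txt

# s3:// URIs keep their scheme and bucket; the key is normalized with posixpath
print(get_data_file_path("s3://my-bucket/raw//docs/report.pdf"))   # s3://my-bucket/raw/docs/report.pdf

# plain paths are simply normalized for the current OS
print(get_data_file_path("data//files/../files/a.txt"))            # data/files/a.txt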
cognee/infrastructure/llm/prompts/extract_query_time.txt
CHANGED

@@ -10,4 +10,4 @@ Extraction rules:
 5. Current-time references ("now", "current", "today"): If the query explicitly refers to the present, set both starts_at and ends_at to now (the ingestion timestamp).
 6. "Who is" and "Who was" questions: These imply a general identity or biographical inquiry without a specific temporal scope. Set both starts_at and ends_at to None.
 7. Ordering rule: Always ensure the earlier date is assigned to starts_at and the later date to ends_at.
-8. No temporal information: If no valid or inferable time reference is found, set both starts_at and ends_at to None.
+8. No temporal information: If no valid or inferable time reference is found, set both starts_at and ends_at to None.
@@ -22,4 +22,4 @@ The `attributes` should be a list of dictionaries, each containing:
 - Relationships should be technical with one or at most two words. If two words, use underscore camelcase style
 - Relationships could imply general meaning like: subject, object, participant, recipient, agent, instrument, tool, source, cause, effect, purpose, manner, resource, etc.
 - You can combine two words to form a relationship name: subject_role, previous_owner, etc.
-- Focus on how the entity specifically relates to the event
+- Focus on how the entity specifically relates to the event
cognee/infrastructure/llm/prompts/generate_graph_prompt.txt
CHANGED

@@ -19,8 +19,8 @@ The aim is to achieve simplicity and clarity in the knowledge graph.
 - **Naming Convention**: Use snake_case for relationship names, e.g., `acted_in`.
 # 3. Coreference Resolution
 - **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
-If an entity,
-always use the most complete identifier for that entity throughout the knowledge graph.
+If an entity, is mentioned multiple times in the text but is referred to by different names or pronouns,
+always use the most complete identifier for that entity throughout the knowledge graph.
 Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
 # 4. Strict Compliance
 Adhere to the rules strictly. Non-compliance will result in termination
@@ -22,7 +22,7 @@ You are an advanced algorithm designed to extract structured information to buil
 3. **Coreference Resolution**:
 - Maintain one consistent node ID for each real-world entity.
 - Resolve aliases, acronyms, and pronouns to the most complete form.
-- *Example*: Always use
+- *Example*: Always use full identifier even if later referred to as in a similar but slightly different way
 
 **Property & Data Guidelines**:
 
cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt
CHANGED

@@ -42,10 +42,10 @@ You are an advanced algorithm designed to extract structured information from un
 - **Rule**: Resolve all aliases, acronyms, and pronouns to one canonical identifier.
 
 > **One-Shot Example**:
-> **Input**: "
+> **Input**: "X is an author. Later, Doe published a book. He is well-known."
 > **Output Node**:
 > ```
->
+> X (Person)
 > ```
 
 ---
@@ -15,7 +15,7 @@ You are an advanced algorithm that extracts structured data into a knowledge gra
 - Properties are key-value pairs; do not use escaped quotes.
 
 3. **Coreference Resolution**
-- Use a single, complete identifier for each entity
+- Use a single, complete identifier for each entity
 
 4. **Relationship Labels**:
 - Use descriptive, lowercase, snake_case names for edges.
@@ -26,7 +26,7 @@ Use **basic atomic types** for node labels. Always prefer general types over spe
 - Good: "Alan Turing", "Google Inc.", "World War II"
 - Bad: "Entity_001", "1234", "he", "they"
 - Never use numeric or autogenerated IDs.
-- Prioritize **most complete form** of entity names for consistency
+- Prioritize **most complete form** of entity names for consistency
 
 2. Dates, Numbers, and Properties
 ---------------------------------
cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt
CHANGED

@@ -2,12 +2,12 @@ You are an expert query analyzer for a **GraphRAG system**. Your primary goal is
 
 Here are the available `SearchType` tools and their specific functions:
 
-- **`SUMMARIES`**: The `SUMMARIES` search type retrieves summarized information from the knowledge graph.
+- **`SUMMARIES`**: The `SUMMARIES` search type retrieves summarized information from the knowledge graph.
 
-**Best for:**
+**Best for:**
 
-- Getting concise overviews of topics
-- Summarizing large amounts of information
+- Getting concise overviews of topics
+- Summarizing large amounts of information
 - Quick understanding of complex subjects
 
 **Best for:**
@@ -16,7 +16,7 @@ Here are the available `SearchType` tools and their specific functions:
 - Understanding relationships between concepts
 - Exploring the structure of your knowledge graph
 
-* **`CHUNKS`**: The `CHUNKS` search type retrieves specific facts and information chunks from the knowledge graph.
+* **`CHUNKS`**: The `CHUNKS` search type retrieves specific facts and information chunks from the knowledge graph.
 
 **Best for:**
 
@@ -122,4 +122,4 @@ Response: `NATURAL_LANGUAGE`
 
 
 
-Your response MUST be a single word, consisting of only the chosen `SearchType` name. Do not provide any explanation.
+Your response MUST be a single word, consisting of only the chosen `SearchType` name. Do not provide any explanation.
cognee/infrastructure/llm/prompts/test.txt
CHANGED

@@ -1 +1 @@
-Respond with: test
+Respond with: test
cognee/infrastructure/llm/prompts/translate_content.txt
ADDED

@@ -0,0 +1,19 @@
+You are an expert translator with deep knowledge of languages, cultures, and linguistics.
+
+Your task is to:
+1. Detect the source language of the provided text if not specified
+2. Translate the text accurately to the target language
+3. Preserve the original meaning, tone, and intent
+4. Maintain proper grammar and natural phrasing in the target language
+
+Guidelines:
+- Preserve technical terms, proper nouns, and specialized vocabulary appropriately
+- Maintain formatting such as paragraphs, lists, and emphasis where applicable
+- If the text contains code, URLs, or other non-translatable content, preserve them as-is
+- Handle idioms and cultural references thoughtfully, adapting when necessary
+- Ensure the translation reads naturally to a native speaker of the target language
+
+Provide the translation in a structured format with:
+- The translated text
+- The detected source language (ISO 639-1 code like "en", "es", "fr", "de", etc.)
+- Any notes about the translation (optional, for ambiguous terms or cultural adaptations)
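The prompt asks the model for a structured response (translated text, detected source language, optional notes). The actual schema lives in cognee/tasks/translation/models.py, which is not shown in this diff; the sketch below is only a hypothetical illustration of the shape the prompt describes, with made-up class and field names:

from typing import Optional
from pydantic import BaseModel


class TranslatedContent(BaseModel):
    """Hypothetical response model mirroring the structure requested by translate_content.txt."""

    translated_text: str          # the translation itself
    source_language: str          # detected ISO 639-1 code, e.g. "en", "es"
    notes: Optional[str] = None   # optional remarks on ambiguous terms or cultural adaptations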
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py
CHANGED
@@ -34,6 +34,7 @@ class LLMProvider(Enum):
     GEMINI = "gemini"
     MISTRAL = "mistral"
     BEDROCK = "bedrock"
+    LLAMA_CPP = "llama_cpp"
 
 
 def get_llm_client(raise_api_key_error: bool = True):

@@ -187,5 +188,28 @@ def get_llm_client(raise_api_key_error: bool = True):
             instructor_mode=llm_config.llm_instructor_mode.lower(),
         )
 
+    elif provider == LLMProvider.LLAMA_CPP:
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llama_cpp.adapter import (
+            LlamaCppAPIAdapter,
+        )
+
+        # Get optional local mode parameters (will be None if not set)
+        # TODO: refactor llm_config to include these parameters, currently they cannot be defined and defaults are used
+        model_path = getattr(llm_config, "llama_cpp_model_path", None)
+        n_ctx = getattr(llm_config, "llama_cpp_n_ctx", 2048)
+        n_gpu_layers = getattr(llm_config, "llama_cpp_n_gpu_layers", 0)
+        chat_format = getattr(llm_config, "llama_cpp_chat_format", "chatml")
+
+        return LlamaCppAPIAdapter(
+            model=llm_config.llm_model,
+            max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
+            endpoint=llm_config.llm_endpoint,
+            api_key=llm_config.llm_api_key,
+            model_path=model_path,
+            n_ctx=n_ctx,
+            n_gpu_layers=n_gpu_layers,
+            chat_format=chat_format,
+        )
     else:
         raise UnsupportedLLMProviderError(provider)
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py
ADDED
@@ -0,0 +1,191 @@
+"""Adapter for Instructor-backed Structured Output Framework for Llama CPP"""
+
+import litellm
+import logging
+import instructor
+from typing import Type, Optional
+from openai import AsyncOpenAI
+from pydantic import BaseModel
+
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
+from cognee.shared.logging_utils import get_logger
+from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
+
+from tenacity import (
+    retry,
+    stop_after_delay,
+    wait_exponential_jitter,
+    retry_if_not_exception_type,
+    before_sleep_log,
+)
+
+logger = get_logger()
+
+
+class LlamaCppAPIAdapter(LLMInterface):
+    """
+    Adapter for Llama CPP LLM provider with support for TWO modes:
+
+    1. SERVER MODE (OpenAI-compatible):
+       - Connects to llama-cpp-python server via HTTP (local or remote)
+       - Uses instructor.from_openai()
+       - Requires: endpoint, api_key, model
+
+    2. LOCAL MODE (In-process):
+       - Loads model directly using llama-cpp-python library
+       - Uses instructor.patch() on llama.Llama object
+       - Requires: model_path
+
+    Public methods:
+    - acreate_structured_output
+
+    Instance variables:
+    - name
+    - model (for server mode) or model_path (for local mode)
+    - mode_type: "server" or "local"
+    - max_completion_tokens
+    - aclient
+    """
+
+    name: str
+    model: Optional[str]
+    model_path: Optional[str]
+    mode_type: str  # "server" or "local"
+    default_instructor_mode = instructor.Mode.JSON
+
+    def __init__(
+        self,
+        name: str = "LlamaCpp",
+        max_completion_tokens: int = 2048,
+        instructor_mode: Optional[str] = None,
+        # Server mode parameters
+        endpoint: Optional[str] = None,
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+        # Local mode parameters
+        model_path: Optional[str] = None,
+        n_ctx: int = 2048,
+        n_gpu_layers: int = 0,
+        chat_format: str = "chatml",
+    ):
+        self.name = name
+        self.max_completion_tokens = max_completion_tokens
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
+        # Determine which mode to use
+        if model_path:
+            self._init_local_mode(model_path, n_ctx, n_gpu_layers, chat_format)
+        elif endpoint:
+            self._init_server_mode(endpoint, api_key, model)
+        else:
+            raise ValueError(
+                "Must provide either 'model_path' (for local mode) or 'endpoint' (for server mode)"
+            )
+
+    def _init_local_mode(self, model_path: str, n_ctx: int, n_gpu_layers: int, chat_format: str):
+        """Initialize local mode using llama-cpp-python library directly"""
+        try:
+            import llama_cpp
+        except ImportError:
+            raise ImportError(
+                "llama-cpp-python is not installed. Install with: pip install llama-cpp-python"
+            )
+
+        logger.info(f"Initializing LlamaCpp in LOCAL mode with model: {model_path}")
+
+        self.mode_type = "local"
+        self.model_path = model_path
+        self.model = None
+
+        # Initialize llama-cpp-python with the model
+        self.llama = llama_cpp.Llama(
+            model_path=model_path,
+            n_gpu_layers=n_gpu_layers,  # -1 for all GPU, 0 for CPU only
+            chat_format=chat_format,
+            n_ctx=n_ctx,
+            verbose=False,
+        )
+
+        self.aclient = instructor.patch(
+            create=self.llama.create_chat_completion_openai_v1,
+            mode=instructor.Mode(self.instructor_mode),
+        )
+
+    def _init_server_mode(self, endpoint: str, api_key: Optional[str], model: Optional[str]):
+        """Initialize server mode connecting to llama-cpp-python server"""
+        logger.info(f"Initializing LlamaCpp in SERVER mode with endpoint: {endpoint}")
+
+        self.mode_type = "server"
+        self.model = model
+        self.model_path = None
+        self.endpoint = endpoint
+        self.api_key = api_key
+
+        # Use instructor.from_openai() for server mode (OpenAI-compatible API)
+        self.aclient = instructor.from_openai(
+            AsyncOpenAI(base_url=self.endpoint, api_key=self.api_key),
+            mode=instructor.Mode(self.instructor_mode),
+        )
+
+    @retry(
+        stop=stop_after_delay(128),
+        wait=wait_exponential_jitter(8, 128),
+        retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
+        before_sleep=before_sleep_log(logger, logging.DEBUG),
+        reraise=True,
+    )
+    async def acreate_structured_output(
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
+    ) -> BaseModel:
+        """
+        Generate a structured output from the LLM using the provided text and system prompt.
+
+        Works in both local and server modes transparently.
+
+        Parameters:
+        -----------
+        - text_input (str): The input text provided by the user.
+        - system_prompt (str): The system prompt that guides the response generation.
+        - response_model (Type[BaseModel]): The model type that the response should conform to.
+
+        Returns:
+        --------
+        - BaseModel: A structured output that conforms to the specified response model.
+        """
+        async with llm_rate_limiter_context_manager():
+            # Prepare messages (system first, then user is more standard)
+            messages = [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": text_input},
+            ]
+
+            if self.mode_type == "server":
+                # Server mode: use async client with OpenAI-compatible API
+                response = await self.aclient.chat.completions.create(
+                    model=self.model,
+                    messages=messages,
+                    response_model=response_model,
+                    max_retries=2,
+                    max_completion_tokens=self.max_completion_tokens,
+                    **kwargs,
+                )
+
+            else:
+                import asyncio
+
+                # Local mode: instructor.patch() returns a SYNC callable
+                # Per docs: https://python.useinstructor.com/integrations/llama-cpp-python/
+                def _call_sync():
+                    return self.aclient(
+                        messages=messages,
+                        response_model=response_model,
+                        max_tokens=self.max_completion_tokens,
+                        **kwargs,
+                    )
+
+                # Run sync function in thread pool to avoid blocking
+                response = await asyncio.to_thread(_call_sync)
+
+            return response
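A minimal usage sketch of the new adapter in server mode, based on the constructor and acreate_structured_output signature above; it assumes a llama-cpp-python server exposing an OpenAI-compatible API, and the endpoint, API key, model name, and response model shown here are illustrative placeholders:

import asyncio
from pydantic import BaseModel

from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llama_cpp.adapter import (
    LlamaCppAPIAdapter,
)


class Answer(BaseModel):
    # Illustrative response model for the structured output
    answer: str


async def main():
    # Server mode: point the adapter at a running llama-cpp-python server
    adapter = LlamaCppAPIAdapter(
        endpoint="http://localhost:8000/v1",  # illustrative endpoint
        api_key="placeholder-key",            # placeholder; local servers typically don't validate it
        model="local-model",                  # illustrative model name
        max_completion_tokens=512,
    )

    result = await adapter.acreate_structured_output(
        text_input="What does cognee 0.5.2 add for local models?",
        system_prompt="Answer briefly.",
        response_model=Answer,
    )
    print(result.answer)


asyncio.run(main())

Local mode would instead pass model_path to the constructor, which loads the GGUF model in-process via llama-cpp-python as shown in _init_local_mode.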
cognee/modules/cognify/config.py
CHANGED
@@ -9,6 +9,7 @@ class CognifyConfig(BaseSettings):
     classification_model: object = DefaultContentPrediction
     summarization_model: object = SummarizedContent
     triplet_embedding: bool = False
+    chunks_per_batch: Optional[int] = None
     model_config = SettingsConfigDict(env_file=".env", extra="allow")
 
     def to_dict(self) -> dict:

@@ -16,6 +17,7 @@ class CognifyConfig(BaseSettings):
             "classification_model": self.classification_model,
             "summarization_model": self.summarization_model,
             "triplet_embedding": self.triplet_embedding,
+            "chunks_per_batch": self.chunks_per_batch,
         }
 
 
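Since CognifyConfig is a pydantic BaseSettings class reading from .env, the new chunks_per_batch option can presumably be supplied through the environment as well as set programmatically. A minimal sketch; the environment variable name follows pydantic's default field-name mapping and is an assumption, not something confirmed by this diff:

import os

# Assumption: pydantic BaseSettings maps the field name to an env variable of the same name (case-insensitive)
os.environ["CHUNKS_PER_BATCH"] = "50"

from cognee.modules.cognify.config import CognifyConfig

config = CognifyConfig()
print(config.chunks_per_batch)             # 50
print(config.to_dict()["chunks_per_batch"])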
cognee/modules/data/models/Data.py
CHANGED

@@ -36,6 +36,7 @@ class Data(Base):
     data_size = Column(Integer, nullable=True)  # File size in bytes
     created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
     updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
+    last_accessed = Column(DateTime(timezone=True), nullable=True)
 
     datasets = relationship(
         "Dataset",