cognee 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +46 -3
- cognee/api/v1/memify/routers/get_memify_router.py +3 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +21 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/config.py +16 -1
- cognee/infrastructure/databases/relational/create_relational_engine.py +13 -3
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +26 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +70 -16
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/LLMGateway.py +0 -13
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -12
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +31 -25
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +132 -7
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +29 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +2 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +58 -13
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +0 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -131
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/types.py +10 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +3 -1
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +32 -33
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -103
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -222
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/chunks/__init__.py +9 -0
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/graph/__init__.py +7 -0
- cognee/tasks/ingestion/data_item.py +8 -0
- cognee/tasks/ingestion/ingest_data.py +12 -1
- cognee/tasks/ingestion/save_data_item_to_storage.py +5 -0
- cognee/tasks/memify/__init__.py +8 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +351 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +276 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +228 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +217 -0
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +319 -0
- cognee/tests/integration/retrieval/test_structured_output.py +258 -0
- cognee/tests/integration/retrieval/test_summaries_retriever.py +195 -0
- cognee/tests/integration/retrieval/test_temporal_retriever.py +336 -0
- cognee/tests/integration/retrieval/test_triplet_retriever.py +45 -1
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_custom_data_label.py +68 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +345 -205
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/eval_framework/benchmark_adapters_test.py +25 -0
- cognee/tests/unit/eval_framework/corpus_builder_test.py +33 -4
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/infrastructure/databases/relational/test_RelationalConfig.py +69 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +122 -168
- cognee/tests/unit/modules/retrieval/conversation_history_test.py +338 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +486 -157
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +693 -155
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +619 -200
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +300 -171
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +184 -155
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +544 -79
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +476 -28
- cognee/tests/unit/modules/retrieval/test_completion.py +343 -0
- cognee/tests/unit/modules/retrieval/test_graph_summary_completion_retriever.py +157 -0
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/test_user_qa_feedback.py +312 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +267 -7
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +96 -20
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/RECORD +258 -157
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- cognee/tests/unit/modules/retrieval/structured_output_test.py +0 -204
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,44 +1,50 @@
|
|
|
1
1
|
import os
|
|
2
|
-
|
|
2
|
+
import posixpath
|
|
3
|
+
from urllib.parse import urlparse, unquote
|
|
3
4
|
|
|
4
5
|
|
|
5
|
-
def get_data_file_path(file_path: str):
|
|
6
|
-
|
|
7
|
-
if file_path.startswith("file://"):
|
|
8
|
-
# Remove first occurrence of file:// prefix
|
|
9
|
-
pure_file_path = file_path.replace("file://", "", 1)
|
|
10
|
-
# Normalize the file URI for Windows - replace backslashes with forward slashes
|
|
11
|
-
normalized_file_uri = os.path.normpath(pure_file_path)
|
|
6
|
+
def get_data_file_path(file_path: str) -> str:
|
|
7
|
+
"""Normalize file paths from various URI schemes to filesystem paths.
|
|
12
8
|
|
|
13
|
-
|
|
9
|
+
Handles file://, s3://, and regular filesystem paths. Decodes
|
|
10
|
+
percent-encoded characters and preserves UNC network paths.
|
|
11
|
+
"""
|
|
12
|
+
parsed = urlparse(file_path)
|
|
13
|
+
|
|
14
|
+
if parsed.scheme == "file":
|
|
15
|
+
# file:///path/to/file -> /path/to/file
|
|
16
|
+
fs_path = unquote(parsed.path)
|
|
17
|
+
|
|
18
|
+
if os.name == "nt" and parsed.netloc:
|
|
19
|
+
# Handle UNC paths (file://server/share/...)
|
|
20
|
+
fs_path = f"//{parsed.netloc}{fs_path}"
|
|
21
|
+
|
|
22
|
+
# Normalize the file URI for Windows - handle drive letters correctly
|
|
14
23
|
if os.name == "nt": # Windows
|
|
15
|
-
# Handle Windows drive letters correctly
|
|
16
|
-
fs_path = normalized_file_uri
|
|
24
|
+
# Handle Windows drive letters correctly: /C:/path -> C:/path
|
|
17
25
|
if (
|
|
18
26
|
(fs_path.startswith("/") or fs_path.startswith("\\"))
|
|
19
|
-
and len(fs_path) >
|
|
27
|
+
and len(fs_path) > 2
|
|
20
28
|
and fs_path[2] == ":"
|
|
29
|
+
and fs_path[1].isalpha()
|
|
21
30
|
):
|
|
22
31
|
fs_path = fs_path[1:]
|
|
23
|
-
else:
|
|
24
|
-
# Unix - like systems
|
|
25
|
-
fs_path = normalized_file_uri
|
|
26
32
|
|
|
27
|
-
|
|
28
|
-
actual_fs_path = os.path.normpath(fs_path)
|
|
29
|
-
return actual_fs_path
|
|
33
|
+
return os.path.normpath(fs_path)
|
|
30
34
|
|
|
31
|
-
elif
|
|
35
|
+
elif parsed.scheme == "s3":
|
|
32
36
|
# Handle S3 URLs without normalization (which corrupts them)
|
|
33
|
-
|
|
37
|
+
if not parsed.path or parsed.path == "/":
|
|
38
|
+
return f"s3://{parsed.netloc}{parsed.path}"
|
|
34
39
|
|
|
35
|
-
|
|
36
|
-
f"s3://{parsed_url.netloc}{os.sep}{os.path.normpath(parsed_url.path).lstrip(os.sep)}"
|
|
37
|
-
)
|
|
40
|
+
normalized_path = posixpath.normpath(parsed.path).lstrip("/")
|
|
38
41
|
|
|
39
|
-
return
|
|
42
|
+
return f"s3://{parsed.netloc}/{normalized_path}"
|
|
40
43
|
|
|
41
|
-
|
|
44
|
+
elif parsed.scheme == "":
|
|
42
45
|
# Regular file path - normalize separators
|
|
43
|
-
|
|
44
|
-
|
|
46
|
+
return os.path.normpath(file_path)
|
|
47
|
+
|
|
48
|
+
else:
|
|
49
|
+
# Other schemes (http, etc.) - return as is or handle as needed
|
|
50
|
+
return file_path
|
|
@@ -37,19 +37,6 @@ class LLMGateway:
|
|
|
37
37
|
**kwargs,
|
|
38
38
|
)
|
|
39
39
|
|
|
40
|
-
@staticmethod
|
|
41
|
-
def create_structured_output(
|
|
42
|
-
text_input: str, system_prompt: str, response_model: Type[BaseModel]
|
|
43
|
-
) -> BaseModel:
|
|
44
|
-
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import (
|
|
45
|
-
get_llm_client,
|
|
46
|
-
)
|
|
47
|
-
|
|
48
|
-
llm_client = get_llm_client()
|
|
49
|
-
return llm_client.create_structured_output(
|
|
50
|
-
text_input=text_input, system_prompt=system_prompt, response_model=response_model
|
|
51
|
-
)
|
|
52
|
-
|
|
53
40
|
@staticmethod
|
|
54
41
|
def create_transcript(input) -> Coroutine:
|
|
55
42
|
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import (
|
|
@@ -10,4 +10,4 @@ Extraction rules:
|
|
|
10
10
|
5. Current-time references ("now", "current", "today"): If the query explicitly refers to the present, set both starts_at and ends_at to now (the ingestion timestamp).
|
|
11
11
|
6. "Who is" and "Who was" questions: These imply a general identity or biographical inquiry without a specific temporal scope. Set both starts_at and ends_at to None.
|
|
12
12
|
7. Ordering rule: Always ensure the earlier date is assigned to starts_at and the later date to ends_at.
|
|
13
|
-
8. No temporal information: If no valid or inferable time reference is found, set both starts_at and ends_at to None.
|
|
13
|
+
8. No temporal information: If no valid or inferable time reference is found, set both starts_at and ends_at to None.
|
|
@@ -22,4 +22,4 @@ The `attributes` should be a list of dictionaries, each containing:
|
|
|
22
22
|
- Relationships should be technical with one or at most two words. If two words, join them with an underscore in lowercase (snake_case style)
|
|
23
23
|
- Relationships could imply general meaning like: subject, object, participant, recipient, agent, instrument, tool, source, cause, effect, purpose, manner, resource, etc.
|
|
24
24
|
- You can combine two words to form a relationship name: subject_role, previous_owner, etc.
|
|
25
|
-
- Focus on how the entity specifically relates to the event
|
|
25
|
+
- Focus on how the entity specifically relates to the event
|
|
@@ -19,8 +19,8 @@ The aim is to achieve simplicity and clarity in the knowledge graph.
|
|
|
19
19
|
- **Naming Convention**: Use snake_case for relationship names, e.g., `acted_in`.
|
|
20
20
|
# 3. Coreference Resolution
|
|
21
21
|
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
|
|
22
|
-
If an entity,
|
|
23
|
-
always use the most complete identifier for that entity throughout the knowledge graph.
|
|
22
|
+
If an entity is mentioned multiple times in the text but is referred to by different names or pronouns,
|
|
23
|
+
always use the most complete identifier for that entity throughout the knowledge graph.
|
|
24
24
|
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
|
|
25
25
|
# 4. Strict Compliance
|
|
26
26
|
Adhere to the rules strictly. Non-compliance will result in termination
|
|
@@ -22,7 +22,7 @@ You are an advanced algorithm designed to extract structured information to buil
|
|
|
22
22
|
3. **Coreference Resolution**:
|
|
23
23
|
- Maintain one consistent node ID for each real-world entity.
|
|
24
24
|
- Resolve aliases, acronyms, and pronouns to the most complete form.
|
|
25
|
-
- *Example*: Always use
|
|
25
|
+
- *Example*: Always use the full identifier, even if the entity is later referred to in a similar but slightly different way
|
|
26
26
|
|
|
27
27
|
**Property & Data Guidelines**:
|
|
28
28
|
|
|
@@ -42,10 +42,10 @@ You are an advanced algorithm designed to extract structured information from un
|
|
|
42
42
|
- **Rule**: Resolve all aliases, acronyms, and pronouns to one canonical identifier.
|
|
43
43
|
|
|
44
44
|
> **One-Shot Example**:
|
|
45
|
-
> **Input**: "
|
|
45
|
+
> **Input**: "X is an author. Later, Doe published a book. He is well-known."
|
|
46
46
|
> **Output Node**:
|
|
47
47
|
> ```
|
|
48
|
-
>
|
|
48
|
+
> X (Person)
|
|
49
49
|
> ```
|
|
50
50
|
|
|
51
51
|
---
|
|
@@ -15,7 +15,7 @@ You are an advanced algorithm that extracts structured data into a knowledge gra
|
|
|
15
15
|
- Properties are key-value pairs; do not use escaped quotes.
|
|
16
16
|
|
|
17
17
|
3. **Coreference Resolution**
|
|
18
|
-
- Use a single, complete identifier for each entity
|
|
18
|
+
- Use a single, complete identifier for each entity
|
|
19
19
|
|
|
20
20
|
4. **Relationship Labels**:
|
|
21
21
|
- Use descriptive, lowercase, snake_case names for edges.
|
|
@@ -26,7 +26,7 @@ Use **basic atomic types** for node labels. Always prefer general types over spe
|
|
|
26
26
|
- Good: "Alan Turing", "Google Inc.", "World War II"
|
|
27
27
|
- Bad: "Entity_001", "1234", "he", "they"
|
|
28
28
|
- Never use numeric or autogenerated IDs.
|
|
29
|
-
- Prioritize **most complete form** of entity names for consistency
|
|
29
|
+
- Prioritize **most complete form** of entity names for consistency
|
|
30
30
|
|
|
31
31
|
2. Dates, Numbers, and Properties
|
|
32
32
|
---------------------------------
|
|
@@ -2,12 +2,12 @@ You are an expert query analyzer for a **GraphRAG system**. Your primary goal is
|
|
|
2
2
|
|
|
3
3
|
Here are the available `SearchType` tools and their specific functions:
|
|
4
4
|
|
|
5
|
-
- **`SUMMARIES`**: The `SUMMARIES` search type retrieves summarized information from the knowledge graph.
|
|
5
|
+
- **`SUMMARIES`**: The `SUMMARIES` search type retrieves summarized information from the knowledge graph.
|
|
6
6
|
|
|
7
|
-
**Best for:**
|
|
7
|
+
**Best for:**
|
|
8
8
|
|
|
9
|
-
- Getting concise overviews of topics
|
|
10
|
-
- Summarizing large amounts of information
|
|
9
|
+
- Getting concise overviews of topics
|
|
10
|
+
- Summarizing large amounts of information
|
|
11
11
|
- Quick understanding of complex subjects
|
|
12
12
|
|
|
13
13
|
**Best for:**
|
|
@@ -16,7 +16,7 @@ Here are the available `SearchType` tools and their specific functions:
|
|
|
16
16
|
- Understanding relationships between concepts
|
|
17
17
|
- Exploring the structure of your knowledge graph
|
|
18
18
|
|
|
19
|
-
* **`CHUNKS`**: The `CHUNKS` search type retrieves specific facts and information chunks from the knowledge graph.
|
|
19
|
+
* **`CHUNKS`**: The `CHUNKS` search type retrieves specific facts and information chunks from the knowledge graph.
|
|
20
20
|
|
|
21
21
|
**Best for:**
|
|
22
22
|
|
|
@@ -122,4 +122,4 @@ Response: `NATURAL_LANGUAGE`
|
|
|
122
122
|
|
|
123
123
|
|
|
124
124
|
|
|
125
|
-
Your response MUST be a single word, consisting of only the chosen `SearchType` name. Do not provide any explanation.
|
|
125
|
+
Your response MUST be a single word, consisting of only the chosen `SearchType` name. Do not provide any explanation.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
Respond with: test
|
|
1
|
+
Respond with: test
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
You are an expert translator with deep knowledge of languages, cultures, and linguistics.
|
|
2
|
+
|
|
3
|
+
Your task is to:
|
|
4
|
+
1. Detect the source language of the provided text if not specified
|
|
5
|
+
2. Translate the text accurately to the target language
|
|
6
|
+
3. Preserve the original meaning, tone, and intent
|
|
7
|
+
4. Maintain proper grammar and natural phrasing in the target language
|
|
8
|
+
|
|
9
|
+
Guidelines:
|
|
10
|
+
- Preserve technical terms, proper nouns, and specialized vocabulary appropriately
|
|
11
|
+
- Maintain formatting such as paragraphs, lists, and emphasis where applicable
|
|
12
|
+
- If the text contains code, URLs, or other non-translatable content, preserve them as-is
|
|
13
|
+
- Handle idioms and cultural references thoughtfully, adapting when necessary
|
|
14
|
+
- Ensure the translation reads naturally to a native speaker of the target language
|
|
15
|
+
|
|
16
|
+
Provide the translation in a structured format with:
|
|
17
|
+
- The translated text
|
|
18
|
+
- The detected source language (ISO 639-1 code like "en", "es", "fr", "de", etc.)
|
|
19
|
+
- Any notes about the translation (optional, for ambiguous terms or cultural adaptations)
|
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py
CHANGED
|
@@ -3,7 +3,9 @@ from typing import Type
|
|
|
3
3
|
from pydantic import BaseModel
|
|
4
4
|
import litellm
|
|
5
5
|
import instructor
|
|
6
|
+
import anthropic
|
|
6
7
|
from cognee.shared.logging_utils import get_logger
|
|
8
|
+
from cognee.modules.observability.get_observe import get_observe
|
|
7
9
|
from tenacity import (
|
|
8
10
|
retry,
|
|
9
11
|
stop_after_delay,
|
|
@@ -12,38 +14,41 @@ from tenacity import (
|
|
|
12
14
|
before_sleep_log,
|
|
13
15
|
)
|
|
14
16
|
|
|
15
|
-
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.
|
|
16
|
-
|
|
17
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
|
|
18
|
+
GenericAPIAdapter,
|
|
17
19
|
)
|
|
18
20
|
from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
|
|
19
21
|
from cognee.infrastructure.llm.config import get_llm_config
|
|
20
22
|
|
|
21
23
|
logger = get_logger()
|
|
24
|
+
observe = get_observe()
|
|
22
25
|
|
|
23
26
|
|
|
24
|
-
class AnthropicAdapter(
|
|
27
|
+
class AnthropicAdapter(GenericAPIAdapter):
|
|
25
28
|
"""
|
|
26
29
|
Adapter for interfacing with the Anthropic API, enabling structured output generation
|
|
27
30
|
and prompt display.
|
|
28
31
|
"""
|
|
29
32
|
|
|
30
|
-
name = "Anthropic"
|
|
31
|
-
model: str
|
|
32
33
|
default_instructor_mode = "anthropic_tools"
|
|
33
34
|
|
|
34
|
-
def __init__(
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
def __init__(
|
|
36
|
+
self, api_key: str, model: str, max_completion_tokens: int, instructor_mode: str = None
|
|
37
|
+
):
|
|
38
|
+
super().__init__(
|
|
39
|
+
api_key=api_key,
|
|
40
|
+
model=model,
|
|
41
|
+
max_completion_tokens=max_completion_tokens,
|
|
42
|
+
name="Anthropic",
|
|
43
|
+
)
|
|
37
44
|
self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
|
|
38
45
|
|
|
39
46
|
self.aclient = instructor.patch(
|
|
40
|
-
create=anthropic.AsyncAnthropic(api_key=
|
|
47
|
+
create=anthropic.AsyncAnthropic(api_key=self.api_key).messages.create,
|
|
41
48
|
mode=instructor.Mode(self.instructor_mode),
|
|
42
49
|
)
|
|
43
50
|
|
|
44
|
-
|
|
45
|
-
self.max_completion_tokens = max_completion_tokens
|
|
46
|
-
|
|
51
|
+
@observe(as_type="generation")
|
|
47
52
|
@retry(
|
|
48
53
|
stop=stop_after_delay(128),
|
|
49
54
|
wait=wait_exponential_jitter(8, 128),
|
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Adapter for
|
|
1
|
+
"""Adapter for Gemini API LLM provider"""
|
|
2
2
|
|
|
3
3
|
import litellm
|
|
4
4
|
import instructor
|
|
@@ -8,13 +8,9 @@ from openai import ContentFilterFinishReasonError
|
|
|
8
8
|
from litellm.exceptions import ContentPolicyViolationError
|
|
9
9
|
from instructor.core import InstructorRetryException
|
|
10
10
|
|
|
11
|
-
from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
|
|
12
|
-
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
|
13
|
-
LLMInterface,
|
|
14
|
-
)
|
|
15
11
|
import logging
|
|
16
12
|
from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
|
|
17
|
-
|
|
13
|
+
|
|
18
14
|
from tenacity import (
|
|
19
15
|
retry,
|
|
20
16
|
stop_after_delay,
|
|
@@ -23,55 +19,65 @@ from tenacity import (
|
|
|
23
19
|
before_sleep_log,
|
|
24
20
|
)
|
|
25
21
|
|
|
22
|
+
from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
|
|
23
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
|
|
24
|
+
GenericAPIAdapter,
|
|
25
|
+
)
|
|
26
|
+
from cognee.shared.logging_utils import get_logger
|
|
27
|
+
from cognee.modules.observability.get_observe import get_observe
|
|
28
|
+
|
|
26
29
|
logger = get_logger()
|
|
30
|
+
observe = get_observe()
|
|
27
31
|
|
|
28
32
|
|
|
29
|
-
class GeminiAdapter(
|
|
33
|
+
class GeminiAdapter(GenericAPIAdapter):
|
|
30
34
|
"""
|
|
31
35
|
Adapter for Gemini API LLM provider.
|
|
32
36
|
|
|
33
37
|
This class initializes the API adapter with necessary credentials and configurations for
|
|
34
38
|
interacting with the gemini LLM models. It provides methods for creating structured outputs
|
|
35
|
-
based on user input and system prompts.
|
|
39
|
+
based on user input and system prompts, as well as multimodal processing capabilities.
|
|
36
40
|
|
|
37
41
|
Public methods:
|
|
38
|
-
- acreate_structured_output(text_input: str, system_prompt: str, response_model:
|
|
39
|
-
|
|
42
|
+
- acreate_structured_output(text_input: str, system_prompt: str, response_model: Type[BaseModel]) -> BaseModel
|
|
43
|
+
- create_transcript(input) -> BaseModel: Transcribe audio files to text
|
|
44
|
+
- transcribe_image(input) -> BaseModel: Inherited from GenericAPIAdapter
|
|
40
45
|
"""
|
|
41
46
|
|
|
42
|
-
name: str
|
|
43
|
-
model: str
|
|
44
|
-
api_key: str
|
|
45
47
|
default_instructor_mode = "json_mode"
|
|
46
48
|
|
|
47
49
|
def __init__(
|
|
48
50
|
self,
|
|
49
|
-
endpoint,
|
|
50
51
|
api_key: str,
|
|
51
52
|
model: str,
|
|
52
|
-
api_version: str,
|
|
53
53
|
max_completion_tokens: int,
|
|
54
|
+
endpoint: str = None,
|
|
55
|
+
api_version: str = None,
|
|
56
|
+
transcription_model: str = None,
|
|
54
57
|
instructor_mode: str = None,
|
|
55
58
|
fallback_model: str = None,
|
|
56
59
|
fallback_api_key: str = None,
|
|
57
60
|
fallback_endpoint: str = None,
|
|
58
61
|
):
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
62
|
+
super().__init__(
|
|
63
|
+
api_key=api_key,
|
|
64
|
+
model=model,
|
|
65
|
+
max_completion_tokens=max_completion_tokens,
|
|
66
|
+
name="Gemini",
|
|
67
|
+
endpoint=endpoint,
|
|
68
|
+
api_version=api_version,
|
|
69
|
+
transcription_model=transcription_model,
|
|
70
|
+
fallback_model=fallback_model,
|
|
71
|
+
fallback_api_key=fallback_api_key,
|
|
72
|
+
fallback_endpoint=fallback_endpoint,
|
|
73
|
+
)
|
|
69
74
|
self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
|
|
70
75
|
|
|
71
76
|
self.aclient = instructor.from_litellm(
|
|
72
77
|
litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
|
|
73
78
|
)
|
|
74
79
|
|
|
80
|
+
@observe(as_type="generation")
|
|
75
81
|
@retry(
|
|
76
82
|
stop=stop_after_delay(128),
|
|
77
83
|
wait=wait_exponential_jitter(8, 128),
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
"""Adapter for Generic API LLM provider API"""
|
|
2
2
|
|
|
3
|
+
import base64
|
|
4
|
+
import mimetypes
|
|
3
5
|
import litellm
|
|
4
6
|
import instructor
|
|
5
|
-
from typing import Type
|
|
7
|
+
from typing import Type, Optional
|
|
6
8
|
from pydantic import BaseModel
|
|
7
9
|
from openai import ContentFilterFinishReasonError
|
|
8
10
|
from litellm.exceptions import ContentPolicyViolationError
|
|
@@ -12,6 +14,8 @@ from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
|
|
|
12
14
|
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
|
13
15
|
LLMInterface,
|
|
14
16
|
)
|
|
17
|
+
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
|
18
|
+
from cognee.modules.observability.get_observe import get_observe
|
|
15
19
|
import logging
|
|
16
20
|
from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
|
|
17
21
|
from cognee.shared.logging_utils import get_logger
|
|
@@ -23,7 +27,12 @@ from tenacity import (
|
|
|
23
27
|
before_sleep_log,
|
|
24
28
|
)
|
|
25
29
|
|
|
30
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.types import (
|
|
31
|
+
TranscriptionReturnType,
|
|
32
|
+
)
|
|
33
|
+
|
|
26
34
|
logger = get_logger()
|
|
35
|
+
observe = get_observe()
|
|
27
36
|
|
|
28
37
|
|
|
29
38
|
class GenericAPIAdapter(LLMInterface):
|
|
@@ -39,18 +48,19 @@ class GenericAPIAdapter(LLMInterface):
|
|
|
39
48
|
Type[BaseModel]) -> BaseModel
|
|
40
49
|
"""
|
|
41
50
|
|
|
42
|
-
|
|
43
|
-
model: str
|
|
44
|
-
api_key: str
|
|
51
|
+
MAX_RETRIES = 5
|
|
45
52
|
default_instructor_mode = "json_mode"
|
|
46
53
|
|
|
47
54
|
def __init__(
|
|
48
55
|
self,
|
|
49
|
-
endpoint,
|
|
50
56
|
api_key: str,
|
|
51
57
|
model: str,
|
|
52
|
-
name: str,
|
|
53
58
|
max_completion_tokens: int,
|
|
59
|
+
name: str,
|
|
60
|
+
endpoint: str = None,
|
|
61
|
+
api_version: str = None,
|
|
62
|
+
transcription_model: str = None,
|
|
63
|
+
image_transcribe_model: str = None,
|
|
54
64
|
instructor_mode: str = None,
|
|
55
65
|
fallback_model: str = None,
|
|
56
66
|
fallback_api_key: str = None,
|
|
@@ -59,9 +69,11 @@ class GenericAPIAdapter(LLMInterface):
|
|
|
59
69
|
self.name = name
|
|
60
70
|
self.model = model
|
|
61
71
|
self.api_key = api_key
|
|
72
|
+
self.api_version = api_version
|
|
62
73
|
self.endpoint = endpoint
|
|
63
74
|
self.max_completion_tokens = max_completion_tokens
|
|
64
|
-
|
|
75
|
+
self.transcription_model = transcription_model or model
|
|
76
|
+
self.image_transcribe_model = image_transcribe_model or model
|
|
65
77
|
self.fallback_model = fallback_model
|
|
66
78
|
self.fallback_api_key = fallback_api_key
|
|
67
79
|
self.fallback_endpoint = fallback_endpoint
|
|
@@ -72,6 +84,7 @@ class GenericAPIAdapter(LLMInterface):
|
|
|
72
84
|
litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
|
|
73
85
|
)
|
|
74
86
|
|
|
87
|
+
@observe(as_type="generation")
|
|
75
88
|
@retry(
|
|
76
89
|
stop=stop_after_delay(128),
|
|
77
90
|
wait=wait_exponential_jitter(8, 128),
|
|
@@ -173,3 +186,115 @@ class GenericAPIAdapter(LLMInterface):
|
|
|
173
186
|
raise ContentPolicyFilterError(
|
|
174
187
|
f"The provided input contains content that is not aligned with our content policy: {text_input}"
|
|
175
188
|
) from error
|
|
189
|
+
|
|
190
|
+
@observe(as_type="transcription")
@retry(
    stop=stop_after_delay(128),
    wait=wait_exponential_jitter(2, 128),
    # ValueError (bad MIME type) and FileNotFoundError are deterministic local
    # failures; retrying them would only delay the error by up to 128 seconds.
    retry=retry_if_not_exception_type(
        (litellm.exceptions.NotFoundError, ValueError, FileNotFoundError)
    ),
    before_sleep=before_sleep_log(logger, logging.DEBUG),
    reraise=True,
)
async def create_transcript(self, input) -> TranscriptionReturnType:
    """
    Generate a transcript of an audio file.

    The file's MIME type is guessed from its name and validated first, so an
    unsupported file is rejected before it is read. The file is then read,
    base64-encoded and sent to the transcription model as an inline `data:` URI
    through `litellm.acompletion`.

    Parameters:
    -----------
        - input: The path to the audio file that needs to be transcribed.

    Returns:
    --------
        A TranscriptionReturnType built from the text of the first completion choice
        and the full response object.

    Raises:
    -------
        - ValueError: If the MIME type cannot be guessed or is not an `audio/*` type.
          Raised immediately, without retrying.
        - FileNotFoundError: Presumably raised by `open_data_file` when the file does
          not exist (TODO confirm); not retried.
    """
    # Validate before any I/O so a wrong extension fails without reading the file.
    mime_type, _ = mimetypes.guess_type(input)
    if not mime_type or not mime_type.startswith("audio/"):
        raise ValueError(
            f"Could not determine MIME type for audio file: {input}. Is the extension correct?"
        )

    async with open_data_file(input, mode="rb") as audio_file:
        encoded_string = base64.b64encode(audio_file.read()).decode("utf-8")

    response = await litellm.acompletion(
        model=self.transcription_model,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "file",
                        "file": {"file_data": f"data:{mime_type};base64,{encoded_string}"},
                    },
                    {"type": "text", "text": "Transcribe the following audio precisely."},
                ],
            }
        ],
        api_key=self.api_key,
        api_version=self.api_version,
        max_completion_tokens=self.max_completion_tokens,
        api_base=self.endpoint,
        max_retries=self.MAX_RETRIES,
    )

    return TranscriptionReturnType(response.choices[0].message.content, response)
|
|
243
|
+
|
|
244
|
+
@observe(as_type="transcribe_image")
@retry(
    stop=stop_after_delay(128),
    wait=wait_exponential_jitter(2, 128),
    # ValueError (bad MIME type) and FileNotFoundError are deterministic local
    # failures; retrying them would only delay the error by up to 128 seconds.
    retry=retry_if_not_exception_type(
        (litellm.exceptions.NotFoundError, ValueError, FileNotFoundError)
    ),
    before_sleep=before_sleep_log(logger, logging.DEBUG),
    reraise=True,
)
async def transcribe_image(self, input) -> BaseModel:
    """
    Ask the image model to describe the contents of an image file.

    The file's MIME type is guessed from its name and validated first, so an
    unsupported file is rejected before it is read. The image is then read,
    base64-encoded and sent as an inline `data:` URI alongside the prompt
    "What's in this image?" through `litellm.acompletion`.

    Parameters:
    -----------
        - input: The path to the image file that needs to be transcribed.

    Returns:
    --------
        The response object returned by `litellm.acompletion`, passed through
        unchanged. NOTE(review): the `BaseModel` annotation is kept for interface
        compatibility; no structured-output parsing happens in this method.

    Raises:
    -------
        - ValueError: If the MIME type cannot be guessed or is not an `image/*` type.
          Raised immediately, without retrying.
    """
    # Validate before any I/O so a wrong extension fails without reading the file.
    mime_type, _ = mimetypes.guess_type(input)
    if not mime_type or not mime_type.startswith("image/"):
        raise ValueError(
            f"Could not determine MIME type for image file: {input}. Is the extension correct?"
        )

    async with open_data_file(input, mode="rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")

    response = await litellm.acompletion(
        model=self.image_transcribe_model,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "What's in this image?",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{encoded_image}",
                        },
                    },
                ],
            }
        ],
        api_key=self.api_key,
        api_base=self.endpoint,
        api_version=self.api_version,
        # Fixed cap of 300 completion tokens; self.max_completion_tokens is not used here.
        max_completion_tokens=300,
        max_retries=self.MAX_RETRIES,
    )
    return response
|
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py
CHANGED
|
@@ -34,6 +34,7 @@ class LLMProvider(Enum):
|
|
|
34
34
|
GEMINI = "gemini"
|
|
35
35
|
MISTRAL = "mistral"
|
|
36
36
|
BEDROCK = "bedrock"
|
|
37
|
+
LLAMA_CPP = "llama_cpp"
|
|
37
38
|
|
|
38
39
|
|
|
39
40
|
def get_llm_client(raise_api_key_error: bool = True):
|
|
@@ -103,7 +104,7 @@ def get_llm_client(raise_api_key_error: bool = True):
|
|
|
103
104
|
llm_config.llm_api_key,
|
|
104
105
|
llm_config.llm_model,
|
|
105
106
|
"Ollama",
|
|
106
|
-
max_completion_tokens
|
|
107
|
+
max_completion_tokens,
|
|
107
108
|
instructor_mode=llm_config.llm_instructor_mode.lower(),
|
|
108
109
|
)
|
|
109
110
|
|
|
@@ -113,8 +114,9 @@ def get_llm_client(raise_api_key_error: bool = True):
|
|
|
113
114
|
)
|
|
114
115
|
|
|
115
116
|
return AnthropicAdapter(
|
|
116
|
-
|
|
117
|
-
|
|
117
|
+
llm_config.llm_api_key,
|
|
118
|
+
llm_config.llm_model,
|
|
119
|
+
max_completion_tokens,
|
|
118
120
|
instructor_mode=llm_config.llm_instructor_mode.lower(),
|
|
119
121
|
)
|
|
120
122
|
|
|
@@ -127,11 +129,10 @@ def get_llm_client(raise_api_key_error: bool = True):
|
|
|
127
129
|
)
|
|
128
130
|
|
|
129
131
|
return GenericAPIAdapter(
|
|
130
|
-
llm_config.llm_endpoint,
|
|
131
132
|
llm_config.llm_api_key,
|
|
132
133
|
llm_config.llm_model,
|
|
134
|
+
max_completion_tokens,
|
|
133
135
|
"Custom",
|
|
134
|
-
max_completion_tokens=max_completion_tokens,
|
|
135
136
|
instructor_mode=llm_config.llm_instructor_mode.lower(),
|
|
136
137
|
fallback_api_key=llm_config.fallback_api_key,
|
|
137
138
|
fallback_endpoint=llm_config.fallback_endpoint,
|
|
@@ -187,5 +188,28 @@ def get_llm_client(raise_api_key_error: bool = True):
|
|
|
187
188
|
instructor_mode=llm_config.llm_instructor_mode.lower(),
|
|
188
189
|
)
|
|
189
190
|
|
|
191
|
+
elif provider == LLMProvider.LLAMA_CPP:
|
|
192
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llama_cpp.adapter import (
|
|
193
|
+
LlamaCppAPIAdapter,
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
# Get optional local mode parameters (will be None if not set)
|
|
197
|
+
# TODO: refactor llm_config to include these parameters, currently they cannot be defined and defaults are used
|
|
198
|
+
model_path = getattr(llm_config, "llama_cpp_model_path", None)
|
|
199
|
+
n_ctx = getattr(llm_config, "llama_cpp_n_ctx", 2048)
|
|
200
|
+
n_gpu_layers = getattr(llm_config, "llama_cpp_n_gpu_layers", 0)
|
|
201
|
+
chat_format = getattr(llm_config, "llama_cpp_chat_format", "chatml")
|
|
202
|
+
|
|
203
|
+
return LlamaCppAPIAdapter(
|
|
204
|
+
model=llm_config.llm_model,
|
|
205
|
+
max_completion_tokens=max_completion_tokens,
|
|
206
|
+
instructor_mode=llm_config.llm_instructor_mode.lower(),
|
|
207
|
+
endpoint=llm_config.llm_endpoint,
|
|
208
|
+
api_key=llm_config.llm_api_key,
|
|
209
|
+
model_path=model_path,
|
|
210
|
+
n_ctx=n_ctx,
|
|
211
|
+
n_gpu_layers=n_gpu_layers,
|
|
212
|
+
chat_format=chat_format,
|
|
213
|
+
)
|
|
190
214
|
else:
|
|
191
215
|
raise UnsupportedLLMProviderError(provider)
|