cognee 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +46 -3
- cognee/api/v1/memify/routers/get_memify_router.py +3 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +21 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/config.py +16 -1
- cognee/infrastructure/databases/relational/create_relational_engine.py +13 -3
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +26 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +70 -16
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/LLMGateway.py +0 -13
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -12
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +31 -25
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +132 -7
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +29 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +2 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +58 -13
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +0 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -131
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/types.py +10 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +3 -1
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +32 -33
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -103
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -222
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/chunks/__init__.py +9 -0
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/graph/__init__.py +7 -0
- cognee/tasks/ingestion/data_item.py +8 -0
- cognee/tasks/ingestion/ingest_data.py +12 -1
- cognee/tasks/ingestion/save_data_item_to_storage.py +5 -0
- cognee/tasks/memify/__init__.py +8 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +351 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +276 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +228 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +217 -0
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +319 -0
- cognee/tests/integration/retrieval/test_structured_output.py +258 -0
- cognee/tests/integration/retrieval/test_summaries_retriever.py +195 -0
- cognee/tests/integration/retrieval/test_temporal_retriever.py +336 -0
- cognee/tests/integration/retrieval/test_triplet_retriever.py +45 -1
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_custom_data_label.py +68 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +345 -205
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/eval_framework/benchmark_adapters_test.py +25 -0
- cognee/tests/unit/eval_framework/corpus_builder_test.py +33 -4
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/infrastructure/databases/relational/test_RelationalConfig.py +69 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +122 -168
- cognee/tests/unit/modules/retrieval/conversation_history_test.py +338 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +486 -157
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +693 -155
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +619 -200
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +300 -171
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +184 -155
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +544 -79
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +476 -28
- cognee/tests/unit/modules/retrieval/test_completion.py +343 -0
- cognee/tests/unit/modules/retrieval/test_graph_summary_completion_retriever.py +157 -0
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/test_user_qa_feedback.py +312 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +267 -7
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +96 -20
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/RECORD +258 -157
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- cognee/tests/unit/modules/retrieval/structured_output_test.py +0 -204
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
cognee/alembic.ini
ADDED
@@ -0,0 +1,117 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts
+# Use forward slashes (/) also on windows to provide an os agnostic path
+script_location = alembic
+
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+# for all available tokens
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.
+prepend_sys_path = .
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the python>=3.9 or backports.zoneinfo library.
+# Any required deps can installed by adding `alembic[tz]` to the pip requirements
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; This defaults
+# to alembic/versions. When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "version_path_separator" below.
+# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
+
+# version path separator; As mentioned above, this is the character used to split
+# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
+# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
+# Valid values for version_path_separator are:
+#
+# version_path_separator = :
+# version_path_separator = ;
+# version_path_separator = space
+# version_path_separator = newline
+version_path_separator = os  # Use os.pathsep. Default configuration used for new projects.
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+sqlalchemy.url = %(SQLALCHEMY_DATABASE_URI)s
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts. See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = %(here)s/.venv/bin/ruff
+# ruff.options = --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = WARN
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
cognee/api/v1/add/add.py
CHANGED
@@ -10,13 +10,14 @@ from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
 )
 from cognee.modules.engine.operations.setup import setup
 from cognee.tasks.ingestion import ingest_data, resolve_data_directories
+from cognee.tasks.ingestion.data_item import DataItem
 from cognee.shared.logging_utils import get_logger

 logger = get_logger()


 async def add(
-    data: Union[BinaryIO, list[BinaryIO], str, list[str]],
+    data: Union[BinaryIO, list[BinaryIO], str, list[str], DataItem, list[DataItem]],
     dataset_name: str = "main_dataset",
     user: User = None,
     node_set: Optional[List[str]] = None,
cognee/api/v1/add/routers/get_add_router.py
CHANGED
@@ -10,6 +10,7 @@ from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
 from cognee.modules.pipelines.models import PipelineRunErrored
 from cognee.shared.logging_utils import get_logger
+from cognee.shared.usage_logger import log_usage
 from cognee import __version__ as cognee_version

 logger = get_logger()
@@ -19,6 +20,7 @@ def get_add_router() -> APIRouter:
     router = APIRouter()

     @router.post("", response_model=dict)
+    @log_usage(function_name="POST /v1/add", log_type="api_endpoint")
     async def add(
         data: List[UploadFile] = File(default=None),
         datasetName: Optional[str] = Form(default=None),
cognee/api/v1/cognify/cognify.py
CHANGED
@@ -252,7 +252,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     chunk_size: int = None,
     config: Config = None,
     custom_prompt: Optional[str] = None,
-    chunks_per_batch: int =
+    chunks_per_batch: int = None,
     **kwargs,
 ) -> list[Task]:
     if config is None:
@@ -272,12 +272,14 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
         }

-    if chunks_per_batch is None:
-        chunks_per_batch = 100
-
     cognify_config = get_cognify_config()
     embed_triplets = cognify_config.triplet_embedding

+    if chunks_per_batch is None:
+        chunks_per_batch = (
+            cognify_config.chunks_per_batch if cognify_config.chunks_per_batch is not None else 100
+        )
+
     default_tasks = [
         Task(classify_documents),
         Task(
@@ -308,7 +310,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's


 async def get_temporal_tasks(
-    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int =
+    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = None
 ) -> list[Task]:
     """
     Builds and returns a list of temporal processing tasks to be executed in sequence.
@@ -330,7 +332,10 @@ async def get_temporal_tasks(
         list[Task]: A list of Task objects representing the temporal processing pipeline.
     """
     if chunks_per_batch is None:
-
+        from cognee.modules.cognify.config import get_cognify_config
+
+        configured = get_cognify_config().chunks_per_batch
+        chunks_per_batch = configured if configured is not None else 10

     temporal_tasks = [
         Task(classify_documents),
cognee/api/v1/cognify/routers/get_cognify_router.py
CHANGED
@@ -29,6 +29,7 @@ from cognee.modules.pipelines.queues.pipeline_run_info_queues import (
 )
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.utils import send_telemetry
+from cognee.shared.usage_logger import log_usage
 from cognee import __version__ as cognee_version

 logger = get_logger("api.cognify")
@@ -46,12 +47,18 @@ class CognifyPayloadDTO(InDTO):
         examples=[[]],
         description="Reference to one or more previously uploaded ontologies",
     )
+    chunks_per_batch: Optional[int] = Field(
+        default=None,
+        description="Number of chunks to process per task batch in Cognify (overrides default).",
+        examples=[10, 20, 50, 100],
+    )


 def get_cognify_router() -> APIRouter:
     router = APIRouter()

     @router.post("", response_model=dict)
+    @log_usage(function_name="POST /v1/cognify", log_type="api_endpoint")
     async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
         """
         Transform datasets into structured knowledge graphs through cognitive processing.
@@ -146,6 +153,7 @@ def get_cognify_router() -> APIRouter:
             config=config_to_use,
             run_in_background=payload.run_in_background,
             custom_prompt=payload.custom_prompt,
+            chunks_per_batch=payload.chunks_per_batch,
         )

         # If any cognify run errored return JSONResponse with proper error status code
cognee/api/v1/config/config.py
CHANGED
@@ -10,6 +10,7 @@ from cognee.infrastructure.llm.config import (
     get_llm_config,
 )
 from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config
+from cognee.tasks.translation.config import get_translation_config
 from cognee.api.v1.exceptions.exceptions import InvalidConfigAttributeError


@@ -176,3 +177,62 @@ class config:
     def set_vector_db_url(db_url: str):
         vector_db_config = get_vectordb_config()
         vector_db_config.vector_db_url = db_url
+
+    # Translation configuration methods
+
+    @staticmethod
+    def set_translation_provider(provider: str):
+        """Set the translation provider (llm, google, azure)."""
+        translation_config = get_translation_config()
+        translation_config.translation_provider = provider
+
+    @staticmethod
+    def set_translation_target_language(target_language: str):
+        """Set the default target language for translations."""
+        translation_config = get_translation_config()
+        translation_config.target_language = target_language
+
+    @staticmethod
+    def set_translation_config(config_dict: dict):
+        """
+        Updates the translation config with values from config_dict.
+        """
+        translation_config = get_translation_config()
+        for key, value in config_dict.items():
+            if hasattr(translation_config, key):
+                object.__setattr__(translation_config, key, value)
+            else:
+                raise InvalidConfigAttributeError(attribute=key)
+
+    def set(key: str, value):
+        """
+        Generic setter that maps configuration keys to their specific setter methods.
+        This enables CLI commands like 'cognee config set llm_api_key <value>'.
+        """
+        # Map configuration keys to their setter methods
+        setter_mapping = {
+            "llm_provider": "set_llm_provider",
+            "llm_model": "set_llm_model",
+            "llm_api_key": "set_llm_api_key",
+            "llm_endpoint": "set_llm_endpoint",
+            "graph_database_provider": "set_graph_database_provider",
+            "vector_db_provider": "set_vector_db_provider",
+            "vector_db_url": "set_vector_db_url",
+            "vector_db_key": "set_vector_db_key",
+            "chunk_size": "set_chunk_size",
+            "chunk_overlap": "set_chunk_overlap",
+            "chunk_strategy": "set_chunk_strategy",
+            "chunk_engine": "set_chunk_engine",
+            "classification_model": "set_classification_model",
+            "summarization_model": "set_summarization_model",
+            "graph_model": "set_graph_model",
+            "system_root_directory": "system_root_directory",
+            "data_root_directory": "data_root_directory",
+        }
+
+        if key not in setter_mapping:
+            raise InvalidConfigAttributeError(attribute=key)
+
+        method_name = setter_mapping[key]
+        method = getattr(config, method_name)
+        method(value)
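The new translation setters mirror the existing config API. A short sketch of how they might be called, assuming the `config` class shown above is exposed as `cognee.config` like the other setters:

```python
import cognee

# Provider must be one of the options named in the setter docstring: "llm", "google", or "azure".
cognee.config.set_translation_provider("llm")
cognee.config.set_translation_target_language("en")

# Bulk update; unknown keys raise InvalidConfigAttributeError.
cognee.config.set_translation_config(
    {"translation_provider": "llm", "target_language": "en"}
)
```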
cognee/api/v1/datasets/routers/get_datasets_router.py
CHANGED
@@ -7,7 +7,9 @@ from fastapi import status
 from fastapi import APIRouter
 from fastapi.encoders import jsonable_encoder
 from fastapi import HTTPException, Query, Depends
-from fastapi.responses import JSONResponse, FileResponse
+from fastapi.responses import JSONResponse, FileResponse, StreamingResponse, Response
+from urllib.parse import urlparse
+from pathlib import Path

 from cognee.api.DTO import InDTO, OutDTO
 from cognee.infrastructure.databases.relational import get_relational_engine
@@ -44,6 +46,7 @@ class DatasetDTO(OutDTO):
 class DataDTO(OutDTO):
     id: UUID
     name: str
+    label: Optional[str] = None
     created_at: datetime
     updated_at: Optional[datetime] = None
     extension: str
@@ -414,7 +417,7 @@ def get_datasets_router() -> APIRouter:
     @router.get("/{dataset_id}/data/{data_id}/raw", response_class=FileResponse)
     async def get_raw_data(
         dataset_id: UUID, data_id: UUID, user: User = Depends(get_authenticated_user)
-    ):
+    ) -> Response:
         """
         Download the raw data file for a specific data item.

@@ -475,6 +478,46 @@ def get_datasets_router() -> APIRouter:
                 message=f"Data ({data_id}) not found in dataset ({dataset_id})."
             )

-
+        raw_location = data.raw_data_location
+        parsed_uri = urlparse(raw_location)
+
+        if parsed_uri.scheme == "s3":
+            from cognee.infrastructure.files.utils.open_data_file import open_data_file
+            from cognee.infrastructure.utils.run_async import run_async
+
+            download_name = Path(parsed_uri.path).name or data.name
+            media_type = data.mime_type or "application/octet-stream"
+
+            async def file_iterator(chunk_size: int = 1024 * 1024):
+                async with open_data_file(raw_location, mode="rb") as file:
+                    while True:
+                        chunk = await run_async(file.read, chunk_size)
+                        if not chunk:
+                            break
+                        yield chunk
+
+            return StreamingResponse(
+                file_iterator(),
+                media_type=media_type,
+                headers={"Content-Disposition": f'attachment; filename="{download_name}"'},
+            )
+
+        if parsed_uri.scheme in ("file", "") or (
+            len(parsed_uri.scheme) == 1 and parsed_uri.scheme.isalpha()
+        ):
+            from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
+
+            file_path = get_data_file_path(raw_location)
+            path = Path(file_path)
+
+            if not path.is_file():
+                raise DataNotFoundError(message=f"Raw file not found on disk for data ({data_id}).")
+
+            return FileResponse(path=path)
+
+        raise HTTPException(
+            status_code=status.HTTP_501_NOT_IMPLEMENTED,
+            detail=f"Storage scheme '{parsed_uri.scheme}' not supported for direct download.",
+        )

     return router
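The reworked raw-data endpoint now streams S3-backed files and serves local files directly. A hedged client-side sketch (the `/api/v1/datasets` prefix and the `auth_token` cookie name follow the conventions shown elsewhere in this release; the IDs are placeholders):

```python
import httpx

dataset_id = "123e4567-e89b-12d3-a456-426614174000"  # placeholder
data_id = "456e7890-e12b-34c5-d678-901234567000"  # placeholder

# Stream the response so large raw files are not buffered in memory.
with httpx.stream(
    "GET",
    f"http://localhost:8000/api/v1/datasets/{dataset_id}/data/{data_id}/raw",
    cookies={"auth_token": "your-token"},
) as response:
    response.raise_for_status()
    with open("raw_download.bin", "wb") as out_file:
        for chunk in response.iter_bytes():
            out_file.write(chunk)
```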
cognee/api/v1/memify/routers/get_memify_router.py
CHANGED
@@ -12,6 +12,7 @@ from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
 from cognee.modules.pipelines.models import PipelineRunErrored
 from cognee.shared.logging_utils import get_logger
+from cognee.shared.usage_logger import log_usage
 from cognee import __version__ as cognee_version

 logger = get_logger()
@@ -35,6 +36,7 @@ def get_memify_router() -> APIRouter:
     router = APIRouter()

     @router.post("", response_model=dict)
+    @log_usage(function_name="POST /v1/memify", log_type="api_endpoint")
     async def memify(payload: MemifyPayloadDTO, user: User = Depends(get_authenticated_user)):
         """
         Enrichment pipeline in Cognee, can work with already built graphs. If no data is provided existing knowledge graph will be used as data,
@@ -90,6 +92,7 @@ def get_memify_router() -> APIRouter:
             dataset=payload.dataset_id if payload.dataset_id else payload.dataset_name,
             node_name=payload.node_name,
             user=user,
+            run_in_background=payload.run_in_background,
         )

         if isinstance(memify_run, PipelineRunErrored):
cognee/api/v1/search/routers/get_search_router.py
CHANGED
@@ -6,14 +6,17 @@ from fastapi import Depends, APIRouter
 from fastapi.responses import JSONResponse
 from fastapi.encoders import jsonable_encoder

-from cognee.modules.search.types import SearchType, SearchResult
+from cognee.modules.search.types import SearchType, SearchResult
 from cognee.api.DTO import InDTO, OutDTO
-from cognee.modules.users.exceptions.exceptions import PermissionDeniedError
+from cognee.modules.users.exceptions.exceptions import PermissionDeniedError, UserNotFoundError
 from cognee.modules.users.models import User
 from cognee.modules.search.operations import get_history
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
+from cognee.shared.usage_logger import log_usage
 from cognee import __version__ as cognee_version
+from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
+from cognee.exceptions import CogneeValidationError


 # Note: Datasets sent by name will only map to datasets owned by the request sender
@@ -29,7 +32,7 @@ class SearchPayloadDTO(InDTO):
     node_name: Optional[list[str]] = Field(default=None, example=[])
     top_k: Optional[int] = Field(default=10)
     only_context: bool = Field(default=False)
-
+    verbose: bool = Field(default=False)


 def get_search_router() -> APIRouter:
@@ -72,7 +75,8 @@ def get_search_router() -> APIRouter:
         except Exception as error:
             return JSONResponse(status_code=500, content={"error": str(error)})

-    @router.post("", response_model=Union[List[SearchResult],
+    @router.post("", response_model=Union[List[SearchResult], List])
+    @log_usage(function_name="POST /v1/search", log_type="api_endpoint")
     async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
         """
         Search for nodes in the graph database.
@@ -116,7 +120,7 @@ def get_search_router() -> APIRouter:
                 "node_name": payload.node_name,
                 "top_k": payload.top_k,
                 "only_context": payload.only_context,
-                "
+                "verbose": payload.verbose,
                 "cognee_version": cognee_version,
             },
         )
@@ -133,11 +137,22 @@ def get_search_router() -> APIRouter:
                 system_prompt=payload.system_prompt,
                 node_name=payload.node_name,
                 top_k=payload.top_k,
+                verbose=payload.verbose,
                 only_context=payload.only_context,
-                use_combined_context=payload.use_combined_context,
             )

             return jsonable_encoder(results)
+        except (DatabaseNotCreatedError, UserNotFoundError, CogneeValidationError) as e:
+            # Return a clear 422 with actionable guidance instead of leaking a stacktrace
+            status_code = getattr(e, "status_code", 422)
+            return JSONResponse(
+                status_code=status_code,
+                content={
+                    "error": "Search prerequisites not met",
+                    "detail": str(e),
+                    "hint": "Run `await cognee.add(...)` then `await cognee.cognify()` before searching.",
+                },
+            )
         except PermissionDeniedError:
             return []
         except Exception as error:
cognee/api/v1/search/search.py
CHANGED
@@ -4,13 +4,16 @@ from typing import Union, Optional, List, Type
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.modules.engine.models.node_set import NodeSet
 from cognee.modules.users.models import User
-from cognee.modules.search.types import SearchResult, SearchType
+from cognee.modules.search.types import SearchResult, SearchType
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.search.methods import search as search_function
 from cognee.modules.data.methods import get_authorized_existing_datasets
 from cognee.modules.data.exceptions import DatasetNotFoundError
 from cognee.context_global_variables import set_session_user_context_variable
 from cognee.shared.logging_utils import get_logger
+from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
+from cognee.exceptions import CogneeValidationError
+from cognee.modules.users.exceptions.exceptions import UserNotFoundError

 logger = get_logger()

@@ -29,12 +32,12 @@ async def search(
     save_interaction: bool = False,
     last_k: Optional[int] = 1,
     only_context: bool = False,
-    use_combined_context: bool = False,
     session_id: Optional[str] = None,
     wide_search_top_k: Optional[int] = 100,
     triplet_distance_penalty: Optional[float] = 3.5,
     verbose: bool = False,
-
+    retriever_specific_config: Optional[dict] = None,
+) -> List[SearchResult]:
     """
     Search and query the knowledge graph for insights, information, and connections.

@@ -126,6 +129,8 @@ async def search(

     verbose: If True, returns detailed result information including graph representation (when possible).

+    retriever_specific_config: Optional dictionary of additional configuration parameters specific to the retriever being used.
+
     Returns:
         list: Search results in format determined by query_type:

@@ -179,7 +184,18 @@ async def search(
         datasets = [datasets]

     if user is None:
-
+        try:
+            user = await get_default_user()
+        except (DatabaseNotCreatedError, UserNotFoundError) as error:
+            # Provide a clear, actionable message instead of surfacing low-level stacktraces
+            raise CogneeValidationError(
+                message=(
+                    "Search prerequisites not met: no database/default user found. "
+                    "Initialize Cognee before searching by:\n"
+                    "• running `await cognee.add(...)` followed by `await cognee.cognify()`."
+                ),
+                name="SearchPreconditionError",
+            ) from error

     await set_session_user_context_variable(user)

@@ -203,11 +219,11 @@ async def search(
         save_interaction=save_interaction,
         last_k=last_k,
         only_context=only_context,
-        use_combined_context=use_combined_context,
         session_id=session_id,
         wide_search_top_k=wide_search_top_k,
         triplet_distance_penalty=triplet_distance_penalty,
         verbose=verbose,
+        retriever_specific_config=retriever_specific_config,
     )

     return filtered_search_results
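On the Python side, `search()` drops `use_combined_context` and gains `verbose` and `retriever_specific_config`, and now raises a clear precondition error when no database or default user exists yet. A minimal usage sketch, assuming the top-level `cognee.search` re-exports the function shown above; the key inside `retriever_specific_config` is hypothetical, since accepted keys depend on the retriever selected by `query_type`:

```python
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    results = await cognee.search(
        query_text="What changed in the retrieval pipeline?",
        query_type=SearchType.GRAPH_COMPLETION,
        top_k=5,
        verbose=True,  # new in 0.5.2: include detailed result information
        retriever_specific_config={"example_option": True},  # hypothetical key
    )
    print(results)


asyncio.run(main())
```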
cognee/api/v1/sync/routers/get_sync_router.py
CHANGED
@@ -71,7 +71,7 @@ def get_sync_router() -> APIRouter:
         -H "Content-Type: application/json" \\
         -H "Cookie: auth_token=your-token" \\
         -d '{"dataset_ids": ["123e4567-e89b-12d3-a456-426614174000", "456e7890-e12b-34c5-d678-901234567000"]}'
-
+
     # Sync all user datasets (empty request body or null dataset_ids)
     curl -X POST "http://localhost:8000/api/v1/sync" \\
         -H "Content-Type: application/json" \\
@@ -88,7 +88,7 @@ def get_sync_router() -> APIRouter:
     - **413 Payload Too Large**: Dataset too large for current cloud plan
     - **429 Too Many Requests**: Rate limit exceeded

-    ## Notes
+    ## Notes
     - Sync operations run in the background - you get an immediate response
     - Use the returned run_id to track progress (status API coming soon)
     - Large datasets are automatically chunked for efficient transfer
@@ -179,7 +179,7 @@ def get_sync_router() -> APIRouter:
     ```

     ## Example Responses
-
+
     **No running syncs:**
     ```json
     {
cognee/cli/commands/add_command.py
CHANGED
@@ -21,7 +21,7 @@ binary streams, then stores them in a specified dataset for further processing.

 Supported Input Types:
     - **Text strings**: Direct text content
-    - **File paths**: Local file paths (absolute paths starting with "/")
+    - **File paths**: Local file paths (absolute paths starting with "/")
     - **File URLs**: "file:///absolute/path" or "file://relative/path"
     - **S3 paths**: "s3://bucket-name/path/to/file"
     - **Lists**: Multiple files or text strings in a single call
cognee/cli/commands/cognify_command.py
CHANGED
@@ -62,6 +62,11 @@ After successful cognify processing, use `cognee search` to query the knowledge
         parser.add_argument(
             "--verbose", "-v", action="store_true", help="Show detailed progress information"
         )
+        parser.add_argument(
+            "--chunks-per-batch",
+            type=int,
+            help="Number of chunks to process per task batch (try 50 for large single documents).",
+        )

     def execute(self, args: argparse.Namespace) -> None:
         try:
@@ -111,6 +116,7 @@ After successful cognify processing, use `cognee search` to query the knowledge
                 chunk_size=args.chunk_size,
                 ontology_file_path=args.ontology_file,
                 run_in_background=args.background,
+                chunks_per_batch=getattr(args, "chunks_per_batch", None),
             )
             return result
         except Exception as e:
cognee/cli/commands/config_command.py
CHANGED
@@ -17,7 +17,7 @@ The `cognee config` command allows you to view and modify configuration settings

 You can:
 - View all current configuration settings
-- Get specific configuration values
+- Get specific configuration values
 - Set configuration values
 - Unset (reset to default) specific configuration values
 - Reset all configuration to defaults
cognee/context_global_variables.py
CHANGED
@@ -121,13 +121,17 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
     )

     # Set vector and graph database configuration based on dataset database information
-    # TODO: Add better handling of vector and graph config
+    # TODO: Add better handling of vector and graph config across Cognee.
     # LRU_CACHE takes into account order of inputs, if order of inputs is changed it will be registered as a new DB adapter
     vector_config = {
         "vector_db_provider": dataset_database.vector_database_provider,
         "vector_db_url": dataset_database.vector_database_url,
         "vector_db_key": dataset_database.vector_database_key,
         "vector_db_name": dataset_database.vector_database_name,
+        "vector_db_port": dataset_database.vector_database_connection_info.get("port", ""),
+        "vector_db_host": dataset_database.vector_database_connection_info.get("host", ""),
+        "vector_db_username": dataset_database.vector_database_connection_info.get("username", ""),
+        "vector_db_password": dataset_database.vector_database_connection_info.get("password", ""),
     }

     graph_config = {
cognee/eval_framework/answer_generation/answer_generation_executor.py
CHANGED
@@ -32,16 +32,15 @@ class AnswerGeneratorExecutor:
             query_text = instance["question"]
             correct_answer = instance["answer"]

-
-
+            retrieved_objects = await retriever.get_retrieved_objects(query_text)
+            retrieval_context = await retriever.get_context_from_objects(
+                query_text, retrieved_objects
+            )
+            search_results = await retriever.get_completion_from_context(
+                query_text, retrieved_objects, retrieval_context
+            )

             ############
-            #:TODO This is a quick fix until we don't structure retriever results properly but lets not leave it like this...this is needed now due to the changed combined retriever structure..
-            if isinstance(retrieval_context, list):
-                retrieval_context = await retriever.convert_retrieved_objects_to_context(
-                    triplets=retrieval_context
-                )
-
             if isinstance(search_results, str):
                 search_results = [search_results]
             #############