cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +44 -4
- cognee/api/health.py +332 -0
- cognee/api/v1/add/add.py +5 -2
- cognee/api/v1/add/routers/get_add_router.py +3 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
- cognee/api/v1/cognify/cognify.py +8 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
- cognee/api/v1/config/config.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -8
- cognee/api/v1/delete/delete.py +16 -12
- cognee/api/v1/responses/routers/get_responses_router.py +3 -1
- cognee/api/v1/search/search.py +10 -0
- cognee/api/v1/settings/routers/get_settings_router.py +0 -2
- cognee/base_config.py +1 -0
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
- cognee/infrastructure/databases/graph/config.py +2 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
- cognee/infrastructure/databases/graph/kuzu/adapter.py +43 -16
- cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +151 -77
- cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
- cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
- cognee/infrastructure/databases/vector/create_vector_engine.py +31 -23
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
- cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
- cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
- cognee/infrastructure/files/utils/guess_file_type.py +2 -2
- cognee/infrastructure/files/utils/open_data_file.py +4 -23
- cognee/infrastructure/llm/LLMGateway.py +137 -0
- cognee/infrastructure/llm/__init__.py +14 -4
- cognee/infrastructure/llm/config.py +29 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
- cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
- cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
- cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
- cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
- cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
- cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
- cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
- cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
- cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
- cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
- cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
- cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
- cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
- cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
- cognee/infrastructure/llm/utils.py +3 -1
- cognee/infrastructure/loaders/LoaderEngine.py +156 -0
- cognee/infrastructure/loaders/LoaderInterface.py +73 -0
- cognee/infrastructure/loaders/__init__.py +18 -0
- cognee/infrastructure/loaders/core/__init__.py +7 -0
- cognee/infrastructure/loaders/core/audio_loader.py +98 -0
- cognee/infrastructure/loaders/core/image_loader.py +114 -0
- cognee/infrastructure/loaders/core/text_loader.py +90 -0
- cognee/infrastructure/loaders/create_loader_engine.py +32 -0
- cognee/infrastructure/loaders/external/__init__.py +22 -0
- cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
- cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
- cognee/infrastructure/loaders/get_loader_engine.py +18 -0
- cognee/infrastructure/loaders/supported_loaders.py +18 -0
- cognee/infrastructure/loaders/use_loader.py +21 -0
- cognee/infrastructure/loaders/utils/__init__.py +0 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/get_authorized_dataset.py +23 -0
- cognee/modules/data/models/Data.py +13 -3
- cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
- cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
- cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
- cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
- cognee/modules/engine/utils/generate_edge_id.py +5 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +45 -35
- cognee/modules/graph/methods/get_formatted_graph_data.py +8 -2
- cognee/modules/graph/utils/get_graph_from_model.py +93 -101
- cognee/modules/ingestion/data_types/TextData.py +8 -2
- cognee/modules/ingestion/save_data_to_file.py +1 -1
- cognee/modules/pipelines/exceptions/__init__.py +1 -0
- cognee/modules/pipelines/exceptions/exceptions.py +12 -0
- cognee/modules/pipelines/models/DataItemStatus.py +5 -0
- cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
- cognee/modules/pipelines/models/__init__.py +1 -0
- cognee/modules/pipelines/operations/pipeline.py +10 -2
- cognee/modules/pipelines/operations/run_tasks.py +252 -20
- cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
- cognee/modules/retrieval/chunks_retriever.py +23 -1
- cognee/modules/retrieval/code_retriever.py +66 -9
- cognee/modules/retrieval/completion_retriever.py +11 -9
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/insights_retriever.py +4 -0
- cognee/modules/retrieval/natural_language_retriever.py +9 -15
- cognee/modules/retrieval/summaries_retriever.py +23 -1
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
- cognee/modules/retrieval/utils/completion.py +6 -9
- cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
- cognee/modules/search/methods/search.py +5 -1
- cognee/modules/search/operations/__init__.py +1 -0
- cognee/modules/search/operations/select_search_type.py +42 -0
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +0 -8
- cognee/modules/settings/save_vector_db_config.py +1 -1
- cognee/shared/data_models.py +3 -1
- cognee/shared/logging_utils.py +0 -5
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
- cognee/tasks/graph/extract_graph_from_code.py +3 -2
- cognee/tasks/graph/extract_graph_from_data.py +4 -3
- cognee/tasks/graph/infer_data_ontology.py +5 -6
- cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
- cognee/tasks/ingestion/ingest_data.py +91 -61
- cognee/tasks/ingestion/resolve_data_directories.py +3 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +4 -1
- cognee/tasks/summarization/summarize_code.py +2 -3
- cognee/tasks/summarization/summarize_text.py +3 -2
- cognee/tests/test_cognee_server_start.py +12 -7
- cognee/tests/test_deduplication.py +2 -2
- cognee/tests/test_deletion.py +58 -17
- cognee/tests/test_graph_visualization_permissions.py +161 -0
- cognee/tests/test_neptune_analytics_graph.py +309 -0
- cognee/tests/test_neptune_analytics_hybrid.py +176 -0
- cognee/tests/{test_weaviate.py → test_neptune_analytics_vector.py} +86 -11
- cognee/tests/test_pgvector.py +5 -5
- cognee/tests/test_s3.py +1 -6
- cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
- cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
- cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
- cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
- cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
- cognee/tests/unit/modules/search/search_methods_test.py +55 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/METADATA +13 -9
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/RECORD +203 -164
- cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
- cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
- cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
- cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
- cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
- cognee/modules/data/extraction/extract_categories.py +0 -14
- cognee/tests/test_qdrant.py +0 -99
- distributed/Dockerfile +0 -34
- distributed/app.py +0 -4
- distributed/entrypoint.py +0 -71
- distributed/entrypoint.sh +0 -5
- distributed/modal_image.py +0 -11
- distributed/queues.py +0 -5
- distributed/tasks/queued_add_data_points.py +0 -13
- distributed/tasks/queued_add_edges.py +0 -13
- distributed/tasks/queued_add_nodes.py +0 -13
- distributed/test.py +0 -28
- distributed/utils.py +0 -19
- distributed/workers/data_point_saving_worker.py +0 -93
- distributed/workers/graph_saving_worker.py +0 -104
- /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
- /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
- /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
- /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
- /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
- /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
- /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
- /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
- {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
- {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
- /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/WHEEL +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/NOTICE.md +0 -0
cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Type
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
from baml_py import ClientRegistry
|
|
5
|
+
from cognee.shared.logging_utils import get_logger
|
|
6
|
+
from cognee.shared.data_models import SummarizedCode
|
|
7
|
+
from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b
|
|
8
|
+
from cognee.infrastructure.llm.config import get_llm_config
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
logger = get_logger("extract_summary_baml")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_mock_summarized_code():
    """Return a ``SummarizedCode`` populated with fixed placeholder values.

    Kept local to this module; the original note says this is to avoid
    circular imports.
    """
    mock_fields = {
        "high_level_summary": "Mock code summary",
        "key_features": ["Mock feature 1", "Mock feature 2"],
        "imports": ["mock_import"],
        "constants": ["MOCK_CONSTANT"],
        "classes": [],
        "functions": [],
        "workflow_description": "Mock workflow description",
    }
    return SummarizedCode(**mock_fields)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
async def extract_summary(content: str, response_model: Type[BaseModel]):
    """
    Extract a summary using the BAML framework.

    Exactly one BAML call is issued per invocation: ``SummarizeCode`` when
    ``response_model`` is ``SummarizedCode``, otherwise ``SummarizeContent``.

    Args:
        content: The content to summarize
        response_model: The Pydantic model type requested by the caller. It is
            only used to choose which BAML function to call; the BAML result is
            returned as-is and is not converted into ``response_model``.

    Returns:
        The object returned by the selected BAML function.
    """
    config = get_llm_config()
    baml_options = {"client_registry": config.baml_registry}

    # Decide up front which BAML function is needed so that a code summary
    # request does not also pay for an unused SummarizeContent call.
    if response_model is SummarizedCode:
        return await b.SummarizeCode(content, baml_options=baml_options)

    return await b.SummarizeContent(content, baml_options=baml_options)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
async def extract_code_summary(content: str):
    """
    Extract a code summary using the BAML framework, with mocking support.

    Mocking is enabled when the ``MOCK_CODE_SUMMARY`` environment variable is
    set to ``true``, ``1`` or ``yes`` (case-insensitive, surrounding
    whitespace ignored).

    Args:
        content: The code content to summarize

    Returns:
        The result of ``b.SummarizeCode`` on success. When mocking is enabled,
        or when the BAML call raises, the mock summary from
        ``get_mock_summarized_code()`` is returned instead.
    """
    # os.getenv with a string default always returns a str, so normalise the
    # text directly instead of checking for a bool that can never occur.
    mock_flag = os.getenv("MOCK_CODE_SUMMARY", "false").strip().lower()
    if mock_flag in ("true", "1", "yes"):
        return get_mock_summarized_code()

    try:
        config = get_llm_config()

        return await b.SummarizeCode(
            content, baml_options={"client_registry": config.baml_registry}
        )
    except Exception as e:
        # Deliberate best-effort: log the failure and hand back placeholder
        # data rather than propagating the error to the caller.
        logger.error(
            "Failed to extract code summary with BAML, falling back to mock summary", exc_info=e
        )
        return get_mock_summarized_code()
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from typing import Type
|
|
2
|
+
from pydantic import BaseModel
|
|
3
|
+
from cognee.infrastructure.llm.config import get_llm_config
|
|
4
|
+
from cognee.shared.logging_utils import get_logger, setup_logging
|
|
5
|
+
from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
async def extract_content_graph(
    content: str, response_model: Type[BaseModel], mode: str = "simple"
):
    """Extract a content graph from ``content`` via BAML's ``ExtractContentGraphGeneric``.

    Args:
        content: The text to extract a graph from.
        response_model: Accepted for interface compatibility; the body does not
            read it.
        mode: Forwarded unchanged to the BAML function (defaults to "simple").

    Returns:
        The object returned by ``b.ExtractContentGraphGeneric``.
    """
    llm_config = get_llm_config()

    # NOTE(review): logging is (re)initialised on every call and the logger
    # returned by get_logger is discarded — confirm this is intentional.
    setup_logging()
    get_logger(level="INFO")

    # TODO: a response_model-driven variant using ExtractDynamicContentGraph
    # was sketched here previously but is not wired in.
    return await b.ExtractContentGraphGeneric(
        content, mode=mode, baml_options={"client_registry": llm_config.baml_registry}
    )
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
// This helps us auto-generate libraries you can use in the language of
|
|
2
|
+
// your choice. You can have multiple generators if you use multiple languages.
|
|
3
|
+
// Just ensure that the output_dir is different for each generator.
|
|
4
|
+
generator target {
|
|
5
|
+
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
|
|
6
|
+
output_type "python/pydantic"
|
|
7
|
+
|
|
8
|
+
// Where the generated code will be saved (relative to baml_src/)
|
|
9
|
+
output_dir "../baml/"
|
|
10
|
+
|
|
11
|
+
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
|
|
12
|
+
// The BAML VSCode extension version should also match this version.
|
|
13
|
+
version "0.201.0"
|
|
14
|
+
|
|
15
|
+
// Valid values: "sync", "async"
|
|
16
|
+
// This controls what `b.FunctionName()` will be (sync or async).
|
|
17
|
+
default_client_mode sync
|
|
18
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from typing import Type
|
|
2
|
+
from pydantic import BaseModel
|
|
3
|
+
|
|
4
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
async def extract_categories(content: str, response_model: Type[BaseModel]):
    """Request structured output for ``content`` using the "classify_content.txt" prompt.

    Args:
        content: The text passed to the LLM as input.
        response_model: The Pydantic model type passed through to
            ``LLMGateway.acreate_structured_output``.

    Returns:
        Whatever ``LLMGateway.acreate_structured_output`` returns.
    """
    classification_prompt = LLMGateway.read_query_prompt("classify_content.txt")

    return await LLMGateway.acreate_structured_output(
        content, classification_prompt, response_model
    )
|
|
@@ -5,20 +5,29 @@ from typing import Type
|
|
|
5
5
|
from instructor.exceptions import InstructorRetryException
|
|
6
6
|
from pydantic import BaseModel
|
|
7
7
|
|
|
8
|
-
from cognee.infrastructure.llm.
|
|
9
|
-
from cognee.infrastructure.llm.prompts import read_query_prompt
|
|
8
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
10
9
|
from cognee.shared.data_models import SummarizedCode
|
|
11
|
-
from cognee.tasks.summarization.mock_summary import get_mock_summarized_code
|
|
12
10
|
|
|
13
11
|
logger = get_logger("extract_summary")
|
|
14
12
|
|
|
15
13
|
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
def get_mock_summarized_code():
    """Return a ``SummarizedCode`` populated with fixed placeholder values.

    Kept local to this module; the original note says this is to avoid
    circular imports.
    """
    placeholder = dict(
        high_level_summary="Mock code summary",
        key_features=["Mock feature 1", "Mock feature 2"],
        imports=["mock_import"],
        constants=["MOCK_CONSTANT"],
        classes=[],
        functions=[],
        workflow_description="Mock workflow description",
    )
    return SummarizedCode(**placeholder)
|
|
25
|
+
|
|
18
26
|
|
|
19
|
-
|
|
27
|
+
async def extract_summary(content: str, response_model: Type[BaseModel]):
|
|
28
|
+
system_prompt = LLMGateway.read_query_prompt("summarize_content.txt")
|
|
20
29
|
|
|
21
|
-
llm_output = await
|
|
30
|
+
llm_output = await LLMGateway.acreate_structured_output(content, system_prompt, response_model)
|
|
22
31
|
|
|
23
32
|
return llm_output
|
|
24
33
|
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from typing import Type
|
|
3
3
|
from pydantic import BaseModel
|
|
4
|
-
|
|
5
|
-
from cognee.infrastructure.llm.
|
|
6
|
-
from cognee.infrastructure.llm.config import
|
|
4
|
+
|
|
5
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
6
|
+
from cognee.infrastructure.llm.config import (
|
|
7
|
+
get_llm_config,
|
|
8
|
+
)
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
async def extract_content_graph(content: str, response_model: Type[BaseModel]):
|
|
10
|
-
llm_client = get_llm_client()
|
|
11
12
|
llm_config = get_llm_config()
|
|
12
13
|
|
|
13
14
|
prompt_path = llm_config.graph_prompt_path
|
|
@@ -21,9 +22,9 @@ async def extract_content_graph(content: str, response_model: Type[BaseModel]):
|
|
|
21
22
|
else:
|
|
22
23
|
base_directory = None
|
|
23
24
|
|
|
24
|
-
system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory)
|
|
25
|
+
system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
|
|
25
26
|
|
|
26
|
-
content_graph = await
|
|
27
|
+
content_graph = await LLMGateway.acreate_structured_output(
|
|
27
28
|
content, system_prompt, response_model
|
|
28
29
|
)
|
|
29
30
|
|
|
@@ -3,9 +3,15 @@ from pydantic import BaseModel
|
|
|
3
3
|
import instructor
|
|
4
4
|
|
|
5
5
|
from cognee.exceptions import InvalidValueError
|
|
6
|
-
from cognee.infrastructure.llm.llm_interface import
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
|
7
|
+
LLMInterface,
|
|
8
|
+
)
|
|
9
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
|
|
10
|
+
rate_limit_async,
|
|
11
|
+
sleep_and_retry_async,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
9
15
|
|
|
10
16
|
|
|
11
17
|
class AnthropicAdapter(LLMInterface):
|
|
@@ -85,7 +91,7 @@ class AnthropicAdapter(LLMInterface):
|
|
|
85
91
|
if not system_prompt:
|
|
86
92
|
raise InvalidValueError(message="No system prompt path provided.")
|
|
87
93
|
|
|
88
|
-
system_prompt = read_query_prompt(system_prompt)
|
|
94
|
+
system_prompt = LLMGateway.read_query_prompt(system_prompt)
|
|
89
95
|
|
|
90
96
|
formatted_prompt = (
|
|
91
97
|
f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import litellm
|
|
2
|
-
import logging
|
|
3
2
|
from pydantic import BaseModel
|
|
4
3
|
from typing import Type, Optional
|
|
5
4
|
from litellm import acompletion, JSONSchemaValidationError
|
|
@@ -7,9 +6,11 @@ from litellm import acompletion, JSONSchemaValidationError
|
|
|
7
6
|
from cognee.shared.logging_utils import get_logger
|
|
8
7
|
from cognee.modules.observability.get_observe import get_observe
|
|
9
8
|
from cognee.exceptions import InvalidValueError
|
|
10
|
-
from cognee.infrastructure.llm.llm_interface import
|
|
11
|
-
|
|
12
|
-
|
|
9
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
|
10
|
+
LLMInterface,
|
|
11
|
+
)
|
|
12
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
13
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
|
|
13
14
|
rate_limit_async,
|
|
14
15
|
sleep_and_retry_async,
|
|
15
16
|
)
|
|
@@ -135,7 +136,7 @@ class GeminiAdapter(LLMInterface):
|
|
|
135
136
|
text_input = "No user input provided."
|
|
136
137
|
if not system_prompt:
|
|
137
138
|
raise InvalidValueError(message="No system prompt path provided.")
|
|
138
|
-
system_prompt = read_query_prompt(system_prompt)
|
|
139
|
+
system_prompt = LLMGateway.read_query_prompt(system_prompt)
|
|
139
140
|
|
|
140
141
|
formatted_prompt = (
|
|
141
142
|
f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
|
|
File without changes
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""Adapter for Generic API LLM provider API"""
|
|
2
2
|
|
|
3
|
-
import logging
|
|
4
3
|
import litellm
|
|
5
4
|
import instructor
|
|
6
5
|
from typing import Type
|
|
@@ -10,8 +9,13 @@ from litellm.exceptions import ContentPolicyViolationError
|
|
|
10
9
|
from instructor.exceptions import InstructorRetryException
|
|
11
10
|
|
|
12
11
|
from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
|
|
13
|
-
from cognee.infrastructure.llm.llm_interface import
|
|
14
|
-
|
|
12
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
|
13
|
+
LLMInterface,
|
|
14
|
+
)
|
|
15
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
|
|
16
|
+
rate_limit_async,
|
|
17
|
+
sleep_and_retry_async,
|
|
18
|
+
)
|
|
15
19
|
|
|
16
20
|
|
|
17
21
|
class GenericAPIAdapter(LLMInterface):
|
|
@@ -4,7 +4,9 @@ from enum import Enum
|
|
|
4
4
|
|
|
5
5
|
from cognee.exceptions import InvalidValueError
|
|
6
6
|
from cognee.infrastructure.llm import get_llm_config
|
|
7
|
-
from cognee.infrastructure.llm.ollama.adapter import
|
|
7
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.ollama.adapter import (
|
|
8
|
+
OllamaAPIAdapter,
|
|
9
|
+
)
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
# Define an Enum for LLM Providers
|
|
@@ -59,7 +61,9 @@ def get_llm_client():
|
|
|
59
61
|
if llm_config.llm_api_key is None:
|
|
60
62
|
raise InvalidValueError(message="LLM API key is not set.")
|
|
61
63
|
|
|
62
|
-
from .openai.adapter import
|
|
64
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.openai.adapter import (
|
|
65
|
+
OpenAIAdapter,
|
|
66
|
+
)
|
|
63
67
|
|
|
64
68
|
return OpenAIAdapter(
|
|
65
69
|
api_key=llm_config.llm_api_key,
|
|
@@ -78,7 +82,9 @@ def get_llm_client():
|
|
|
78
82
|
if llm_config.llm_api_key is None:
|
|
79
83
|
raise InvalidValueError(message="LLM API key is not set.")
|
|
80
84
|
|
|
81
|
-
from .generic_llm_api.adapter import
|
|
85
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
|
|
86
|
+
GenericAPIAdapter,
|
|
87
|
+
)
|
|
82
88
|
|
|
83
89
|
return OllamaAPIAdapter(
|
|
84
90
|
llm_config.llm_endpoint,
|
|
@@ -89,7 +95,9 @@ def get_llm_client():
|
|
|
89
95
|
)
|
|
90
96
|
|
|
91
97
|
elif provider == LLMProvider.ANTHROPIC:
|
|
92
|
-
from .anthropic.adapter import
|
|
98
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.anthropic.adapter import (
|
|
99
|
+
AnthropicAdapter,
|
|
100
|
+
)
|
|
93
101
|
|
|
94
102
|
return AnthropicAdapter(max_tokens=max_tokens, model=llm_config.llm_model)
|
|
95
103
|
|
|
@@ -97,7 +105,9 @@ def get_llm_client():
|
|
|
97
105
|
if llm_config.llm_api_key is None:
|
|
98
106
|
raise InvalidValueError(message="LLM API key is not set.")
|
|
99
107
|
|
|
100
|
-
from .generic_llm_api.adapter import
|
|
108
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
|
|
109
|
+
GenericAPIAdapter,
|
|
110
|
+
)
|
|
101
111
|
|
|
102
112
|
return GenericAPIAdapter(
|
|
103
113
|
llm_config.llm_endpoint,
|
|
@@ -114,7 +124,9 @@ def get_llm_client():
|
|
|
114
124
|
if llm_config.llm_api_key is None:
|
|
115
125
|
raise InvalidValueError(message="LLM API key is not set.")
|
|
116
126
|
|
|
117
|
-
from .gemini.adapter import
|
|
127
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.gemini.adapter import (
|
|
128
|
+
GeminiAdapter,
|
|
129
|
+
)
|
|
118
130
|
|
|
119
131
|
return GeminiAdapter(
|
|
120
132
|
api_key=llm_config.llm_api_key,
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
from typing import Type, Protocol
|
|
4
4
|
from abc import abstractmethod
|
|
5
5
|
from pydantic import BaseModel
|
|
6
|
-
from cognee.infrastructure.llm.
|
|
6
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class LLMInterface(Protocol):
|
|
@@ -57,7 +57,7 @@ class LLMInterface(Protocol):
|
|
|
57
57
|
text_input = "No user input provided."
|
|
58
58
|
if not system_prompt:
|
|
59
59
|
raise ValueError("No system prompt path provided.")
|
|
60
|
-
system_prompt = read_query_prompt(system_prompt)
|
|
60
|
+
system_prompt = LLMGateway.read_query_prompt(system_prompt)
|
|
61
61
|
|
|
62
62
|
formatted_prompt = f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
|
|
63
63
|
|
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py
ADDED
|
File without changes
|
|
@@ -4,8 +4,10 @@ from typing import Type
|
|
|
4
4
|
from openai import OpenAI
|
|
5
5
|
from pydantic import BaseModel
|
|
6
6
|
|
|
7
|
-
from cognee.infrastructure.llm.llm_interface import LLMInterface
|
|
8
|
-
|
|
7
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
|
8
|
+
LLMInterface,
|
|
9
|
+
)
|
|
10
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
|
|
9
11
|
rate_limit_async,
|
|
10
12
|
sleep_and_retry_async,
|
|
11
13
|
)
|
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py
ADDED
|
File without changes
|
|
@@ -8,11 +8,13 @@ from litellm.exceptions import ContentPolicyViolationError
|
|
|
8
8
|
from instructor.exceptions import InstructorRetryException
|
|
9
9
|
|
|
10
10
|
from cognee.exceptions import InvalidValueError
|
|
11
|
-
from cognee.infrastructure.llm.prompts import read_query_prompt
|
|
12
|
-
from cognee.infrastructure.llm.llm_interface import LLMInterface
|
|
11
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
12
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
|
13
|
+
LLMInterface,
|
|
14
|
+
)
|
|
13
15
|
from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
|
|
14
16
|
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
|
15
|
-
from cognee.infrastructure.llm.rate_limiter import (
|
|
17
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
|
|
16
18
|
rate_limit_async,
|
|
17
19
|
rate_limit_sync,
|
|
18
20
|
sleep_and_retry_async,
|
|
@@ -324,7 +326,7 @@ class OpenAIAdapter(LLMInterface):
|
|
|
324
326
|
text_input = "No user input provided."
|
|
325
327
|
if not system_prompt:
|
|
326
328
|
raise InvalidValueError(message="No system prompt path provided.")
|
|
327
|
-
system_prompt = read_query_prompt(system_prompt)
|
|
329
|
+
system_prompt = LLMGateway.read_query_prompt(system_prompt)
|
|
328
330
|
|
|
329
331
|
formatted_prompt = (
|
|
330
332
|
f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
|
|
@@ -50,11 +50,6 @@ from limits import RateLimitItemPerMinute, storage
|
|
|
50
50
|
from limits.strategies import MovingWindowRateLimiter
|
|
51
51
|
from cognee.shared.logging_utils import get_logger
|
|
52
52
|
from cognee.infrastructure.llm.config import get_llm_config
|
|
53
|
-
import threading
|
|
54
|
-
import logging
|
|
55
|
-
import functools
|
|
56
|
-
import openai
|
|
57
|
-
import os
|
|
58
53
|
|
|
59
54
|
logger = get_logger()
|
|
60
55
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Any
|
|
1
|
+
from typing import List, Any
|
|
2
2
|
|
|
3
3
|
from ..tokenizer_interface import TokenizerInterface
|
|
4
4
|
|
|
@@ -24,7 +24,9 @@ class GeminiTokenizer(TokenizerInterface):
|
|
|
24
24
|
|
|
25
25
|
# Get LLM API key from config
|
|
26
26
|
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
|
|
27
|
-
from cognee.infrastructure.llm.config import get_llm_config
|
|
27
|
+
from cognee.infrastructure.llm.config import (
|
|
28
|
+
get_llm_config,
|
|
29
|
+
)
|
|
28
30
|
|
|
29
31
|
config = get_embedding_config()
|
|
30
32
|
llm_config = get_llm_config()
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Any
|
|
1
|
+
from typing import List, Any, Optional
|
|
2
2
|
import tiktoken
|
|
3
3
|
|
|
4
4
|
from ..tokenizer_interface import TokenizerInterface
|
|
@@ -12,13 +12,17 @@ class TikTokenTokenizer(TokenizerInterface):
|
|
|
12
12
|
|
|
13
13
|
def __init__(
|
|
14
14
|
self,
|
|
15
|
-
model: str,
|
|
15
|
+
model: Optional[str] = None,
|
|
16
16
|
max_tokens: int = 8191,
|
|
17
17
|
):
|
|
18
18
|
self.model = model
|
|
19
19
|
self.max_tokens = max_tokens
|
|
20
20
|
# Initialize TikToken for GPT based on model
|
|
21
|
-
|
|
21
|
+
if model:
|
|
22
|
+
self.tokenizer = tiktoken.encoding_for_model(self.model)
|
|
23
|
+
else:
|
|
24
|
+
# Use default if model not provided
|
|
25
|
+
self.tokenizer = tiktoken.get_encoding("cl100k_base")
|
|
22
26
|
|
|
23
27
|
def extract_tokens(self, text: str) -> List[Any]:
|
|
24
28
|
"""
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import litellm
|
|
2
2
|
|
|
3
|
-
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
|
3
|
+
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import (
|
|
4
|
+
get_llm_client,
|
|
5
|
+
)
|
|
4
6
|
from cognee.shared.logging_utils import get_logger
|
|
5
7
|
|
|
6
8
|
logger = get_logger()
|