cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. cognee/api/client.py +44 -4
  2. cognee/api/health.py +332 -0
  3. cognee/api/v1/add/add.py +5 -2
  4. cognee/api/v1/add/routers/get_add_router.py +3 -0
  5. cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
  6. cognee/api/v1/cognify/cognify.py +8 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
  8. cognee/api/v1/config/config.py +3 -1
  9. cognee/api/v1/datasets/routers/get_datasets_router.py +2 -8
  10. cognee/api/v1/delete/delete.py +16 -12
  11. cognee/api/v1/responses/routers/get_responses_router.py +3 -1
  12. cognee/api/v1/search/search.py +10 -0
  13. cognee/api/v1/settings/routers/get_settings_router.py +0 -2
  14. cognee/base_config.py +1 -0
  15. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
  16. cognee/infrastructure/databases/graph/config.py +2 -0
  17. cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
  18. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
  19. cognee/infrastructure/databases/graph/kuzu/adapter.py +43 -16
  20. cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
  21. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +151 -77
  22. cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
  23. cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
  24. cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
  25. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
  26. cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
  27. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
  28. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +11 -3
  29. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
  30. cognee/infrastructure/databases/vector/create_vector_engine.py +31 -23
  31. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
  32. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
  33. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
  34. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
  35. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
  36. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
  37. cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
  38. cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
  39. cognee/infrastructure/files/utils/guess_file_type.py +2 -2
  40. cognee/infrastructure/files/utils/open_data_file.py +4 -23
  41. cognee/infrastructure/llm/LLMGateway.py +137 -0
  42. cognee/infrastructure/llm/__init__.py +14 -4
  43. cognee/infrastructure/llm/config.py +29 -1
  44. cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
  45. cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
  46. cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
  47. cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
  48. cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
  49. cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
  50. cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
  51. cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
  52. cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
  53. cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
  54. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
  55. cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
  65. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
  66. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
  67. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
  68. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
  69. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
  70. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
  71. cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
  72. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
  73. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
  74. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
  77. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
  78. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
  79. cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
  80. cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
  82. cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
  83. cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
  84. cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
  85. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
  86. cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
  87. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
  88. cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
  89. cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
  90. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
  91. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
  92. cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
  93. cognee/infrastructure/llm/utils.py +3 -1
  94. cognee/infrastructure/loaders/LoaderEngine.py +156 -0
  95. cognee/infrastructure/loaders/LoaderInterface.py +73 -0
  96. cognee/infrastructure/loaders/__init__.py +18 -0
  97. cognee/infrastructure/loaders/core/__init__.py +7 -0
  98. cognee/infrastructure/loaders/core/audio_loader.py +98 -0
  99. cognee/infrastructure/loaders/core/image_loader.py +114 -0
  100. cognee/infrastructure/loaders/core/text_loader.py +90 -0
  101. cognee/infrastructure/loaders/create_loader_engine.py +32 -0
  102. cognee/infrastructure/loaders/external/__init__.py +22 -0
  103. cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
  104. cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
  105. cognee/infrastructure/loaders/get_loader_engine.py +18 -0
  106. cognee/infrastructure/loaders/supported_loaders.py +18 -0
  107. cognee/infrastructure/loaders/use_loader.py +21 -0
  108. cognee/infrastructure/loaders/utils/__init__.py +0 -0
  109. cognee/modules/data/methods/__init__.py +1 -0
  110. cognee/modules/data/methods/get_authorized_dataset.py +23 -0
  111. cognee/modules/data/models/Data.py +13 -3
  112. cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
  113. cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
  114. cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
  115. cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
  116. cognee/modules/engine/utils/generate_edge_id.py +5 -0
  117. cognee/modules/graph/cognee_graph/CogneeGraph.py +45 -35
  118. cognee/modules/graph/methods/get_formatted_graph_data.py +8 -2
  119. cognee/modules/graph/utils/get_graph_from_model.py +93 -101
  120. cognee/modules/ingestion/data_types/TextData.py +8 -2
  121. cognee/modules/ingestion/save_data_to_file.py +1 -1
  122. cognee/modules/pipelines/exceptions/__init__.py +1 -0
  123. cognee/modules/pipelines/exceptions/exceptions.py +12 -0
  124. cognee/modules/pipelines/models/DataItemStatus.py +5 -0
  125. cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
  126. cognee/modules/pipelines/models/__init__.py +1 -0
  127. cognee/modules/pipelines/operations/pipeline.py +10 -2
  128. cognee/modules/pipelines/operations/run_tasks.py +252 -20
  129. cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
  130. cognee/modules/retrieval/chunks_retriever.py +23 -1
  131. cognee/modules/retrieval/code_retriever.py +66 -9
  132. cognee/modules/retrieval/completion_retriever.py +11 -9
  133. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
  134. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
  135. cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
  136. cognee/modules/retrieval/graph_completion_retriever.py +1 -1
  137. cognee/modules/retrieval/insights_retriever.py +4 -0
  138. cognee/modules/retrieval/natural_language_retriever.py +9 -15
  139. cognee/modules/retrieval/summaries_retriever.py +23 -1
  140. cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
  141. cognee/modules/retrieval/utils/completion.py +6 -9
  142. cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
  143. cognee/modules/search/methods/search.py +5 -1
  144. cognee/modules/search/operations/__init__.py +1 -0
  145. cognee/modules/search/operations/select_search_type.py +42 -0
  146. cognee/modules/search/types/SearchType.py +1 -0
  147. cognee/modules/settings/get_settings.py +0 -8
  148. cognee/modules/settings/save_vector_db_config.py +1 -1
  149. cognee/shared/data_models.py +3 -1
  150. cognee/shared/logging_utils.py +0 -5
  151. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  152. cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
  153. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
  154. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
  155. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
  156. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
  157. cognee/tasks/graph/extract_graph_from_code.py +3 -2
  158. cognee/tasks/graph/extract_graph_from_data.py +4 -3
  159. cognee/tasks/graph/infer_data_ontology.py +5 -6
  160. cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
  161. cognee/tasks/ingestion/ingest_data.py +91 -61
  162. cognee/tasks/ingestion/resolve_data_directories.py +3 -0
  163. cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
  164. cognee/tasks/storage/index_data_points.py +1 -1
  165. cognee/tasks/storage/index_graph_edges.py +4 -1
  166. cognee/tasks/summarization/summarize_code.py +2 -3
  167. cognee/tasks/summarization/summarize_text.py +3 -2
  168. cognee/tests/test_cognee_server_start.py +12 -7
  169. cognee/tests/test_deduplication.py +2 -2
  170. cognee/tests/test_deletion.py +58 -17
  171. cognee/tests/test_graph_visualization_permissions.py +161 -0
  172. cognee/tests/test_neptune_analytics_graph.py +309 -0
  173. cognee/tests/test_neptune_analytics_hybrid.py +176 -0
  174. cognee/tests/{test_weaviate.py → test_neptune_analytics_vector.py} +86 -11
  175. cognee/tests/test_pgvector.py +5 -5
  176. cognee/tests/test_s3.py +1 -6
  177. cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
  178. cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
  179. cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
  180. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
  181. cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
  182. cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
  183. cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
  184. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
  185. cognee/tests/unit/modules/search/search_methods_test.py +55 -0
  186. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/METADATA +13 -9
  187. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/RECORD +203 -164
  188. cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
  189. cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
  190. cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
  191. cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
  192. cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
  193. cognee/modules/data/extraction/extract_categories.py +0 -14
  194. cognee/tests/test_qdrant.py +0 -99
  195. distributed/Dockerfile +0 -34
  196. distributed/app.py +0 -4
  197. distributed/entrypoint.py +0 -71
  198. distributed/entrypoint.sh +0 -5
  199. distributed/modal_image.py +0 -11
  200. distributed/queues.py +0 -5
  201. distributed/tasks/queued_add_data_points.py +0 -13
  202. distributed/tasks/queued_add_edges.py +0 -13
  203. distributed/tasks/queued_add_nodes.py +0 -13
  204. distributed/test.py +0 -28
  205. distributed/utils.py +0 -19
  206. distributed/workers/data_point_saving_worker.py +0 -93
  207. distributed/workers/graph_saving_worker.py +0 -104
  208. /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
  209. /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
  210. /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
  211. /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
  212. /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
  213. /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
  214. /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
  215. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
  216. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
  217. /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
  218. {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
  219. {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
  220. /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
  221. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/WHEEL +0 -0
  222. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/LICENSE +0 -0
  223. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/NOTICE.md +0 -0

cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py
@@ -0,0 +1,89 @@
+import os
+from typing import Type
+from pydantic import BaseModel
+from baml_py import ClientRegistry
+from cognee.shared.logging_utils import get_logger
+from cognee.shared.data_models import SummarizedCode
+from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b
+from cognee.infrastructure.llm.config import get_llm_config
+
+
+logger = get_logger("extract_summary_baml")
+
+
+def get_mock_summarized_code():
+    """Local mock function to avoid circular imports."""
+    return SummarizedCode(
+        high_level_summary="Mock code summary",
+        key_features=["Mock feature 1", "Mock feature 2"],
+        imports=["mock_import"],
+        constants=["MOCK_CONSTANT"],
+        classes=[],
+        functions=[],
+        workflow_description="Mock workflow description",
+    )
+
+
+async def extract_summary(content: str, response_model: Type[BaseModel]):
+    """
+    Extract summary using BAML framework.
+
+    Args:
+        content: The content to summarize
+        response_model: The Pydantic model type for the response
+
+    Returns:
+        BaseModel: The summarized content in the specified format
+    """
+    config = get_llm_config()
+
+    # Use BAML's SummarizeContent function
+    summary_result = await b.SummarizeContent(
+        content, baml_options={"client_registry": config.baml_registry}
+    )
+
+    # Convert BAML result to the expected response model
+    if response_model is SummarizedCode:
+        # If it's asking for SummarizedCode but we got SummarizedContent,
+        # we need to use SummarizeCode instead
+        code_result = await b.SummarizeCode(
+            content, baml_options={"client_registry": config.baml_registry}
+        )
+        return code_result
+    else:
+        # For other models, return the summary result
+        return summary_result
+
+
+async def extract_code_summary(content: str):
+    """
+    Extract code summary using BAML framework with mocking support.
+
+    Args:
+        content: The code content to summarize
+
+    Returns:
+        SummarizedCode: The summarized code information
+    """
+    enable_mocking = os.getenv("MOCK_CODE_SUMMARY", "false")
+    if isinstance(enable_mocking, bool):
+        enable_mocking = str(enable_mocking).lower()
+    enable_mocking = enable_mocking in ("true", "1", "yes")
+
+    if enable_mocking:
+        result = get_mock_summarized_code()
+        return result
+    else:
+        try:
+            config = get_llm_config()
+
+            result = await b.SummarizeCode(
+                content, baml_options={"client_registry": config.baml_registry}
+            )
+        except Exception as e:
+            logger.error(
+                "Failed to extract code summary with BAML, falling back to mock summary", exc_info=e
+            )
+            result = get_mock_summarized_code()
+
+    return result
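
The MOCK_CODE_SUMMARY environment variable is read on every call, so tests can bypass the BAML client entirely. A minimal usage sketch of the mock path (the asyncio driver and sample input are illustrative; the import path and attribute names come from the hunk above):

    import asyncio
    import os

    from cognee.infrastructure.llm.structured_output_framework.baml.baml_src.extraction.extract_summary import (
        extract_code_summary,
    )

    async def main():
        # With mocking enabled, no LLM call is made; the canned SummarizedCode is returned.
        os.environ["MOCK_CODE_SUMMARY"] = "true"
        summary = await extract_code_summary("def add(a, b):\n    return a + b")
        print(summary.high_level_summary)  # -> "Mock code summary"

    asyncio.run(main())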

cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py
@@ -0,0 +1,33 @@
+from typing import Type
+from pydantic import BaseModel
+from cognee.infrastructure.llm.config import get_llm_config
+from cognee.shared.logging_utils import get_logger, setup_logging
+from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b
+
+
+async def extract_content_graph(
+    content: str, response_model: Type[BaseModel], mode: str = "simple"
+):
+    config = get_llm_config()
+    setup_logging()
+
+    get_logger(level="INFO")
+
+    # if response_model:
+    #     # tb = TypeBuilder()
+    #     # country = tb.union \
+    #     # ([tb.literal_string("USA"), tb.literal_string("UK"), tb.literal_string("Germany"), tb.literal_string("other")])
+    #     # tb.Node.add_property("country", country)
+    #
+    #     graph = await b.ExtractDynamicContentGraph(
+    #         content, mode=mode, baml_options={"client_registry": baml_registry}
+    #     )
+    #
+    #     return graph
+
+    # else:
+    graph = await b.ExtractContentGraphGeneric(
+        content, mode=mode, baml_options={"client_registry": config.baml_registry}
+    )
+
+    return graph

cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml
@@ -0,0 +1,18 @@
+// This helps use auto generate libraries you can use in the language of
+// your choice. You can have multiple generators if you use multiple languages.
+// Just ensure that the output_dir is different for each generator.
+generator target {
+    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
+    output_type "python/pydantic"
+
+    // Where the generated code will be saved (relative to baml_src/)
+    output_dir "../baml/"
+
+    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
+    // The BAML VSCode extension version should also match this version.
+    version "0.201.0"
+
+    // Valid values: "sync", "async"
+    // This controls what `b.FunctionName()` will be (sync or async).
+    default_client_mode sync
+}
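
The generator emits the Python/Pydantic client into ../baml/, i.e. the baml_client package the new extraction modules import. Note that default_client_mode is sync while the extraction code uses the async client; BAML generates both entry points, as the import in extract_summary.py above shows:

    # Both sync and async clients are generated; this release uses the async one.
    from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b

    # e.g. result = await b.SummarizeCode(content, baml_options={"client_registry": config.baml_registry})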

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py
@@ -0,0 +1,3 @@
+from .knowledge_graph.extract_content_graph import extract_content_graph
+from .extract_categories import extract_categories
+from .extract_summary import extract_summary, extract_code_summary

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py
@@ -0,0 +1,12 @@
+from typing import Type
+from pydantic import BaseModel
+
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+
+
+async def extract_categories(content: str, response_model: Type[BaseModel]):
+    system_prompt = LLMGateway.read_query_prompt("classify_content.txt")
+
+    llm_output = await LLMGateway.acreate_structured_output(content, system_prompt, response_model)
+
+    return llm_output
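
extract_categories delegates prompt loading and structured output to the new LLMGateway facade. A short usage sketch, assuming any Pydantic response model (ContentCategory here is hypothetical):

    import asyncio
    from pydantic import BaseModel

    from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
        extract_categories,
    )

    class ContentCategory(BaseModel):  # hypothetical response model
        label: str
        confidence: float

    result = asyncio.run(extract_categories("Quarterly revenue grew 12%.", ContentCategory))
    print(result.label)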

cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py
@@ -5,20 +5,29 @@ from typing import Type
 from instructor.exceptions import InstructorRetryException
 from pydantic import BaseModel
 
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
-from cognee.infrastructure.llm.prompts import read_query_prompt
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.shared.data_models import SummarizedCode
-from cognee.tasks.summarization.mock_summary import get_mock_summarized_code
 
 logger = get_logger("extract_summary")
 
 
-async def extract_summary(content: str, response_model: Type[BaseModel]):
-    llm_client = get_llm_client()
+def get_mock_summarized_code():
+    """Local mock function to avoid circular imports."""
+    return SummarizedCode(
+        high_level_summary="Mock code summary",
+        key_features=["Mock feature 1", "Mock feature 2"],
+        imports=["mock_import"],
+        constants=["MOCK_CONSTANT"],
+        classes=[],
+        functions=[],
+        workflow_description="Mock workflow description",
+    )
+
 
-    system_prompt = read_query_prompt("summarize_content.txt")
+async def extract_summary(content: str, response_model: Type[BaseModel]):
+    system_prompt = LLMGateway.read_query_prompt("summarize_content.txt")
 
-    llm_output = await llm_client.acreate_structured_output(content, system_prompt, response_model)
+    llm_output = await LLMGateway.acreate_structured_output(content, system_prompt, response_model)
 
     return llm_output
 
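
This hunk, and every adapter hunk below, replaces direct get_llm_client()/read_query_prompt calls with static calls on LLMGateway (a new file, cognee/infrastructure/llm/LLMGateway.py, +137 lines, not shown in this excerpt). A minimal sketch of the facade pattern being adopted, assuming the gateway merely delegates to the existing prompt helpers and provider client; only the method names exercised in these hunks (read_query_prompt, render_prompt, acreate_structured_output) are confirmed by the diff:

    from typing import Type
    from pydantic import BaseModel

    class LLMGateway:
        """Sketch, not the shipped implementation: lazy imports inside each
        method break the circular-import chains the mock comment mentions."""

        @staticmethod
        def read_query_prompt(prompt_file: str) -> str:
            from cognee.infrastructure.llm.prompts import read_query_prompt
            return read_query_prompt(prompt_file)

        @staticmethod
        async def acreate_structured_output(
            text_input: str, system_prompt: str, response_model: Type[BaseModel]
        ):
            from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import (
                get_llm_client,
            )
            return await get_llm_client().acreate_structured_output(
                text_input, system_prompt, response_model
            )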

cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py
@@ -1,13 +1,14 @@
 import os
 from typing import Type
 from pydantic import BaseModel
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
-from cognee.infrastructure.llm.prompts import render_prompt
-from cognee.infrastructure.llm.config import get_llm_config
+
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.config import (
+    get_llm_config,
+)
 
 
 async def extract_content_graph(content: str, response_model: Type[BaseModel]):
-    llm_client = get_llm_client()
     llm_config = get_llm_config()
 
     prompt_path = llm_config.graph_prompt_path
@@ -21,9 +22,9 @@ async def extract_content_graph(content: str, response_model: Type[BaseModel]):
     else:
         base_directory = None
 
-    system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory)
+    system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
 
-    content_graph = await llm_client.acreate_structured_output(
+    content_graph = await LLMGateway.acreate_structured_output(
         content, system_prompt, response_model
     )
 

cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py
@@ -3,9 +3,15 @@ from pydantic import BaseModel
 import instructor
 
 from cognee.exceptions import InvalidValueError
-from cognee.infrastructure.llm.llm_interface import LLMInterface
-from cognee.infrastructure.llm.prompts import read_query_prompt
-from cognee.infrastructure.llm.rate_limiter import rate_limit_async, sleep_and_retry_async
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
+    rate_limit_async,
+    sleep_and_retry_async,
+)
+
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 
 
 class AnthropicAdapter(LLMInterface):
@@ -85,7 +91,7 @@ class AnthropicAdapter(LLMInterface):
         if not system_prompt:
             raise InvalidValueError(message="No system prompt path provided.")
 
-        system_prompt = read_query_prompt(system_prompt)
+        system_prompt = LLMGateway.read_query_prompt(system_prompt)
 
         formatted_prompt = (
             f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""

cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py
@@ -1,5 +1,4 @@
 import litellm
-import logging
 from pydantic import BaseModel
 from typing import Type, Optional
 from litellm import acompletion, JSONSchemaValidationError
@@ -7,9 +6,11 @@ from litellm import acompletion, JSONSchemaValidationError
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.observability.get_observe import get_observe
 from cognee.exceptions import InvalidValueError
-from cognee.infrastructure.llm.llm_interface import LLMInterface
-from cognee.infrastructure.llm.prompts import read_query_prompt
-from cognee.infrastructure.llm.rate_limiter import (
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
     rate_limit_async,
     sleep_and_retry_async,
 )
@@ -135,7 +136,7 @@ class GeminiAdapter(LLMInterface):
             text_input = "No user input provided."
         if not system_prompt:
            raise InvalidValueError(message="No system prompt path provided.")
-        system_prompt = read_query_prompt(system_prompt)
+        system_prompt = LLMGateway.read_query_prompt(system_prompt)
 
         formatted_prompt = (
             f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""

cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py
@@ -1,6 +1,5 @@
 """Adapter for Generic API LLM provider API"""
 
-import logging
 import litellm
 import instructor
 from typing import Type
@@ -10,8 +9,13 @@ from litellm.exceptions import ContentPolicyViolationError
 from instructor.exceptions import InstructorRetryException
 
 from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
-from cognee.infrastructure.llm.llm_interface import LLMInterface
-from cognee.infrastructure.llm.rate_limiter import rate_limit_async, sleep_and_retry_async
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
+    rate_limit_async,
+    sleep_and_retry_async,
+)
 
 
 class GenericAPIAdapter(LLMInterface):

cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py}
@@ -4,7 +4,9 @@ from enum import Enum
 
 from cognee.exceptions import InvalidValueError
 from cognee.infrastructure.llm import get_llm_config
-from cognee.infrastructure.llm.ollama.adapter import OllamaAPIAdapter
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.ollama.adapter import (
+    OllamaAPIAdapter,
+)
 
 
 # Define an Enum for LLM Providers
@@ -59,7 +61,9 @@ def get_llm_client():
         if llm_config.llm_api_key is None:
             raise InvalidValueError(message="LLM API key is not set.")
 
-        from .openai.adapter import OpenAIAdapter
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.openai.adapter import (
+            OpenAIAdapter,
+        )
 
         return OpenAIAdapter(
             api_key=llm_config.llm_api_key,
@@ -78,7 +82,9 @@ def get_llm_client():
         if llm_config.llm_api_key is None:
             raise InvalidValueError(message="LLM API key is not set.")
 
-        from .generic_llm_api.adapter import GenericAPIAdapter
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
+            GenericAPIAdapter,
+        )
 
         return OllamaAPIAdapter(
             llm_config.llm_endpoint,
@@ -89,7 +95,9 @@ def get_llm_client():
         )
 
     elif provider == LLMProvider.ANTHROPIC:
-        from .anthropic.adapter import AnthropicAdapter
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.anthropic.adapter import (
+            AnthropicAdapter,
+        )
 
         return AnthropicAdapter(max_tokens=max_tokens, model=llm_config.llm_model)
 
@@ -97,7 +105,9 @@ def get_llm_client():
         if llm_config.llm_api_key is None:
             raise InvalidValueError(message="LLM API key is not set.")
 
-        from .generic_llm_api.adapter import GenericAPIAdapter
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
+            GenericAPIAdapter,
+        )
 
         return GenericAPIAdapter(
             llm_config.llm_endpoint,
@@ -114,7 +124,9 @@ def get_llm_client():
         if llm_config.llm_api_key is None:
             raise InvalidValueError(message="LLM API key is not set.")
 
-        from .gemini.adapter import GeminiAdapter
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.gemini.adapter import (
+            GeminiAdapter,
+        )
 
         return GeminiAdapter(
             api_key=llm_config.llm_api_key,
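
The provider dispatch in get_llm_client() is unchanged; only the adapter import paths move under structured_output_framework, with the relative imports made absolute. Illustrative call (names per this diff; provider resolution follows the branches shown above):

    from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import (
        get_llm_client,
    )

    # Returns an OpenAIAdapter, OllamaAPIAdapter, AnthropicAdapter, GenericAPIAdapter,
    # or GeminiAdapter depending on the configured provider; raises InvalidValueError
    # when the required LLM API key is not set.
    client = get_llm_client()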

cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py}
@@ -3,7 +3,7 @@
 from typing import Type, Protocol
 from abc import abstractmethod
 from pydantic import BaseModel
-from cognee.infrastructure.llm.prompts import read_query_prompt
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 
 
 class LLMInterface(Protocol):
@@ -57,7 +57,7 @@ class LLMInterface(Protocol):
             text_input = "No user input provided."
         if not system_prompt:
             raise ValueError("No system prompt path provided.")
-        system_prompt = read_query_prompt(system_prompt)
+        system_prompt = LLMGateway.read_query_prompt(system_prompt)
 
         formatted_prompt = f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
 

cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py
@@ -4,8 +4,10 @@ from typing import Type
 from openai import OpenAI
 from pydantic import BaseModel
 
-from cognee.infrastructure.llm.llm_interface import LLMInterface
-from cognee.infrastructure.llm.rate_limiter import (
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
     rate_limit_async,
     sleep_and_retry_async,
 )

cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py
@@ -8,11 +8,13 @@ from litellm.exceptions import ContentPolicyViolationError
 from instructor.exceptions import InstructorRetryException
 
 from cognee.exceptions import InvalidValueError
-from cognee.infrastructure.llm.prompts import read_query_prompt
-from cognee.infrastructure.llm.llm_interface import LLMInterface
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
 from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
 from cognee.infrastructure.files.utils.open_data_file import open_data_file
-from cognee.infrastructure.llm.rate_limiter import (
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
     rate_limit_async,
     rate_limit_sync,
     sleep_and_retry_async,
@@ -324,7 +326,7 @@ class OpenAIAdapter(LLMInterface):
             text_input = "No user input provided."
         if not system_prompt:
             raise InvalidValueError(message="No system prompt path provided.")
-        system_prompt = read_query_prompt(system_prompt)
+        system_prompt = LLMGateway.read_query_prompt(system_prompt)
 
         formatted_prompt = (
             f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""

cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py}
@@ -50,11 +50,6 @@ from limits import RateLimitItemPerMinute, storage
 from limits.strategies import MovingWindowRateLimiter
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.llm.config import get_llm_config
-import threading
-import logging
-import functools
-import openai
-import os
 
 logger = get_logger()
 

cognee/infrastructure/llm/tokenizer/Gemini/adapter.py
@@ -1,4 +1,4 @@
-from typing import List, Any, Union
+from typing import List, Any
 
 from ..tokenizer_interface import TokenizerInterface
 
@@ -24,7 +24,9 @@ class GeminiTokenizer(TokenizerInterface):
 
         # Get LLM API key from config
         from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
-        from cognee.infrastructure.llm.config import get_llm_config
+        from cognee.infrastructure.llm.config import (
+            get_llm_config,
+        )
 
         config = get_embedding_config()
         llm_config = get_llm_config()

cognee/infrastructure/llm/tokenizer/TikToken/adapter.py
@@ -1,4 +1,4 @@
-from typing import List, Any
+from typing import List, Any, Optional
 import tiktoken
 
 from ..tokenizer_interface import TokenizerInterface
@@ -12,13 +12,17 @@ class TikTokenTokenizer(TokenizerInterface):
 
     def __init__(
         self,
-        model: str,
+        model: Optional[str] = None,
         max_tokens: int = 8191,
     ):
         self.model = model
         self.max_tokens = max_tokens
         # Initialize TikToken for GPT based on model
-        self.tokenizer = tiktoken.encoding_for_model(self.model)
+        if model:
+            self.tokenizer = tiktoken.encoding_for_model(self.model)
+        else:
+            # Use default if model not provided
+            self.tokenizer = tiktoken.get_encoding("cl100k_base")
 
     def extract_tokens(self, text: str) -> List[Any]:
         """

cognee/infrastructure/llm/tokenizer/__init__.py
@@ -1 +1,5 @@
 from .tokenizer_interface import TokenizerInterface
+from .Mistral import MistralTokenizer
+from .Gemini import GeminiTokenizer
+from .HuggingFace import HuggingFaceTokenizer
+from .TikToken import TikTokenTokenizer

cognee/infrastructure/llm/utils.py
@@ -1,6 +1,8 @@
 import litellm
 
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import (
+    get_llm_client,
+)
 from cognee.shared.logging_utils import get_logger
 
 logger = get_logger()