cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +44 -4
- cognee/api/health.py +332 -0
- cognee/api/v1/add/add.py +5 -2
- cognee/api/v1/add/routers/get_add_router.py +3 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
- cognee/api/v1/cognify/cognify.py +8 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
- cognee/api/v1/config/config.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -8
- cognee/api/v1/delete/delete.py +16 -12
- cognee/api/v1/responses/routers/get_responses_router.py +3 -1
- cognee/api/v1/search/search.py +10 -0
- cognee/api/v1/settings/routers/get_settings_router.py +0 -2
- cognee/base_config.py +1 -0
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
- cognee/infrastructure/databases/graph/config.py +2 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
- cognee/infrastructure/databases/graph/kuzu/adapter.py +43 -16
- cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +151 -77
- cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
- cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
- cognee/infrastructure/databases/vector/create_vector_engine.py +31 -23
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
- cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
- cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
- cognee/infrastructure/files/utils/guess_file_type.py +2 -2
- cognee/infrastructure/files/utils/open_data_file.py +4 -23
- cognee/infrastructure/llm/LLMGateway.py +137 -0
- cognee/infrastructure/llm/__init__.py +14 -4
- cognee/infrastructure/llm/config.py +29 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
- cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
- cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
- cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
- cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
- cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
- cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
- cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
- cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
- cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
- cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
- cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
- cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
- cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
- cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
- cognee/infrastructure/llm/utils.py +3 -1
- cognee/infrastructure/loaders/LoaderEngine.py +156 -0
- cognee/infrastructure/loaders/LoaderInterface.py +73 -0
- cognee/infrastructure/loaders/__init__.py +18 -0
- cognee/infrastructure/loaders/core/__init__.py +7 -0
- cognee/infrastructure/loaders/core/audio_loader.py +98 -0
- cognee/infrastructure/loaders/core/image_loader.py +114 -0
- cognee/infrastructure/loaders/core/text_loader.py +90 -0
- cognee/infrastructure/loaders/create_loader_engine.py +32 -0
- cognee/infrastructure/loaders/external/__init__.py +22 -0
- cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
- cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
- cognee/infrastructure/loaders/get_loader_engine.py +18 -0
- cognee/infrastructure/loaders/supported_loaders.py +18 -0
- cognee/infrastructure/loaders/use_loader.py +21 -0
- cognee/infrastructure/loaders/utils/__init__.py +0 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/get_authorized_dataset.py +23 -0
- cognee/modules/data/models/Data.py +13 -3
- cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
- cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
- cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
- cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
- cognee/modules/engine/utils/generate_edge_id.py +5 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +45 -35
- cognee/modules/graph/methods/get_formatted_graph_data.py +8 -2
- cognee/modules/graph/utils/get_graph_from_model.py +93 -101
- cognee/modules/ingestion/data_types/TextData.py +8 -2
- cognee/modules/ingestion/save_data_to_file.py +1 -1
- cognee/modules/pipelines/exceptions/__init__.py +1 -0
- cognee/modules/pipelines/exceptions/exceptions.py +12 -0
- cognee/modules/pipelines/models/DataItemStatus.py +5 -0
- cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
- cognee/modules/pipelines/models/__init__.py +1 -0
- cognee/modules/pipelines/operations/pipeline.py +10 -2
- cognee/modules/pipelines/operations/run_tasks.py +252 -20
- cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
- cognee/modules/retrieval/chunks_retriever.py +23 -1
- cognee/modules/retrieval/code_retriever.py +66 -9
- cognee/modules/retrieval/completion_retriever.py +11 -9
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/insights_retriever.py +4 -0
- cognee/modules/retrieval/natural_language_retriever.py +9 -15
- cognee/modules/retrieval/summaries_retriever.py +23 -1
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
- cognee/modules/retrieval/utils/completion.py +6 -9
- cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
- cognee/modules/search/methods/search.py +5 -1
- cognee/modules/search/operations/__init__.py +1 -0
- cognee/modules/search/operations/select_search_type.py +42 -0
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +0 -8
- cognee/modules/settings/save_vector_db_config.py +1 -1
- cognee/shared/data_models.py +3 -1
- cognee/shared/logging_utils.py +0 -5
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
- cognee/tasks/graph/extract_graph_from_code.py +3 -2
- cognee/tasks/graph/extract_graph_from_data.py +4 -3
- cognee/tasks/graph/infer_data_ontology.py +5 -6
- cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
- cognee/tasks/ingestion/ingest_data.py +91 -61
- cognee/tasks/ingestion/resolve_data_directories.py +3 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +4 -1
- cognee/tasks/summarization/summarize_code.py +2 -3
- cognee/tasks/summarization/summarize_text.py +3 -2
- cognee/tests/test_cognee_server_start.py +12 -7
- cognee/tests/test_deduplication.py +2 -2
- cognee/tests/test_deletion.py +58 -17
- cognee/tests/test_graph_visualization_permissions.py +161 -0
- cognee/tests/test_neptune_analytics_graph.py +309 -0
- cognee/tests/test_neptune_analytics_hybrid.py +176 -0
- cognee/tests/{test_weaviate.py → test_neptune_analytics_vector.py} +86 -11
- cognee/tests/test_pgvector.py +5 -5
- cognee/tests/test_s3.py +1 -6
- cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
- cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
- cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
- cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
- cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
- cognee/tests/unit/modules/search/search_methods_test.py +55 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/METADATA +13 -9
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/RECORD +203 -164
- cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
- cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
- cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
- cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
- cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
- cognee/modules/data/extraction/extract_categories.py +0 -14
- cognee/tests/test_qdrant.py +0 -99
- distributed/Dockerfile +0 -34
- distributed/app.py +0 -4
- distributed/entrypoint.py +0 -71
- distributed/entrypoint.sh +0 -5
- distributed/modal_image.py +0 -11
- distributed/queues.py +0 -5
- distributed/tasks/queued_add_data_points.py +0 -13
- distributed/tasks/queued_add_edges.py +0 -13
- distributed/tasks/queued_add_nodes.py +0 -13
- distributed/test.py +0 -28
- distributed/utils.py +0 -19
- distributed/workers/data_point_saving_worker.py +0 -93
- distributed/workers/graph_saving_worker.py +0 -104
- /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
- /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
- /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
- /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
- /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
- /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
- /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
- /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
- {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
- {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
- /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/WHEEL +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -4,43 +4,50 @@ from cognee.infrastructure.engine import DataPoint, Edge
|
|
|
4
4
|
from cognee.modules.storage.utils import copy_model
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
def
|
|
8
|
-
"""Extract
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
7
|
+
def _extract_field_data(field_value: Any) -> List[Tuple[Optional[Edge], List[DataPoint]]]:
|
|
8
|
+
"""Extract edge metadata and datapoints from a field value."""
|
|
9
|
+
# Handle single DataPoint
|
|
10
|
+
if isinstance(field_value, DataPoint):
|
|
11
|
+
return [(None, [field_value])]
|
|
12
|
+
|
|
13
|
+
# Handle list - could contain DataPoints, edge tuples, or mixed
|
|
14
|
+
if isinstance(field_value, list) and len(field_value) > 0:
|
|
15
|
+
result = []
|
|
16
|
+
for item in field_value:
|
|
17
|
+
# Handle tuple[Edge, DataPoint or list[DataPoint]]
|
|
18
|
+
if isinstance(item, tuple) and len(item) == 2 and isinstance(item[0], Edge):
|
|
19
|
+
edge, data_value = item
|
|
20
|
+
if isinstance(data_value, DataPoint):
|
|
21
|
+
result.append((edge, [data_value]))
|
|
22
|
+
elif (
|
|
23
|
+
isinstance(data_value, list)
|
|
24
|
+
and len(data_value) > 0
|
|
25
|
+
and isinstance(data_value[0], DataPoint)
|
|
26
|
+
):
|
|
27
|
+
result.append((edge, data_value))
|
|
28
|
+
# Handle single DataPoint in list
|
|
29
|
+
elif isinstance(item, DataPoint):
|
|
30
|
+
result.append((None, [item]))
|
|
31
|
+
return result
|
|
32
|
+
|
|
33
|
+
# Handle tuple[Edge, DataPoint or list[DataPoint]]
|
|
20
34
|
if (
|
|
21
35
|
isinstance(field_value, tuple)
|
|
22
36
|
and len(field_value) == 2
|
|
23
37
|
and isinstance(field_value[0], Edge)
|
|
24
|
-
and isinstance(field_value[1], list)
|
|
25
|
-
and len(field_value[1]) > 0
|
|
26
|
-
and isinstance(field_value[1][0], DataPoint)
|
|
27
38
|
):
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
and len(field_value) > 0
|
|
38
|
-
and isinstance(field_value[0], DataPoint)
|
|
39
|
-
):
|
|
40
|
-
return "list_datapoint", field_value, None
|
|
39
|
+
edge_metadata, data_value = field_value
|
|
40
|
+
if isinstance(data_value, DataPoint):
|
|
41
|
+
return [(edge_metadata, [data_value])]
|
|
42
|
+
elif (
|
|
43
|
+
isinstance(data_value, list)
|
|
44
|
+
and len(data_value) > 0
|
|
45
|
+
and isinstance(data_value[0], DataPoint)
|
|
46
|
+
):
|
|
47
|
+
return [(edge_metadata, data_value)]
|
|
41
48
|
|
|
42
|
-
# Regular property
|
|
43
|
-
return
|
|
49
|
+
# Regular property or empty list
|
|
50
|
+
return []
|
|
44
51
|
|
|
45
52
|
|
|
46
53
|
def _create_edge_properties(
|
|
@@ -80,30 +87,49 @@ def _get_relationship_key(field_name: str, edge_metadata: Optional[Edge]) -> str
|
|
|
80
87
|
|
|
81
88
|
def _generate_property_key(data_point_id: str, relationship_key: str, target_id: str) -> str:
|
|
82
89
|
"""Generate a unique property key for visited_properties tracking."""
|
|
83
|
-
return f"{data_point_id}{relationship_key}{target_id}"
|
|
90
|
+
return f"{data_point_id}_{relationship_key}_{target_id}"
|
|
84
91
|
|
|
85
92
|
|
|
86
93
|
def _process_datapoint_field(
|
|
87
94
|
data_point_id: str,
|
|
88
95
|
field_name: str,
|
|
89
|
-
|
|
90
|
-
edge_metadata: Optional[Edge],
|
|
96
|
+
edge_datapoint_pairs: List[Tuple[Optional[Edge], List[DataPoint]]],
|
|
91
97
|
visited_properties: Dict[str, bool],
|
|
92
98
|
properties_to_visit: set,
|
|
93
99
|
excluded_properties: set,
|
|
94
100
|
) -> None:
|
|
95
|
-
"""Process a field containing
|
|
101
|
+
"""Process a field containing DataPoints, always working with lists."""
|
|
96
102
|
excluded_properties.add(field_name)
|
|
97
|
-
relationship_key = _get_relationship_key(field_name, edge_metadata)
|
|
98
103
|
|
|
99
|
-
for
|
|
100
|
-
|
|
101
|
-
|
|
104
|
+
for edge_metadata, datapoints in edge_datapoint_pairs:
|
|
105
|
+
relationship_key = _get_relationship_key(field_name, edge_metadata)
|
|
106
|
+
|
|
107
|
+
for datapoint in datapoints:
|
|
108
|
+
property_key = _generate_property_key(
|
|
109
|
+
data_point_id, relationship_key, str(datapoint.id)
|
|
110
|
+
)
|
|
111
|
+
if property_key in visited_properties:
|
|
112
|
+
continue
|
|
113
|
+
|
|
114
|
+
# Always use field_name since we're working with lists
|
|
115
|
+
properties_to_visit.add(field_name)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _targets_generator(
|
|
119
|
+
data_point: DataPoint,
|
|
120
|
+
properties_to_visit: set,
|
|
121
|
+
) -> Tuple[DataPoint, str, Optional[Edge]]:
|
|
122
|
+
"""Generator that yields (target_datapoint, field_name, edge_metadata) tuples."""
|
|
123
|
+
for field_name in properties_to_visit:
|
|
124
|
+
field_value = getattr(data_point, field_name)
|
|
125
|
+
edge_datapoint_pairs = _extract_field_data(field_value)
|
|
126
|
+
|
|
127
|
+
if not edge_datapoint_pairs:
|
|
102
128
|
continue
|
|
103
129
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
130
|
+
for edge_metadata, datapoints in edge_datapoint_pairs:
|
|
131
|
+
for target_datapoint in datapoints:
|
|
132
|
+
yield target_datapoint, field_name, edge_metadata
|
|
107
133
|
|
|
108
134
|
|
|
109
135
|
async def get_graph_from_model(
|
|
@@ -143,26 +169,17 @@ async def get_graph_from_model(
|
|
|
143
169
|
if field_name == "metadata":
|
|
144
170
|
continue
|
|
145
171
|
|
|
146
|
-
|
|
172
|
+
edge_datapoint_pairs = _extract_field_data(field_value)
|
|
147
173
|
|
|
148
|
-
if
|
|
174
|
+
if not edge_datapoint_pairs:
|
|
175
|
+
# Regular property
|
|
149
176
|
data_point_properties[field_name] = field_value
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
data_point_id,
|
|
153
|
-
field_name,
|
|
154
|
-
[actual_value],
|
|
155
|
-
edge_metadata,
|
|
156
|
-
visited_properties,
|
|
157
|
-
properties_to_visit,
|
|
158
|
-
excluded_properties,
|
|
159
|
-
)
|
|
160
|
-
elif field_type in ["list_datapoint", "list_datapoint_with_edge"]:
|
|
177
|
+
else:
|
|
178
|
+
# DataPoint relationship
|
|
161
179
|
_process_datapoint_field(
|
|
162
180
|
data_point_id,
|
|
163
181
|
field_name,
|
|
164
|
-
|
|
165
|
-
edge_metadata,
|
|
182
|
+
edge_datapoint_pairs,
|
|
166
183
|
visited_properties,
|
|
167
184
|
properties_to_visit,
|
|
168
185
|
excluded_properties,
|
|
@@ -176,41 +193,15 @@ async def get_graph_from_model(
|
|
|
176
193
|
nodes.append(SimpleDataPointModel(**data_point_properties))
|
|
177
194
|
added_nodes[data_point_id] = True
|
|
178
195
|
|
|
179
|
-
# Process all relationships
|
|
180
|
-
for
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
index = int(index_str)
|
|
185
|
-
else:
|
|
186
|
-
field_name, index = field_name_with_index, None
|
|
187
|
-
|
|
188
|
-
# Get field value and extract edge metadata
|
|
189
|
-
field_value = getattr(data_point, field_name)
|
|
190
|
-
edge_metadata = None
|
|
191
|
-
|
|
192
|
-
if (
|
|
193
|
-
isinstance(field_value, tuple)
|
|
194
|
-
and len(field_value) == 2
|
|
195
|
-
and isinstance(field_value[0], Edge)
|
|
196
|
-
):
|
|
197
|
-
edge_metadata, field_value = field_value
|
|
198
|
-
|
|
199
|
-
# Get specific datapoint - handle both single and list cases
|
|
200
|
-
if index is not None:
|
|
201
|
-
# List case: extract specific item by index
|
|
202
|
-
target_datapoint = field_value[index]
|
|
203
|
-
elif isinstance(field_value, list):
|
|
204
|
-
# Single datapoint case that was wrapped in a list
|
|
205
|
-
target_datapoint = field_value[0]
|
|
206
|
-
else:
|
|
207
|
-
# True single datapoint case
|
|
208
|
-
target_datapoint = field_value
|
|
196
|
+
# Process all relationships using generator
|
|
197
|
+
for target_datapoint, field_name, edge_metadata in _targets_generator(
|
|
198
|
+
data_point, properties_to_visit
|
|
199
|
+
):
|
|
200
|
+
relationship_name = _get_relationship_key(field_name, edge_metadata)
|
|
209
201
|
|
|
210
202
|
# Create edge if not already added
|
|
211
|
-
edge_key = f"{data_point_id}{target_datapoint.id}{field_name}"
|
|
203
|
+
edge_key = f"{data_point_id}_{target_datapoint.id}_{field_name}"
|
|
212
204
|
if edge_key not in added_edges:
|
|
213
|
-
relationship_name = _get_relationship_key(field_name, edge_metadata)
|
|
214
205
|
edge_properties = _create_edge_properties(
|
|
215
206
|
data_point.id, target_datapoint.id, relationship_name, edge_metadata
|
|
216
207
|
)
|
|
@@ -218,23 +209,24 @@ async def get_graph_from_model(
|
|
|
218
209
|
added_edges[edge_key] = True
|
|
219
210
|
|
|
220
211
|
# Mark property as visited - CRITICAL for preventing infinite loops
|
|
221
|
-
relationship_key = _get_relationship_key(field_name, edge_metadata)
|
|
222
212
|
property_key = _generate_property_key(
|
|
223
|
-
data_point_id,
|
|
213
|
+
data_point_id, relationship_name, str(target_datapoint.id)
|
|
224
214
|
)
|
|
225
215
|
visited_properties[property_key] = True
|
|
226
216
|
|
|
227
217
|
# Recursively process target node if not already processed
|
|
228
|
-
if str(target_datapoint.id)
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
218
|
+
if str(target_datapoint.id) in added_nodes:
|
|
219
|
+
continue
|
|
220
|
+
|
|
221
|
+
child_nodes, child_edges = await get_graph_from_model(
|
|
222
|
+
target_datapoint,
|
|
223
|
+
include_root=True,
|
|
224
|
+
added_nodes=added_nodes,
|
|
225
|
+
added_edges=added_edges,
|
|
226
|
+
visited_properties=visited_properties,
|
|
227
|
+
)
|
|
228
|
+
nodes.extend(child_nodes)
|
|
229
|
+
edges.extend(child_edges)
|
|
238
230
|
|
|
239
231
|
return nodes, edges
|
|
240
232
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from typing import BinaryIO
|
|
2
2
|
from contextlib import asynccontextmanager
|
|
3
|
+
import hashlib
|
|
3
4
|
from cognee.infrastructure.data.utils.extract_keywords import extract_keywords
|
|
4
5
|
from .IngestionData import IngestionData
|
|
5
6
|
|
|
@@ -16,9 +17,9 @@ class TextData(IngestionData):
|
|
|
16
17
|
self.data = data
|
|
17
18
|
|
|
18
19
|
def get_identifier(self):
|
|
19
|
-
|
|
20
|
+
metadata = self.get_metadata()
|
|
20
21
|
|
|
21
|
-
return "
|
|
22
|
+
return metadata["content_hash"]
|
|
22
23
|
|
|
23
24
|
def get_metadata(self):
|
|
24
25
|
self.ensure_metadata()
|
|
@@ -29,6 +30,11 @@ class TextData(IngestionData):
|
|
|
29
30
|
if self.metadata is None:
|
|
30
31
|
self.metadata = {}
|
|
31
32
|
|
|
33
|
+
data_contents = self.data.encode("utf-8")
|
|
34
|
+
hash_contents = hashlib.md5(data_contents).hexdigest()
|
|
35
|
+
self.metadata["name"] = "text_" + hash_contents + ".txt"
|
|
36
|
+
self.metadata["content_hash"] = hash_contents
|
|
37
|
+
|
|
32
38
|
@asynccontextmanager
|
|
33
39
|
async def get_data(self):
|
|
34
40
|
yield self.data
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import hashlib
|
|
2
1
|
from typing import BinaryIO, Union
|
|
3
2
|
from cognee.infrastructure.files.storage import get_file_storage, get_storage_config
|
|
4
3
|
from .classify import classify
|
|
4
|
+
import hashlib
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
async def save_data_to_file(data: Union[str, BinaryIO], filename: str = None):
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .exceptions import PipelineRunFailedError
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from cognee.exceptions import CogneeApiError
|
|
2
|
+
from fastapi import status
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class PipelineRunFailedError(CogneeApiError):
|
|
6
|
+
def __init__(
|
|
7
|
+
self,
|
|
8
|
+
message: str = "Pipeline run failed.",
|
|
9
|
+
name: str = "PipelineRunFailedError",
|
|
10
|
+
status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY,
|
|
11
|
+
):
|
|
12
|
+
super().__init__(message, name, status_code)
|
|
@@ -9,6 +9,7 @@ class PipelineRunInfo(BaseModel):
|
|
|
9
9
|
dataset_id: UUID
|
|
10
10
|
dataset_name: str
|
|
11
11
|
payload: Optional[Any] = None
|
|
12
|
+
data_ingestion_info: Optional[list] = None
|
|
12
13
|
|
|
13
14
|
model_config = {
|
|
14
15
|
"arbitrary_types_allowed": True,
|
|
@@ -30,6 +31,11 @@ class PipelineRunCompleted(PipelineRunInfo):
|
|
|
30
31
|
pass
|
|
31
32
|
|
|
32
33
|
|
|
34
|
+
class PipelineRunAlreadyCompleted(PipelineRunInfo):
|
|
35
|
+
status: str = "PipelineRunAlreadyCompleted"
|
|
36
|
+
pass
|
|
37
|
+
|
|
38
|
+
|
|
33
39
|
class PipelineRunErrored(PipelineRunInfo):
|
|
34
40
|
status: str = "PipelineRunErrored"
|
|
35
41
|
pass
|
|
@@ -52,6 +52,7 @@ async def cognee_pipeline(
|
|
|
52
52
|
pipeline_name: str = "custom_pipeline",
|
|
53
53
|
vector_db_config: dict = None,
|
|
54
54
|
graph_db_config: dict = None,
|
|
55
|
+
incremental_loading: bool = False,
|
|
55
56
|
):
|
|
56
57
|
# Note: These context variables allow different value assignment for databases in Cognee
|
|
57
58
|
# per async task, thread, process and etc.
|
|
@@ -69,7 +70,10 @@ async def cognee_pipeline(
|
|
|
69
70
|
cognee_pipeline.first_run = True
|
|
70
71
|
|
|
71
72
|
if cognee_pipeline.first_run:
|
|
72
|
-
from cognee.infrastructure.llm.utils import
|
|
73
|
+
from cognee.infrastructure.llm.utils import (
|
|
74
|
+
test_llm_connection,
|
|
75
|
+
test_embedding_connection,
|
|
76
|
+
)
|
|
73
77
|
|
|
74
78
|
# Test LLM and Embedding configuration once before running Cognee
|
|
75
79
|
await test_llm_connection()
|
|
@@ -106,6 +110,7 @@ async def cognee_pipeline(
|
|
|
106
110
|
data=data,
|
|
107
111
|
pipeline_name=pipeline_name,
|
|
108
112
|
context={"dataset": dataset},
|
|
113
|
+
incremental_loading=incremental_loading,
|
|
109
114
|
):
|
|
110
115
|
yield run_info
|
|
111
116
|
|
|
@@ -117,6 +122,7 @@ async def run_pipeline(
|
|
|
117
122
|
data=None,
|
|
118
123
|
pipeline_name: str = "custom_pipeline",
|
|
119
124
|
context: dict = None,
|
|
125
|
+
incremental_loading=False,
|
|
120
126
|
):
|
|
121
127
|
check_dataset_name(dataset.name)
|
|
122
128
|
|
|
@@ -184,7 +190,9 @@ async def run_pipeline(
|
|
|
184
190
|
if not isinstance(task, Task):
|
|
185
191
|
raise ValueError(f"Task {task} is not an instance of Task")
|
|
186
192
|
|
|
187
|
-
pipeline_run = run_tasks(
|
|
193
|
+
pipeline_run = run_tasks(
|
|
194
|
+
tasks, dataset_id, data, user, pipeline_name, context, incremental_loading
|
|
195
|
+
)
|
|
188
196
|
|
|
189
197
|
async for pipeline_run_info in pipeline_run:
|
|
190
198
|
yield pipeline_run_info
|