cognee 0.5.0.dev0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +1 -5
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/cognify/cognify.py +24 -16
- cognee/api/v1/cognify/routers/__init__.py +0 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +12 -37
- cognee/api/v1/ontologies/routers/get_ontology_router.py +27 -25
- cognee/api/v1/search/search.py +8 -0
- cognee/api/v1/ui/node_setup.py +360 -0
- cognee/api/v1/ui/npm_utils.py +50 -0
- cognee/api/v1/ui/ui.py +38 -68
- cognee/context_global_variables.py +61 -16
- cognee/eval_framework/Dockerfile +29 -0
- cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +16 -28
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/graph/config.py +3 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +1 -0
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
- cognee/infrastructure/databases/utils/__init__.py +3 -0
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +62 -48
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
- cognee/infrastructure/databases/vector/config.py +2 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +1 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
- cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
- cognee/infrastructure/files/storage/s3_config.py +2 -0
- cognee/infrastructure/llm/LLMGateway.py +5 -2
- cognee/infrastructure/llm/config.py +35 -0
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -16
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +40 -37
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +39 -36
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +19 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +11 -9
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +23 -21
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +42 -34
- cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/deletion/prune_system.py +52 -2
- cognee/modules/data/methods/delete_dataset.py +26 -0
- cognee/modules/engine/models/Triplet.py +9 -0
- cognee/modules/engine/models/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +85 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
- cognee/modules/memify/memify.py +1 -7
- cognee/modules/pipelines/operations/pipeline.py +18 -2
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +4 -0
- cognee/modules/retrieval/graph_completion_cot_retriever.py +4 -0
- cognee/modules/retrieval/graph_completion_retriever.py +10 -0
- cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
- cognee/modules/retrieval/register_retriever.py +10 -0
- cognee/modules/retrieval/registered_community_retrievers.py +1 -0
- cognee/modules/retrieval/temporal_retriever.py +4 -0
- cognee/modules/retrieval/triplet_retriever.py +182 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +42 -10
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +8 -1
- cognee/modules/search/methods/get_search_type_tools.py +54 -8
- cognee/modules/search/methods/no_access_control_search.py +4 -0
- cognee/modules/search/methods/search.py +46 -18
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +19 -0
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +15 -3
- cognee/shared/logging_utils.py +4 -0
- cognee/shared/rate_limiting.py +30 -0
- cognee/tasks/documents/__init__.py +0 -1
- cognee/tasks/graph/extract_graph_from_data.py +9 -10
- cognee/tasks/memify/get_triplet_datapoints.py +289 -0
- cognee/tasks/storage/add_data_points.py +142 -2
- cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
- cognee/tests/integration/tasks/test_add_data_points.py +139 -0
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
- cognee/tests/test_cognee_server_start.py +2 -4
- cognee/tests/test_conversation_history.py +23 -1
- cognee/tests/test_dataset_database_handler.py +137 -0
- cognee/tests/test_dataset_delete.py +76 -0
- cognee/tests/test_edge_centered_payload.py +170 -0
- cognee/tests/test_pipeline_cache.py +164 -0
- cognee/tests/test_search_db.py +37 -1
- cognee/tests/unit/api/test_ontology_endpoint.py +77 -89
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
- cognee/tests/unit/modules/search/test_search.py +100 -0
- cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/METADATA +76 -89
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/RECORD +119 -97
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/WHEEL +1 -1
- cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
- cognee/modules/retrieval/code_retriever.py +0 -232
- cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
- cognee/tasks/code/get_local_dependencies_checker.py +0 -20
- cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
- cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
- cognee/tasks/repo_processor/__init__.py +0 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
- cognee/tasks/repo_processor/get_non_code_files.py +0 -158
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
- cognee/tests/test_delete_bmw_example.py +0 -60
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import types
|
|
2
|
+
from uuid import uuid4
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from cognee.modules.search.types import SearchType
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _make_user(user_id: str = "u1", tenant_id=None):
|
|
10
|
+
return types.SimpleNamespace(id=user_id, tenant_id=tenant_id)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _make_dataset(*, name="ds", tenant_id="t1", dataset_id=None, owner_id=None):
|
|
14
|
+
return types.SimpleNamespace(
|
|
15
|
+
id=dataset_id or uuid4(),
|
|
16
|
+
name=name,
|
|
17
|
+
tenant_id=tenant_id,
|
|
18
|
+
owner_id=owner_id or uuid4(),
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.fixture
def search_mod():
    """Provide the ``cognee.modules.search.methods.search`` module object to tests."""
    from importlib import import_module

    module_path = "cognee.modules.search.methods.search"
    return import_module(module_path)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.fixture(autouse=True)
def _patch_side_effect_boundaries(monkeypatch, search_mod):
    """
    Keep production logic; patch only unavoidable side-effect boundaries.

    For every test, the search module's ``send_telemetry``, ``log_query``,
    ``log_result`` and ``prepare_search_result`` attributes are swapped for
    the in-memory stand-ins defined below.
    """

    def _noop_telemetry(*_args, **_kwargs):
        return None

    async def _fake_log_query(_query_text, _query_type, _user_id):
        # Only an object with an ``id`` attribute is handed back.
        return types.SimpleNamespace(id="qid-1")

    async def _fake_log_result(*_args, **_kwargs):
        return None

    async def _fake_prepare_search_result(search_result):
        is_triple = isinstance(search_result, tuple) and len(search_result) == 3
        if not is_triple:
            return {"result": None, "context": None, "graphs": {}, "datasets": []}

        result, context, datasets = search_result
        return {"result": result, "context": context, "graphs": {}, "datasets": datasets}

    stand_ins = {
        "send_telemetry": _noop_telemetry,
        "log_query": _fake_log_query,
        "log_result": _fake_log_result,
        "prepare_search_result": _fake_prepare_search_result,
    }
    for attribute_name, replacement in stand_ins.items():
        monkeypatch.setattr(search_mod, attribute_name, replacement)

    yield
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@pytest.mark.asyncio
async def test_search_access_control_returns_dataset_shaped_dicts(monkeypatch, search_mod):
    """With access control on, search returns one dict per dataset; verbose adds ``graphs``."""
    user = _make_user()
    dataset = _make_dataset(name="ds1", tenant_id="t1")

    async def fake_authorized_search(**kwargs):
        assert kwargs["dataset_ids"] == [dataset.id]
        return [("r", ["ctx"], [dataset])]

    monkeypatch.setattr(search_mod, "backend_access_control_enabled", lambda: True)
    monkeypatch.setattr(search_mod, "authorized_search", fake_authorized_search)

    async def run_search(*, verbose):
        # Both calls share every argument except the verbosity flag.
        return await search_mod.search(
            query_text="q",
            query_type=SearchType.CHUNKS,
            dataset_ids=[dataset.id],
            user=user,
            verbose=verbose,
        )

    expected_entry = {
        "search_result": ["r"],
        "dataset_id": dataset.id,
        "dataset_name": "ds1",
        "dataset_tenant_id": "t1",
    }

    out_non_verbose = await run_search(verbose=False)
    assert out_non_verbose == [expected_entry]

    out_verbose = await run_search(verbose=True)
    assert out_verbose == [{**expected_entry, "graphs": {}}]
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from unittest.mock import AsyncMock, patch
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from cognee.infrastructure.engine import DataPoint
|
|
6
|
+
from cognee.modules.engine.models import Triplet
|
|
7
|
+
from cognee.tasks.storage.add_data_points import (
|
|
8
|
+
add_data_points,
|
|
9
|
+
InvalidDataPointsInAddDataPointsError,
|
|
10
|
+
_extract_embeddable_text_from_datapoint,
|
|
11
|
+
_create_triplets_from_graph,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
# Module object used as the target of the ``patch.object`` decorators in this file.
# NOTE(review): presumably fetched via sys.modules because the package re-exports a
# same-named function that would shadow the submodule on attribute access — confirm.
adp_module = sys.modules["cognee.tasks.storage.add_data_points"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SimplePoint(DataPoint):
    """Minimal DataPoint with a single ``text`` field, listed as its only index field."""

    text: str
    metadata: dict = dict(index_fields=["text"])
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.mark.asyncio
@pytest.mark.parametrize("bad_input", [None, ["not_datapoint"]])
async def test_add_data_points_validates_inputs(bad_input):
    """``add_data_points`` rejects ``None`` and a list holding a non-DataPoint item."""
    expected_error = InvalidDataPointsInAddDataPointsError
    with pytest.raises(expected_error):
        await add_data_points(bad_input)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.mark.asyncio
@patch.object(adp_module, "index_graph_edges")
@patch.object(adp_module, "index_data_points")
@patch.object(adp_module, "get_graph_engine")
@patch.object(adp_module, "deduplicate_nodes_and_edges")
@patch.object(adp_module, "get_graph_from_model")
async def test_add_data_points_indexes_nodes_and_edges(
    mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
):
    """Model edges and custom edges are stored in separate ``add_edges`` calls and indexed twice."""
    first_point = SimplePoint(text="first")
    second_point = SimplePoint(text="second")
    first_id, second_id = str(first_point.id), str(second_point.id)

    model_edge = (first_id, second_id, "related_to", {"edge_text": "connects"})
    custom_edges = [(second_id, first_id, "custom_edge", {})]

    def _passthrough(nodes, edges):
        # Deduplication is stubbed to hand its inputs back untouched.
        return nodes, edges

    mock_get_graph.side_effect = [([first_point], [model_edge]), ([second_point], [])]
    mock_dedup.side_effect = _passthrough
    engine = AsyncMock()
    mock_get_engine.return_value = engine

    stored = await add_data_points([first_point, second_point], custom_edges=custom_edges)

    assert stored == [first_point, second_point]
    engine.add_nodes.assert_awaited_once()
    mock_index_nodes.assert_awaited_once()

    assert engine.add_edges.await_count == 2
    edge_awaits = engine.add_edges.await_args_list
    assert model_edge in edge_awaits[0].args[0]
    assert edge_awaits[1].args[0] == custom_edges
    assert mock_index_edges.await_count == 2
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@pytest.mark.asyncio
@patch.object(adp_module, "index_graph_edges")
@patch.object(adp_module, "index_data_points")
@patch.object(adp_module, "get_graph_engine")
@patch.object(adp_module, "deduplicate_nodes_and_edges")
@patch.object(adp_module, "get_graph_from_model")
async def test_add_data_points_indexes_triplets_when_enabled(
    mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
):
    """With ``embed_triplets=True`` the data-point index runs twice: nodes first, then triplets."""
    source_point = SimplePoint(text="source")
    target_point = SimplePoint(text="target")
    relation = (
        str(source_point.id),
        str(target_point.id),
        "relates",
        {"edge_text": "describes"},
    )

    def _passthrough(nodes, edges):
        # Deduplication is stubbed to hand its inputs back untouched.
        return nodes, edges

    mock_get_graph.side_effect = [([source_point], [relation]), ([target_point], [])]
    mock_dedup.side_effect = _passthrough
    engine = AsyncMock()
    mock_get_engine.return_value = engine

    await add_data_points([source_point, target_point], embed_triplets=True)

    assert mock_index_nodes.await_count == 2
    node_call, triplet_call = mock_index_nodes.await_args_list
    indexed_nodes = node_call.args[0]
    indexed_triplets = triplet_call.args[0]
    assert indexed_nodes == [source_point, target_point]
    assert len(indexed_triplets) == 1
    assert isinstance(indexed_triplets[0], Triplet)
    mock_index_edges.assert_awaited_once()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@pytest.mark.asyncio
@patch.object(adp_module, "index_graph_edges")
@patch.object(adp_module, "index_data_points")
@patch.object(adp_module, "get_graph_engine")
@patch.object(adp_module, "deduplicate_nodes_and_edges")
@patch.object(adp_module, "get_graph_from_model")
async def test_add_data_points_with_empty_list(
    mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
):
    """An empty input returns ``[]``, skips graph extraction and still awaits ``add_nodes([])``."""

    def _passthrough(nodes, edges):
        # Deduplication is stubbed to hand its inputs back untouched.
        return nodes, edges

    mock_dedup.side_effect = _passthrough
    engine = AsyncMock()
    mock_get_engine.return_value = engine

    stored = await add_data_points([])

    assert stored == []
    mock_get_graph.assert_not_called()
    engine.add_nodes.assert_awaited_once_with([])
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@pytest.mark.asyncio
@patch.object(adp_module, "index_graph_edges")
@patch.object(adp_module, "index_data_points")
@patch.object(adp_module, "get_graph_engine")
@patch.object(adp_module, "deduplicate_nodes_and_edges")
@patch.object(adp_module, "get_graph_from_model")
async def test_add_data_points_with_single_datapoint(
    mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
):
    """A single data point is returned as-is after one graph extraction and one index call."""
    only_point = SimplePoint(text="single")

    def _passthrough(nodes, edges):
        # Deduplication is stubbed to hand its inputs back untouched.
        return nodes, edges

    mock_get_graph.side_effect = [([only_point], [])]
    mock_dedup.side_effect = _passthrough
    engine = AsyncMock()
    mock_get_engine.return_value = engine

    stored = await add_data_points([only_point])

    assert stored == [only_point]
    mock_get_graph.assert_called_once()
    mock_index_nodes.assert_awaited_once()
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def test_extract_embeddable_text_from_datapoint():
    """The single indexed field's value is returned as the embeddable text."""
    point = SimplePoint(text="hello world")
    extracted = _extract_embeddable_text_from_datapoint(point)
    assert extracted == "hello world"
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def test_extract_embeddable_text_with_multiple_fields():
    """Values of several index fields are joined with a single space, in listed order."""

    class MultiField(DataPoint):
        title: str
        description: str
        metadata: dict = dict(index_fields=["title", "description"])

    point = MultiField(title="Test", description="Description")
    extracted = _extract_embeddable_text_from_datapoint(point)
    assert extracted == "Test Description"
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_extract_embeddable_text_with_no_index_fields():
    """An empty ``index_fields`` list yields an empty string."""

    class NoIndex(DataPoint):
        text: str
        metadata: dict = dict(index_fields=[])

    point = NoIndex(text="ignored")
    extracted = _extract_embeddable_text_from_datapoint(point)
    assert extracted == ""
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def test_create_triplets_from_graph():
    """A valid edge between two known nodes yields one Triplet whose text holds both node texts."""
    source = SimplePoint(text="source node")
    target = SimplePoint(text="target node")
    source_id, target_id = str(source.id), str(target.id)
    edge = (source_id, target_id, "connects_to", {"edge_text": "links"})

    triplets = _create_triplets_from_graph([source, target], [edge])

    assert len(triplets) == 1
    triplet = triplets[0]
    assert isinstance(triplet, Triplet)
    assert triplet.from_node_id == source_id
    assert triplet.to_node_id == target_id
    assert "source node" in triplet.text
    assert "target node" in triplet.text
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def test_extract_embeddable_text_with_none_datapoint():
    """Passing ``None`` instead of a data point yields an empty string."""
    extracted = _extract_embeddable_text_from_datapoint(None)
    assert extracted == ""
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def test_extract_embeddable_text_without_metadata():
    """A data point whose ``metadata`` attribute has been removed yields an empty string."""

    class NoMetadata(DataPoint):
        text: str

    point = NoMetadata(text="test")
    # Remove the attribute from the instance before extraction.
    del point.metadata
    extracted = _extract_embeddable_text_from_datapoint(point)
    assert extracted == ""
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def test_extract_embeddable_text_with_whitespace_only():
    """An indexed field containing only whitespace yields an empty string."""

    class WhitespaceField(DataPoint):
        text: str
        metadata: dict = dict(index_fields=["text"])

    point = WhitespaceField(text=" ")
    extracted = _extract_embeddable_text_from_datapoint(point)
    assert extracted == ""
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def test_create_triplets_skips_short_edge_tuples():
    """An edge tuple holding only the two node ids produces no triplet."""
    node = SimplePoint(text="node")
    node_id = str(node.id)
    incomplete_edge = (node_id, node_id)

    triplets = _create_triplets_from_graph([node], [incomplete_edge])

    triplet_count = len(triplets)
    assert triplet_count == 0
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def test_create_triplets_skips_missing_source_node():
    """An edge whose source id matches no supplied node produces no triplet."""
    target = SimplePoint(text="target")
    dangling_edge = ("missing_id", str(target.id), "relates", {})

    triplets = _create_triplets_from_graph([target], [dangling_edge])

    triplet_count = len(triplets)
    assert triplet_count == 0
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def test_create_triplets_skips_missing_target_node():
    """An edge whose target id matches no supplied node produces no triplet."""
    source = SimplePoint(text="source")
    dangling_edge = (str(source.id), "missing_id", "relates", {})

    triplets = _create_triplets_from_graph([source], [dangling_edge])

    triplet_count = len(triplets)
    assert triplet_count == 0
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def test_create_triplets_skips_none_relationship():
    """An edge whose relationship name is ``None`` produces no triplet."""
    source = SimplePoint(text="source")
    target = SimplePoint(text="target")
    unnamed_edge = (str(source.id), str(target.id), None, {})

    triplets = _create_triplets_from_graph([source, target], [unnamed_edge])

    triplet_count = len(triplets)
    assert triplet_count == 0
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def test_create_triplets_uses_relationship_name_when_no_edge_text():
    """Without ``edge_text`` in the edge properties, the relationship name appears in the triplet text."""
    source = SimplePoint(text="source")
    target = SimplePoint(text="target")
    bare_edge = (str(source.id), str(target.id), "connects_to", {})

    triplets = _create_triplets_from_graph([source, target], [bare_edge])

    assert len(triplets) == 1
    triplet_text = triplets[0].text
    assert "connects_to" in triplet_text
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def test_create_triplets_prevents_duplicates():
    """Supplying the same edge twice still yields a single triplet."""
    source = SimplePoint(text="source")
    target = SimplePoint(text="target")
    edge = (str(source.id), str(target.id), "relates", {"edge_text": "links"})
    repeated_edges = [edge, edge]

    triplets = _create_triplets_from_graph([source, target], repeated_edges)

    triplet_count = len(triplets)
    assert triplet_count == 1
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def test_create_triplets_skips_nodes_without_id():
    """A node object lacking an ``id`` attribute is tolerated and no triplet is produced."""

    class NodeNoId:
        """Plain object that carries no ``id`` attribute."""

    valid_node = SimplePoint(text="valid")
    idless_node = NodeNoId()
    edge = (str(valid_node.id), "some_id", "relates", {})

    triplets = _create_triplets_from_graph([valid_node, idless_node], [edge])

    triplet_count = len(triplets)
    assert triplet_count == 0
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@pytest.mark.asyncio
@patch.object(adp_module, "index_graph_edges")
@patch.object(adp_module, "index_data_points")
@patch.object(adp_module, "get_graph_engine")
@patch.object(adp_module, "deduplicate_nodes_and_edges")
@patch.object(adp_module, "get_graph_from_model")
async def test_add_data_points_with_empty_custom_edges(
    mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
):
    """An empty ``custom_edges`` list leads to exactly one ``add_edges`` await."""
    point = SimplePoint(text="test")

    def _passthrough(nodes, edges):
        # Deduplication is stubbed to hand its inputs back untouched.
        return nodes, edges

    mock_get_graph.side_effect = [([point], [])]
    mock_dedup.side_effect = _passthrough
    engine = AsyncMock()
    mock_get_engine.return_value = engine

    stored = await add_data_points([point], custom_edges=[])

    assert stored == [point]
    assert engine.add_edges.await_count == 1
|