cognee 0.3.2__py3-none-any.whl → 0.3.4.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +2 -1
- cognee/api/v1/search/search.py +1 -1
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -2
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
- cognee/infrastructure/databases/vector/config.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +2 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
- cognee/infrastructure/utils/run_async.py +9 -4
- cognee/infrastructure/utils/run_sync.py +4 -3
- cognee/modules/notebooks/methods/create_tutorial_notebook.py +87 -0
- cognee/modules/notebooks/methods/get_notebook.py +2 -2
- cognee/modules/notebooks/methods/update_notebook.py +0 -1
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +8 -5
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +1 -1
- cognee/modules/retrieval/user_qa_feedback.py +1 -1
- cognee/modules/search/methods/search.py +12 -13
- cognee/modules/search/utils/prepare_search_result.py +31 -9
- cognee/modules/search/utils/transform_context_to_graph.py +1 -1
- cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
- cognee/tasks/temporal_graph/models.py +11 -6
- cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
- cognee/tests/test_temporal_graph.py +6 -34
- {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/METADATA +5 -5
- {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/RECORD +31 -31
- cognee-0.3.4.dev0.dist-info/entry_points.txt +2 -0
- cognee/api/v1/save/save.py +0 -335
- cognee/tests/test_save_export_path.py +0 -116
- cognee-0.3.2.dist-info/entry_points.txt +0 -2
- {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -31,7 +31,8 @@ def get_notebooks_router():
|
|
|
31
31
|
|
|
32
32
|
@router.get("")
|
|
33
33
|
async def get_notebooks_endpoint(user: User = Depends(get_authenticated_user)):
|
|
34
|
-
|
|
34
|
+
async with get_async_session() as session:
|
|
35
|
+
return await get_notebooks(user.id, session)
|
|
35
36
|
|
|
36
37
|
@router.post("")
|
|
37
38
|
async def create_notebook_endpoint(
|
cognee/api/v1/search/search.py
CHANGED
|
@@ -22,7 +22,7 @@ async def search(
|
|
|
22
22
|
node_type: Optional[Type] = NodeSet,
|
|
23
23
|
node_name: Optional[List[str]] = None,
|
|
24
24
|
save_interaction: bool = False,
|
|
25
|
-
last_k: Optional[int] =
|
|
25
|
+
last_k: Optional[int] = 1,
|
|
26
26
|
only_context: bool = False,
|
|
27
27
|
use_combined_context: bool = False,
|
|
28
28
|
) -> Union[List[SearchResult], CombinedSearchResult]:
|
|
@@ -83,7 +83,7 @@ def process_data_for_chroma(data):
|
|
|
83
83
|
elif isinstance(value, list):
|
|
84
84
|
# Store lists as JSON strings with special prefix
|
|
85
85
|
processed_data[f"{key}__list"] = json.dumps(value)
|
|
86
|
-
elif isinstance(value, (str, int, float, bool))
|
|
86
|
+
elif isinstance(value, (str, int, float, bool)):
|
|
87
87
|
processed_data[key] = value
|
|
88
88
|
else:
|
|
89
89
|
processed_data[key] = str(value)
|
|
@@ -553,8 +553,4 @@ class ChromaDBAdapter(VectorDBInterface):
|
|
|
553
553
|
Returns a list of collection names.
|
|
554
554
|
"""
|
|
555
555
|
client = await self.get_connection()
|
|
556
|
-
|
|
557
|
-
return [
|
|
558
|
-
collection.name if hasattr(collection, "name") else collection["name"]
|
|
559
|
-
for collection in collections
|
|
560
|
-
]
|
|
556
|
+
return await client.list_collections()
|
|
@@ -39,7 +39,7 @@ class VectorConfig(BaseSettings):
|
|
|
39
39
|
values.vector_db_url = ensure_absolute_path(
|
|
40
40
|
values.vector_db_url,
|
|
41
41
|
)
|
|
42
|
-
|
|
42
|
+
elif not values.vector_db_url:
|
|
43
43
|
# Default path
|
|
44
44
|
databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
|
|
45
45
|
values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb")
|
|
@@ -94,10 +94,8 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
|
|
|
94
94
|
"""
|
|
95
95
|
Internal method to call the Ollama embeddings endpoint for a single prompt.
|
|
96
96
|
"""
|
|
97
|
-
payload = {
|
|
98
|
-
|
|
99
|
-
"prompt": prompt,
|
|
100
|
-
}
|
|
97
|
+
payload = {"model": self.model, "prompt": prompt, "input": prompt}
|
|
98
|
+
|
|
101
99
|
headers = {}
|
|
102
100
|
api_key = os.getenv("LLM_API_KEY")
|
|
103
101
|
if api_key:
|
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py
CHANGED
|
@@ -12,6 +12,7 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll
|
|
|
12
12
|
)
|
|
13
13
|
|
|
14
14
|
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
15
|
+
from cognee.infrastructure.llm.config import get_llm_config
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
class AnthropicAdapter(LLMInterface):
|
|
@@ -27,7 +28,8 @@ class AnthropicAdapter(LLMInterface):
|
|
|
27
28
|
import anthropic
|
|
28
29
|
|
|
29
30
|
self.aclient = instructor.patch(
|
|
30
|
-
create=anthropic.AsyncAnthropic().messages.create,
|
|
31
|
+
create=anthropic.AsyncAnthropic(api_key=get_llm_config().llm_api_key).messages.create,
|
|
32
|
+
mode=instructor.Mode.ANTHROPIC_TOOLS,
|
|
31
33
|
)
|
|
32
34
|
|
|
33
35
|
self.model = model
|
|
@@ -57,7 +59,7 @@ class AnthropicAdapter(LLMInterface):
|
|
|
57
59
|
|
|
58
60
|
return await self.aclient(
|
|
59
61
|
model=self.model,
|
|
60
|
-
|
|
62
|
+
max_tokens=4096,
|
|
61
63
|
max_retries=5,
|
|
62
64
|
messages=[
|
|
63
65
|
{
|
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from functools import partial
|
|
3
|
+
import inspect
|
|
3
4
|
|
|
4
5
|
|
|
5
6
|
async def run_async(func, *args, loop=None, executor=None, **kwargs):
|
|
6
7
|
if loop is None:
|
|
7
8
|
try:
|
|
8
|
-
|
|
9
|
+
loop = asyncio.get_running_loop()
|
|
9
10
|
except RuntimeError:
|
|
10
|
-
|
|
11
|
+
loop = asyncio.get_event_loop()
|
|
11
12
|
|
|
12
|
-
|
|
13
|
-
|
|
13
|
+
if "loop" in inspect.signature(func).parameters:
|
|
14
|
+
pfunc = partial(func, *args, loop=loop, **kwargs)
|
|
15
|
+
else:
|
|
16
|
+
pfunc = partial(func, *args, **kwargs)
|
|
17
|
+
|
|
18
|
+
return await loop.run_in_executor(executor, pfunc)
|
|
@@ -2,16 +2,17 @@ import asyncio
|
|
|
2
2
|
import threading
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
def run_sync(coro, timeout=None):
|
|
5
|
+
def run_sync(coro, running_loop=None, timeout=None):
|
|
6
6
|
result = None
|
|
7
7
|
exception = None
|
|
8
8
|
|
|
9
9
|
def runner():
|
|
10
|
-
nonlocal result, exception
|
|
10
|
+
nonlocal result, exception, running_loop
|
|
11
11
|
|
|
12
12
|
try:
|
|
13
13
|
try:
|
|
14
|
-
|
|
14
|
+
if not running_loop:
|
|
15
|
+
running_loop = asyncio.get_running_loop()
|
|
15
16
|
|
|
16
17
|
result = asyncio.run_coroutine_threadsafe(coro, running_loop).result(timeout)
|
|
17
18
|
except RuntimeError:
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from uuid import UUID, uuid4
|
|
2
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
3
|
+
|
|
4
|
+
from ..models import NotebookCell
|
|
5
|
+
from .create_notebook import create_notebook
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
async def create_tutorial_notebook(user_id: UUID, session: AsyncSession):
|
|
9
|
+
await create_notebook(
|
|
10
|
+
user_id=user_id,
|
|
11
|
+
notebook_name="Welcome to cognee 🧠",
|
|
12
|
+
cells=[
|
|
13
|
+
NotebookCell(
|
|
14
|
+
id=uuid4(),
|
|
15
|
+
name="Welcome",
|
|
16
|
+
content="Cognee is your toolkit for turning text into a structured knowledge graph, optionally enhanced by ontologies, and then querying it with advanced retrieval techniques. This notebook will guide you through a simple example.",
|
|
17
|
+
type="markdown",
|
|
18
|
+
),
|
|
19
|
+
NotebookCell(
|
|
20
|
+
id=uuid4(),
|
|
21
|
+
name="Example",
|
|
22
|
+
content="",
|
|
23
|
+
type="markdown",
|
|
24
|
+
),
|
|
25
|
+
],
|
|
26
|
+
deletable=False,
|
|
27
|
+
session=session,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
cell_content = [
|
|
32
|
+
"""
|
|
33
|
+
# Using Cognee with Python Development Data
|
|
34
|
+
|
|
35
|
+
Unite authoritative Python practice (Guido van Rossum's own contributions!), normative guidance (Zen/PEP 8), and your lived context (rules + conversations) into one *AI memory* that produces answers that are relevant, explainable, and consistent.
|
|
36
|
+
""",
|
|
37
|
+
"""
|
|
38
|
+
## What You'll Learn
|
|
39
|
+
|
|
40
|
+
In this comprehensive tutorial, you'll discover how to transform scattered development data into an intelligent knowledge system that enhances your coding workflow. By the end, you'll have:
|
|
41
|
+
- Connected disparate data sources (Guido's CPython contributions, mypy development, PEP discussions, your Python projects) into a unified AI memory graph
|
|
42
|
+
- Built an memory layer that understands Python design philosophy, best practice coding patterns, and your preferences and experience
|
|
43
|
+
- Learn how to use intelligent search capabilities that combine the diverse context
|
|
44
|
+
- Integrated everything with your coding environment through MCP (Model Context Protocol)
|
|
45
|
+
|
|
46
|
+
This tutorial demonstrates the power of **knowledge graphs** and **retrieval-augmented generation (RAG)** for software development, showing you how to build systems that learn from Python's creator and improve your own Python development.
|
|
47
|
+
""",
|
|
48
|
+
"""
|
|
49
|
+
## Cognee and its core operations
|
|
50
|
+
|
|
51
|
+
Before we dive in, let's understand the core Cognee operations we'll be working with:
|
|
52
|
+
- `cognee.add()` - Ingests raw data (files, text, APIs) into the system
|
|
53
|
+
- `cognee.cognify()` - Processes and structures data into a knowledge graph using AI
|
|
54
|
+
- `cognee.search()` - Queries the knowledge graph with natural language or Cypher
|
|
55
|
+
- `cognee.memify()` - Cognee's \"secret sauce\" that infers implicit connections and rules from your data
|
|
56
|
+
""",
|
|
57
|
+
"""
|
|
58
|
+
## Data used in this tutorial
|
|
59
|
+
|
|
60
|
+
Cognee can ingest many types of sources. In this tutorial, we use a small, concrete set of files that cover different perspectives:
|
|
61
|
+
- `guido_contributions.json` — Authoritative exemplars. Real PRs and commits from Guido van Rossum (mypy, CPython). These show how Python's creator solved problems and provide concrete anchors for patterns.
|
|
62
|
+
- `pep_style_guide.md` — Norms. Encodes community style and typing conventions (PEP 8 and related). Ensures that search results and inferred rules align with widely accepted standards.
|
|
63
|
+
- `zen_principles.md` — Philosophy. The Zen of Python. Grounds design trade-offs (simplicity, explicitness, readability) beyond syntax or mechanics.
|
|
64
|
+
- `my_developer_rules.md` — Local constraints. Your house rules, conventions, and project-specific requirements (scope, privacy, Spec.md). Keeps recommendations relevant to your actual workflow.
|
|
65
|
+
- `copilot_conversations.json` — Personal history. Transcripts of real assistant conversations, including your questions, code snippets, and discussion topics. Captures "how you code" and connects it to "how Guido codes."
|
|
66
|
+
""",
|
|
67
|
+
"""
|
|
68
|
+
# Preliminaries
|
|
69
|
+
|
|
70
|
+
To strike the balanace between speed, cost, anc quality, we recommend using OpenAI's `4o-mini` model; make sure your `.env` file contains this line:
|
|
71
|
+
`
|
|
72
|
+
LLM_MODEL="gpt-4o-mini"
|
|
73
|
+
`
|
|
74
|
+
""",
|
|
75
|
+
"""
|
|
76
|
+
import cognee
|
|
77
|
+
|
|
78
|
+
result = await cognee.add(
|
|
79
|
+
"file://data/guido_contributions.json",
|
|
80
|
+
node_set=["guido_data"]
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
await cognee.cognify(temporal_cognify=True)
|
|
84
|
+
|
|
85
|
+
results = await cognee.search("Show me commits")
|
|
86
|
+
""",
|
|
87
|
+
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from uuid import UUID
|
|
2
2
|
from typing import Optional
|
|
3
|
-
from sqlalchemy import select
|
|
3
|
+
from sqlalchemy import and_, select
|
|
4
4
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
5
5
|
|
|
6
6
|
from cognee.infrastructure.databases.relational import with_async_session
|
|
@@ -15,7 +15,7 @@ async def get_notebook(
|
|
|
15
15
|
session: AsyncSession,
|
|
16
16
|
) -> Optional[Notebook]:
|
|
17
17
|
result = await session.execute(
|
|
18
|
-
select(Notebook).where(Notebook.owner_id == user_id
|
|
18
|
+
select(Notebook).where(and_(Notebook.owner_id == user_id, Notebook.id == notebook_id))
|
|
19
19
|
)
|
|
20
20
|
|
|
21
21
|
return result.scalar()
|
|
@@ -5,16 +5,18 @@ import traceback
|
|
|
5
5
|
|
|
6
6
|
def wrap_in_async_handler(user_code: str) -> str:
|
|
7
7
|
return (
|
|
8
|
-
"
|
|
9
|
-
"
|
|
8
|
+
"import asyncio\n"
|
|
9
|
+
+ "asyncio.set_event_loop(running_loop)\n\n"
|
|
10
|
+
+ "from cognee.infrastructure.utils.run_sync import run_sync\n\n"
|
|
11
|
+
+ "async def __user_main__():\n"
|
|
10
12
|
+ "\n".join(" " + line for line in user_code.strip().split("\n"))
|
|
11
13
|
+ "\n"
|
|
12
|
-
" globals().update(locals())\n\n"
|
|
13
|
-
"run_sync(__user_main__())\n"
|
|
14
|
+
+ " globals().update(locals())\n\n"
|
|
15
|
+
+ "run_sync(__user_main__(), running_loop)\n"
|
|
14
16
|
)
|
|
15
17
|
|
|
16
18
|
|
|
17
|
-
def run_in_local_sandbox(code, environment=None):
|
|
19
|
+
def run_in_local_sandbox(code, environment=None, loop=None):
|
|
18
20
|
environment = environment or {}
|
|
19
21
|
code = wrap_in_async_handler(code.replace("\xa0", "\n"))
|
|
20
22
|
|
|
@@ -31,6 +33,7 @@ def run_in_local_sandbox(code, environment=None):
|
|
|
31
33
|
printOutput.append(output)
|
|
32
34
|
|
|
33
35
|
environment["print"] = customPrintFunction
|
|
36
|
+
environment["running_loop"] = loop
|
|
34
37
|
|
|
35
38
|
try:
|
|
36
39
|
exec(code, environment)
|
|
@@ -48,7 +48,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
|
|
|
48
48
|
query: str,
|
|
49
49
|
context: Optional[List[Edge]] = None,
|
|
50
50
|
context_extension_rounds=4,
|
|
51
|
-
) -> str:
|
|
51
|
+
) -> List[str]:
|
|
52
52
|
"""
|
|
53
53
|
Extends the context for a given query by retrieving related triplets and generating new
|
|
54
54
|
completions based on them.
|
|
@@ -136,7 +136,7 @@ class TemporalRetriever(GraphCompletionRetriever):
|
|
|
136
136
|
|
|
137
137
|
return self.descriptions_to_string(top_k_events)
|
|
138
138
|
|
|
139
|
-
async def get_completion(self, query: str, context: Optional[str] = None) -> str:
|
|
139
|
+
async def get_completion(self, query: str, context: Optional[str] = None) -> List[str]:
|
|
140
140
|
"""Generates a response using the query and optional context."""
|
|
141
141
|
if not context:
|
|
142
142
|
context = await self.get_context(query=query)
|
|
@@ -136,12 +136,19 @@ async def search(
|
|
|
136
136
|
if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
|
|
137
137
|
return_value = []
|
|
138
138
|
for search_result in search_results:
|
|
139
|
-
|
|
139
|
+
prepared_search_results = await prepare_search_result(search_result)
|
|
140
|
+
|
|
141
|
+
result = prepared_search_results["result"]
|
|
142
|
+
graphs = prepared_search_results["graphs"]
|
|
143
|
+
context = prepared_search_results["context"]
|
|
144
|
+
datasets = prepared_search_results["datasets"]
|
|
145
|
+
|
|
140
146
|
return_value.append(
|
|
141
147
|
{
|
|
142
|
-
"search_result": result,
|
|
148
|
+
"search_result": [result] if result else None,
|
|
143
149
|
"dataset_id": datasets[0].id,
|
|
144
150
|
"dataset_name": datasets[0].name,
|
|
151
|
+
"graphs": graphs,
|
|
145
152
|
}
|
|
146
153
|
)
|
|
147
154
|
return return_value
|
|
@@ -155,14 +162,6 @@ async def search(
|
|
|
155
162
|
return return_value[0]
|
|
156
163
|
else:
|
|
157
164
|
return return_value
|
|
158
|
-
# return [
|
|
159
|
-
# SearchResult(
|
|
160
|
-
# search_result=result,
|
|
161
|
-
# dataset_id=datasets[min(index, len(datasets) - 1)].id if datasets else None,
|
|
162
|
-
# dataset_name=datasets[min(index, len(datasets) - 1)].name if datasets else None,
|
|
163
|
-
# )
|
|
164
|
-
# for index, (result, _, datasets) in enumerate(search_results)
|
|
165
|
-
# ]
|
|
166
165
|
|
|
167
166
|
|
|
168
167
|
async def authorized_search(
|
|
@@ -208,11 +207,11 @@ async def authorized_search(
|
|
|
208
207
|
context = {}
|
|
209
208
|
datasets: List[Dataset] = []
|
|
210
209
|
|
|
211
|
-
for _, search_context,
|
|
212
|
-
for dataset in
|
|
210
|
+
for _, search_context, search_datasets in search_responses:
|
|
211
|
+
for dataset in search_datasets:
|
|
213
212
|
context[str(dataset.id)] = search_context
|
|
214
213
|
|
|
215
|
-
datasets.extend(
|
|
214
|
+
datasets.extend(search_datasets)
|
|
216
215
|
|
|
217
216
|
specific_search_tools = await get_search_type_tools(
|
|
218
217
|
query_type=query_type,
|
|
@@ -1,40 +1,62 @@
|
|
|
1
1
|
from typing import List, cast
|
|
2
|
+
from uuid import uuid5, NAMESPACE_OID
|
|
2
3
|
|
|
3
4
|
from cognee.modules.graph.utils import resolve_edges_to_text
|
|
4
5
|
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
|
|
6
|
+
from cognee.modules.search.types.SearchResult import SearchResultDataset
|
|
5
7
|
from cognee.modules.search.utils.transform_context_to_graph import transform_context_to_graph
|
|
8
|
+
from cognee.modules.search.utils.transform_insights_to_graph import transform_insights_to_graph
|
|
6
9
|
|
|
7
10
|
|
|
8
11
|
async def prepare_search_result(search_result):
|
|
9
|
-
|
|
12
|
+
results, context, datasets = search_result
|
|
10
13
|
|
|
11
14
|
graphs = None
|
|
12
15
|
result_graph = None
|
|
13
16
|
context_texts = {}
|
|
14
17
|
|
|
15
|
-
if isinstance(
|
|
18
|
+
if isinstance(datasets, list) and len(datasets) == 0:
|
|
19
|
+
datasets = [
|
|
20
|
+
SearchResultDataset(
|
|
21
|
+
id=uuid5(NAMESPACE_OID, "*"),
|
|
22
|
+
name="all available datasets",
|
|
23
|
+
)
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
if (
|
|
27
|
+
isinstance(context, List)
|
|
28
|
+
and len(context) > 0
|
|
29
|
+
and isinstance(context[0], tuple)
|
|
30
|
+
and context[0][1].get("relationship_name")
|
|
31
|
+
):
|
|
32
|
+
context_graph = transform_insights_to_graph(context)
|
|
33
|
+
graphs = {
|
|
34
|
+
", ".join([dataset.name for dataset in datasets]): context_graph,
|
|
35
|
+
}
|
|
36
|
+
results = None
|
|
37
|
+
elif isinstance(context, List) and len(context) > 0 and isinstance(context[0], Edge):
|
|
16
38
|
context_graph = transform_context_to_graph(context)
|
|
17
39
|
|
|
18
40
|
graphs = {
|
|
19
|
-
"
|
|
41
|
+
", ".join([dataset.name for dataset in datasets]): context_graph,
|
|
20
42
|
}
|
|
21
43
|
context_texts = {
|
|
22
|
-
"
|
|
44
|
+
", ".join([dataset.name for dataset in datasets]): await resolve_edges_to_text(context),
|
|
23
45
|
}
|
|
24
46
|
elif isinstance(context, str):
|
|
25
47
|
context_texts = {
|
|
26
|
-
"
|
|
48
|
+
", ".join([dataset.name for dataset in datasets]): context,
|
|
27
49
|
}
|
|
28
50
|
elif isinstance(context, List) and len(context) > 0 and isinstance(context[0], str):
|
|
29
51
|
context_texts = {
|
|
30
|
-
"
|
|
52
|
+
", ".join([dataset.name for dataset in datasets]): "\n".join(cast(List[str], context)),
|
|
31
53
|
}
|
|
32
54
|
|
|
33
|
-
if isinstance(
|
|
34
|
-
result_graph = transform_context_to_graph(
|
|
55
|
+
if isinstance(results, List) and len(results) > 0 and isinstance(results[0], Edge):
|
|
56
|
+
result_graph = transform_context_to_graph(results)
|
|
35
57
|
|
|
36
58
|
return {
|
|
37
|
-
"result": result_graph or
|
|
59
|
+
"result": result_graph or results[0] if results and len(results) == 1 else results,
|
|
38
60
|
"graphs": graphs,
|
|
39
61
|
"context": context_texts,
|
|
40
62
|
"datasets": datasets,
|
|
@@ -14,7 +14,7 @@ def transform_context_to_graph(context: List[Edge]):
|
|
|
14
14
|
if "name" in triplet.node1.attributes
|
|
15
15
|
else triplet.node1.id,
|
|
16
16
|
"type": triplet.node1.attributes["type"],
|
|
17
|
-
"attributes": triplet.
|
|
17
|
+
"attributes": triplet.node1.attributes,
|
|
18
18
|
}
|
|
19
19
|
nodes[triplet.node2.id] = {
|
|
20
20
|
"id": triplet.node2.id,
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from typing import Dict, List, Tuple
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def transform_insights_to_graph(context: List[Tuple[Dict, Dict, Dict]]):
|
|
5
|
+
nodes = {}
|
|
6
|
+
edges = {}
|
|
7
|
+
|
|
8
|
+
for triplet in context:
|
|
9
|
+
nodes[triplet[0]["id"]] = {
|
|
10
|
+
"id": triplet[0]["id"],
|
|
11
|
+
"label": triplet[0]["name"] if "name" in triplet[0] else triplet[0]["id"],
|
|
12
|
+
"type": triplet[0]["type"],
|
|
13
|
+
}
|
|
14
|
+
nodes[triplet[2]["id"]] = {
|
|
15
|
+
"id": triplet[2]["id"],
|
|
16
|
+
"label": triplet[2]["name"] if "name" in triplet[2] else triplet[2]["id"],
|
|
17
|
+
"type": triplet[2]["type"],
|
|
18
|
+
}
|
|
19
|
+
edges[f"{triplet[0]['id']}_{triplet[1]['relationship_name']}_{triplet[2]['id']}"] = {
|
|
20
|
+
"source": triplet[0]["id"],
|
|
21
|
+
"target": triplet[2]["id"],
|
|
22
|
+
"label": triplet[1]["relationship_name"],
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
return {
|
|
26
|
+
"nodes": list(nodes.values()),
|
|
27
|
+
"edges": list(edges.values()),
|
|
28
|
+
}
|
|
@@ -3,12 +3,17 @@ from pydantic import BaseModel, Field
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class Timestamp(BaseModel):
|
|
6
|
-
year: int = Field(
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
6
|
+
year: int = Field(
|
|
7
|
+
...,
|
|
8
|
+
ge=1,
|
|
9
|
+
le=9999,
|
|
10
|
+
description="Always required. If only a year is known, use it.",
|
|
11
|
+
)
|
|
12
|
+
month: int = Field(1, ge=1, le=12, description="If unknown, default to 1")
|
|
13
|
+
day: int = Field(1, ge=1, le=31, description="If unknown, default to 1")
|
|
14
|
+
hour: int = Field(0, ge=0, le=23, description="If unknown, default to 0")
|
|
15
|
+
minute: int = Field(0, ge=0, le=59, description="If unknown, default to 0")
|
|
16
|
+
second: int = Field(0, ge=0, le=59, description="If unknown, default to 0")
|
|
12
17
|
|
|
13
18
|
|
|
14
19
|
class Interval(BaseModel):
|
|
@@ -49,7 +49,7 @@ class TestCliMain:
|
|
|
49
49
|
def test_main_no_command(self, mock_create_parser):
|
|
50
50
|
"""Test main function when no command is provided"""
|
|
51
51
|
mock_parser = MagicMock()
|
|
52
|
-
mock_parser.parse_args.return_value = MagicMock(command=None)
|
|
52
|
+
mock_parser.parse_args.return_value = MagicMock(command=None, spec={})
|
|
53
53
|
mock_create_parser.return_value = (mock_parser, {})
|
|
54
54
|
|
|
55
55
|
result = main()
|
|
@@ -64,7 +64,7 @@ class TestCliMain:
|
|
|
64
64
|
mock_command.execute.return_value = None
|
|
65
65
|
|
|
66
66
|
mock_parser = MagicMock()
|
|
67
|
-
mock_args = MagicMock(command="test")
|
|
67
|
+
mock_args = MagicMock(command="test", spec={})
|
|
68
68
|
mock_parser.parse_args.return_value = mock_args
|
|
69
69
|
|
|
70
70
|
mock_create_parser.return_value = (mock_parser, {"test": mock_command})
|
|
@@ -84,7 +84,7 @@ class TestCliMain:
|
|
|
84
84
|
mock_command.execute.side_effect = CliCommandException("Test error", error_code=2)
|
|
85
85
|
|
|
86
86
|
mock_parser = MagicMock()
|
|
87
|
-
mock_args = MagicMock(command="test")
|
|
87
|
+
mock_args = MagicMock(command="test", spec={})
|
|
88
88
|
mock_parser.parse_args.return_value = mock_args
|
|
89
89
|
|
|
90
90
|
mock_create_parser.return_value = (mock_parser, {"test": mock_command})
|
|
@@ -103,7 +103,7 @@ class TestCliMain:
|
|
|
103
103
|
mock_command.execute.side_effect = Exception("Generic error")
|
|
104
104
|
|
|
105
105
|
mock_parser = MagicMock()
|
|
106
|
-
mock_args = MagicMock(command="test")
|
|
106
|
+
mock_args = MagicMock(command="test", spec={})
|
|
107
107
|
mock_parser.parse_args.return_value = mock_args
|
|
108
108
|
|
|
109
109
|
mock_create_parser.return_value = (mock_parser, {"test": mock_command})
|
|
@@ -126,7 +126,7 @@ class TestCliMain:
|
|
|
126
126
|
mock_command.execute.side_effect = test_exception
|
|
127
127
|
|
|
128
128
|
mock_parser = MagicMock()
|
|
129
|
-
mock_args = MagicMock(command="test")
|
|
129
|
+
mock_args = MagicMock(command="test", spec={})
|
|
130
130
|
mock_parser.parse_args.return_value = mock_args
|
|
131
131
|
|
|
132
132
|
mock_create_parser.return_value = (mock_parser, {"test": mock_command})
|
|
@@ -97,7 +97,7 @@ async def main():
|
|
|
97
97
|
f"Expected exactly one DocumentChunk, but found {type_counts.get('DocumentChunk', 0)}"
|
|
98
98
|
)
|
|
99
99
|
|
|
100
|
-
assert type_counts.get("Entity", 0) >=
|
|
100
|
+
assert type_counts.get("Entity", 0) >= 10, (
|
|
101
101
|
f"Expected multiple entities (assert is set to 20), but found {type_counts.get('Entity', 0)}"
|
|
102
102
|
)
|
|
103
103
|
|
|
@@ -105,52 +105,24 @@ async def main():
|
|
|
105
105
|
f"Expected multiple entity types, but found {type_counts.get('EntityType', 0)}"
|
|
106
106
|
)
|
|
107
107
|
|
|
108
|
-
assert type_counts.get("Event", 0) >=
|
|
108
|
+
assert type_counts.get("Event", 0) >= 10, (
|
|
109
109
|
f"Expected multiple events (assert is set to 20), but found {type_counts.get('Event', 0)}"
|
|
110
110
|
)
|
|
111
111
|
|
|
112
|
-
assert type_counts.get("Timestamp", 0) >=
|
|
113
|
-
f"Expected multiple timestamps (assert is set to
|
|
112
|
+
assert type_counts.get("Timestamp", 0) >= 10, (
|
|
113
|
+
f"Expected multiple timestamps (assert is set to 10), but found {type_counts.get('Timestamp', 0)}"
|
|
114
114
|
)
|
|
115
115
|
|
|
116
|
-
assert
|
|
117
|
-
f"Expected multiple intervals, but found {type_counts.get('Interval', 0)}"
|
|
118
|
-
)
|
|
119
|
-
|
|
120
|
-
assert edge_type_counts.get("contains", 0) >= 20, (
|
|
116
|
+
assert edge_type_counts.get("contains", 0) >= 10, (
|
|
121
117
|
f"Expected multiple 'contains' edge, but found {edge_type_counts.get('contains', 0)}"
|
|
122
118
|
)
|
|
123
119
|
|
|
124
|
-
assert edge_type_counts.get("is_a", 0) >=
|
|
120
|
+
assert edge_type_counts.get("is_a", 0) >= 10, (
|
|
125
121
|
f"Expected multiple 'is_a' edge, but found {edge_type_counts.get('is_a', 0)}"
|
|
126
122
|
)
|
|
127
123
|
|
|
128
|
-
assert edge_type_counts.get("during", 0) == type_counts.get("Interval", 0), (
|
|
129
|
-
"Expected the same amount of during and interval objects in the graph"
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
assert edge_type_counts.get("during", 0) == type_counts.get("Interval", 0), (
|
|
133
|
-
"Expected the same amount of during and interval objects in the graph"
|
|
134
|
-
)
|
|
135
|
-
|
|
136
|
-
assert edge_type_counts.get("time_from", 0) == type_counts.get("Interval", 0), (
|
|
137
|
-
"Expected the same amount of time_from and interval objects in the graph"
|
|
138
|
-
)
|
|
139
|
-
|
|
140
|
-
assert edge_type_counts.get("time_to", 0) == type_counts.get("Interval", 0), (
|
|
141
|
-
"Expected the same amount of time_to and interval objects in the graph"
|
|
142
|
-
)
|
|
143
|
-
|
|
144
124
|
retriever = TemporalRetriever()
|
|
145
125
|
|
|
146
|
-
result_before = await retriever.extract_time_from_query("What happened before 1890?")
|
|
147
|
-
|
|
148
|
-
assert result_before[0] is None
|
|
149
|
-
|
|
150
|
-
result_after = await retriever.extract_time_from_query("What happened after 1891?")
|
|
151
|
-
|
|
152
|
-
assert result_after[1] is None
|
|
153
|
-
|
|
154
126
|
result_between = await retriever.extract_time_from_query("What happened between 1890 and 1900?")
|
|
155
127
|
|
|
156
128
|
assert result_between[1]
|