cognee 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +2 -1
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -2
- cognee/infrastructure/databases/vector/config.py +1 -1
- cognee/infrastructure/utils/run_async.py +9 -4
- cognee/infrastructure/utils/run_sync.py +4 -3
- cognee/modules/notebooks/methods/create_tutorial_notebook.py +92 -0
- cognee/modules/notebooks/methods/get_notebook.py +2 -2
- cognee/modules/notebooks/methods/update_notebook.py +0 -1
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +8 -5
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +1 -1
- cognee/modules/retrieval/user_qa_feedback.py +1 -1
- cognee/modules/search/utils/prepare_search_result.py +4 -4
- {cognee-0.3.2.dist-info → cognee-0.3.3.dist-info}/METADATA +1 -1
- {cognee-0.3.2.dist-info → cognee-0.3.3.dist-info}/RECORD +21 -22
- cognee-0.3.3.dist-info/entry_points.txt +2 -0
- cognee/api/v1/save/save.py +0 -335
- cognee/tests/test_save_export_path.py +0 -116
- cognee-0.3.2.dist-info/entry_points.txt +0 -2
- {cognee-0.3.2.dist-info → cognee-0.3.3.dist-info}/WHEEL +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.3.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/notebooks/routers/get_notebooks_router.py

@@ -31,7 +31,8 @@ def get_notebooks_router():
 
     @router.get("")
     async def get_notebooks_endpoint(user: User = Depends(get_authenticated_user)):
-
+        async with get_async_session() as session:
+            return await get_notebooks(user.id, session)
 
     @router.post("")
     async def create_notebook_endpoint(
cognee/infrastructure/databases/vector/config.py

@@ -39,7 +39,7 @@ class VectorConfig(BaseSettings):
             values.vector_db_url = ensure_absolute_path(
                 values.vector_db_url,
             )
-
+        elif not values.vector_db_url:
             # Default path
             databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
             values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb")
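When no URL is configured, the new `elif` branch defaults the vector store to `<system_root_directory>/databases/cognee.lancedb`. A minimal sketch of overriding that default, assuming the `vector_db_url` field follows the usual pydantic-settings mapping to a `VECTOR_DB_URL` environment variable (an assumption; the variable name is not shown in this diff):

```python
import os

# Hypothetical override: point cognee's LanceDB file at a custom location
# before any cognee settings objects are created.
os.environ["VECTOR_DB_URL"] = "/srv/cognee/databases/my_project.lancedb"

import cognee  # noqa: E402  # imported only after the environment variable is set
```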
cognee/infrastructure/utils/run_async.py

@@ -1,13 +1,18 @@
 import asyncio
 from functools import partial
+import inspect
 
 
 async def run_async(func, *args, loop=None, executor=None, **kwargs):
     if loop is None:
         try:
-
+            loop = asyncio.get_running_loop()
         except RuntimeError:
-
+            loop = asyncio.get_event_loop()
 
-
-
+    if "loop" in inspect.signature(func).parameters:
+        pfunc = partial(func, *args, loop=loop, **kwargs)
+    else:
+        pfunc = partial(func, *args, **kwargs)
+
+    return await loop.run_in_executor(executor, pfunc)
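A minimal usage sketch of the reworked helper, assuming cognee 0.3.3 is installed; `blocking_work` is a hypothetical callable. Because its signature declares a `loop` parameter, `run_async` now injects the running loop before dispatching the call to the executor:

```python
import asyncio

from cognee.infrastructure.utils.run_async import run_async


def blocking_work(x, loop=None):
    # Runs on an executor thread; `loop` is injected because the signature
    # declares it, so the callee can schedule work back onto that loop.
    return x * 2, loop is not None


async def main():
    doubled, received_loop = await run_async(blocking_work, 21)
    print(doubled, received_loop)  # 42 True


asyncio.run(main())
```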
cognee/infrastructure/utils/run_sync.py

@@ -2,16 +2,17 @@ import asyncio
 import threading
 
 
-def run_sync(coro, timeout=None):
+def run_sync(coro, running_loop=None, timeout=None):
     result = None
     exception = None
 
     def runner():
-        nonlocal result, exception
+        nonlocal result, exception, running_loop
 
         try:
             try:
-
+                if not running_loop:
+                    running_loop = asyncio.get_running_loop()
 
                 result = asyncio.run_coroutine_threadsafe(coro, running_loop).result(timeout)
             except RuntimeError:
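A sketch of the new `running_loop` parameter in use, again assuming cognee 0.3.3. The caller is synchronous code on a worker thread; passing the loop explicitly lets `run_sync` schedule the coroutine onto the already running loop instead of trying to discover one from inside its runner thread:

```python
import asyncio

from cognee.infrastructure.utils.run_sync import run_sync


async def fetch_value():
    await asyncio.sleep(0.1)
    return 42


async def main():
    loop = asyncio.get_running_loop()

    def worker():
        # Plain synchronous code; the coroutine executes on `loop`, which keeps
        # running because this worker sits on its own thread.
        return run_sync(fetch_value(), loop, timeout=5)

    print(await asyncio.to_thread(worker))  # 42


asyncio.run(main())
```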
cognee/modules/notebooks/methods/create_tutorial_notebook.py

@@ -0,0 +1,92 @@
+
+from uuid import UUID, uuid4
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from ..models import NotebookCell
+from .create_notebook import create_notebook
+
+
+async def create_tutorial_notebook(user_id: UUID, session: AsyncSession):
+    await create_notebook(
+        user_id=user_id,
+        notebook_name="Welcome to cognee 🧠",
+        cells=[
+            NotebookCell(
+                id=uuid4(),
+                name="Welcome",
+                content="Cognee is your toolkit for turning text into a structured knowledge graph, optionally enhanced by ontologies, and then querying it with advanced retrieval techniques. This notebook will guide you through a simple example.",
+                type="markdown",
+            ),
+            NotebookCell(
+                id=uuid4(),
+                name="Example",
+                content="",
+                type="markdown",
+            ),
+        ],
+        deletable=False,
+        session=session,
+    )
+
+    cell_content = [
+        """
+# Using Cognee with Python Development Data
+
+Unite authoritative Python practice (Guido van Rossum's own contributions!), normative guidance (Zen/PEP 8), and your lived context (rules + conversations) into one *AI memory* that produces answers that are relevant, explainable, and consistent.
+""",
+
+        """
+## What You'll Learn
+
+In this comprehensive tutorial, you'll discover how to transform scattered development data into an intelligent knowledge system that enhances your coding workflow. By the end, you'll have:
+- Connected disparate data sources (Guido's CPython contributions, mypy development, PEP discussions, your Python projects) into a unified AI memory graph
+- Built a memory layer that understands Python design philosophy, best-practice coding patterns, and your preferences and experience
+- Learned how to use intelligent search capabilities that combine the diverse context
+- Integrated everything with your coding environment through MCP (Model Context Protocol)
+
+This tutorial demonstrates the power of **knowledge graphs** and **retrieval-augmented generation (RAG)** for software development, showing you how to build systems that learn from Python's creator and improve your own Python development.
+""",
+
+        """
+## Cognee and its core operations
+
+Before we dive in, let's understand the core Cognee operations we'll be working with:
+- `cognee.add()` - Ingests raw data (files, text, APIs) into the system
+- `cognee.cognify()` - Processes and structures data into a knowledge graph using AI
+- `cognee.search()` - Queries the knowledge graph with natural language or Cypher
+- `cognee.memify()` - Cognee's \"secret sauce\" that infers implicit connections and rules from your data
+""",
+
+        """
+## Data used in this tutorial
+
+Cognee can ingest many types of sources. In this tutorial, we use a small, concrete set of files that cover different perspectives:
+- `guido_contributions.json` — Authoritative exemplars. Real PRs and commits from Guido van Rossum (mypy, CPython). These show how Python's creator solved problems and provide concrete anchors for patterns.
+- `pep_style_guide.md` — Norms. Encodes community style and typing conventions (PEP 8 and related). Ensures that search results and inferred rules align with widely accepted standards.
+- `zen_principles.md` — Philosophy. The Zen of Python. Grounds design trade-offs (simplicity, explicitness, readability) beyond syntax or mechanics.
+- `my_developer_rules.md` — Local constraints. Your house rules, conventions, and project-specific requirements (scope, privacy, Spec.md). Keeps recommendations relevant to your actual workflow.
+- `copilot_conversations.json` — Personal history. Transcripts of real assistant conversations, including your questions, code snippets, and discussion topics. Captures "how you code" and connects it to "how Guido codes."
+""",
+
+        """
+# Preliminaries
+
+To strike the balance between speed, cost, and quality, we recommend using OpenAI's `gpt-4o-mini` model; make sure your `.env` file contains this line:
+`
+LLM_MODEL="gpt-4o-mini"
+`
+""",
+
+        """
+import cognee
+
+result = await cognee.add(
+    "file://data/guido_contributions.json",
+    node_set=["guido_data"]
+)
+
+await cognee.cognify(temporal_cognify=True)
+
+results = await cognee.search("Show me commits")
+"""
+    ]
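Pulling the tutorial cells together, a slightly fuller sketch of the same add → cognify → search flow. It assumes cognee 0.3.3 with `LLM_MODEL` and API credentials configured in `.env`, the tutorial's data files on disk, and keyword names taken from the `search()` calls visible elsewhere in this diff:

```python
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    # Ingest one tutorial file into a tagged node set.
    await cognee.add("file://data/guido_contributions.json", node_set=["guido_data"])

    # Build the knowledge graph; temporal_cognify also extracts time information.
    await cognee.cognify(temporal_cognify=True)

    # Plain natural-language search, exactly as in the tutorial cell.
    print(await cognee.search("Show me commits"))

    # A typed search through one of the retrievers touched by this release.
    answers = await cognee.search(
        query_text="What did Guido change in mypy?",
        query_type=SearchType.GRAPH_COMPLETION,
        top_k=5,
    )
    for answer in answers:
        print(answer)


asyncio.run(main())
```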
cognee/modules/notebooks/methods/get_notebook.py

@@ -1,6 +1,6 @@
 from uuid import UUID
 from typing import Optional
-from sqlalchemy import select
+from sqlalchemy import and_, select
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from cognee.infrastructure.databases.relational import with_async_session

@@ -15,7 +15,7 @@ async def get_notebook(
     session: AsyncSession,
 ) -> Optional[Notebook]:
     result = await session.execute(
-        select(Notebook).where(Notebook.owner_id == user_id
+        select(Notebook).where(and_(Notebook.owner_id == user_id, Notebook.id == notebook_id))
     )
 
     return result.scalar()
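The old query effectively filtered on the owner alone; the fixed one requires both the owner and the notebook id. A self-contained sketch of that filter shape with a hypothetical stand-in model (not cognee's actual `Notebook` class):

```python
from uuid import uuid4

from sqlalchemy import and_, select
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class Notebook(Base):  # stand-in model, columns chosen to mirror the query
    __tablename__ = "notebooks"
    id: Mapped[str] = mapped_column(primary_key=True)
    owner_id: Mapped[str] = mapped_column()


user_id, notebook_id = str(uuid4()), str(uuid4())

# Both conditions are combined explicitly, as in the fixed query above.
stmt = select(Notebook).where(and_(Notebook.owner_id == user_id, Notebook.id == notebook_id))
print(stmt)
```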
cognee/modules/notebooks/operations/run_in_local_sandbox.py

@@ -5,16 +5,18 @@ import traceback
 
 def wrap_in_async_handler(user_code: str) -> str:
     return (
-        "
-        "
+        "import asyncio\n"
+        + "asyncio.set_event_loop(running_loop)\n\n"
+        + "from cognee.infrastructure.utils.run_sync import run_sync\n\n"
+        + "async def __user_main__():\n"
         + "\n".join("    " + line for line in user_code.strip().split("\n"))
         + "\n"
-        "    globals().update(locals())\n\n"
-        "run_sync(__user_main__())\n"
+        + "    globals().update(locals())\n\n"
+        + "run_sync(__user_main__(), running_loop)\n"
     )
 
 
-def run_in_local_sandbox(code, environment=None):
+def run_in_local_sandbox(code, environment=None, loop=None):
     environment = environment or {}
     code = wrap_in_async_handler(code.replace("\xa0", "\n"))
 

@@ -31,6 +33,7 @@ def run_in_local_sandbox(code, environment=None):
         printOutput.append(output)
 
     environment["print"] = customPrintFunction
+    environment["running_loop"] = loop
 
     try:
         exec(code, environment)
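These pieces fit together with the `run_async` change above: the router can run a notebook cell on an executor thread, and because `run_in_local_sandbox` now declares a `loop` parameter, the running loop is forwarded so that the wrapped `__user_main__()` coroutine is scheduled back onto it. A rough sketch, assuming cognee 0.3.3; what the call returns is not shown in this diff, and `print` output is captured by the sandbox's replacement print function:

```python
import asyncio

from cognee.infrastructure.utils.run_async import run_async
from cognee.modules.notebooks.operations.run_in_local_sandbox import run_in_local_sandbox


async def main():
    # Executed on a worker thread; the `loop` kwarg is injected automatically
    # because run_in_local_sandbox declares it, so awaits inside the user code
    # land back on this (still running) event loop.
    await run_async(run_in_local_sandbox, "result = 40 + 2\nprint(result)")


asyncio.run(main())
```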
cognee/modules/retrieval/graph_completion_context_extension_retriever.py

@@ -48,7 +48,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
         query: str,
         context: Optional[List[Edge]] = None,
         context_extension_rounds=4,
-    ) -> str:
+    ) -> List[str]:
         """
         Extends the context for a given query by retrieving related triplets and generating new
         completions based on them.
cognee/modules/retrieval/temporal_retriever.py

@@ -136,7 +136,7 @@ class TemporalRetriever(GraphCompletionRetriever):
 
         return self.descriptions_to_string(top_k_events)
 
-    async def get_completion(self, query: str, context: Optional[str] = None) -> str:
+    async def get_completion(self, query: str, context: Optional[str] = None) -> List[str]:
         """Generates a response using the query and optional context."""
         if not context:
             context = await self.get_context(query=query)
cognee/modules/search/utils/prepare_search_result.py

@@ -6,7 +6,7 @@ from cognee.modules.search.utils.transform_context_to_graph import transform_context_to_graph
 
 
 async def prepare_search_result(search_result):
-
+    results, context, datasets = search_result
 
     graphs = None
     result_graph = None

@@ -30,11 +30,11 @@ async def prepare_search_result(search_result):
             "*": "\n".join(cast(List[str], context)),
         }
 
-    if isinstance(
-    result_graph = transform_context_to_graph(
+    if isinstance(results, List) and len(results) > 0 and isinstance(results[0], Edge):
+        result_graph = transform_context_to_graph(results)
 
     return {
-        "result": result_graph or
+        "result": result_graph or results[0] if len(results) == 1 else results,
         "graphs": graphs,
         "context": context_texts,
         "datasets": datasets,
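A pure-Python sketch (no cognee needed, stand-in values) of how that new `"result"` expression evaluates; because `or` binds tighter than the conditional expression, a single result is unwrapped while multiple results pass through as a list:

```python
result_graph = None

results = ["only answer"]
result = result_graph or results[0] if len(results) == 1 else results
print(result)  # only answer

results = ["first", "second"]
result = result_graph or results[0] if len(results) == 1 else results
print(result)  # ['first', 'second']
```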
{cognee-0.3.2.dist-info → cognee-0.3.3.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cognee
-Version: 0.3.2
+Version: 0.3.3
 Summary: Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning.
 Project-URL: Homepage, https://www.cognee.ai
 Project-URL: Repository, https://github.com/topoteretes/cognee
{cognee-0.3.2.dist-info → cognee-0.3.3.dist-info}/RECORD

@@ -41,7 +41,7 @@ cognee/api/v1/memify/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 cognee/api/v1/memify/routers/__init__.py,sha256=Uv25PVGhfjnNi1NYWOmOLIlzaeTlyMYF9m7BEfdu45Q,49
 cognee/api/v1/memify/routers/get_memify_router.py,sha256=C1Cjt9D5TxhqBPmXZGNrCS4lJqPVXIJYgxZFtWVjZNs,4599
 cognee/api/v1/notebooks/routers/__init__.py,sha256=TvQz6caluaMoXNvjbE1p_C8savypgs8rAyP5lQ8jlpc,55
-cognee/api/v1/notebooks/routers/get_notebooks_router.py,sha256=
+cognee/api/v1/notebooks/routers/get_notebooks_router.py,sha256=m8OH3Kw1UHF8aTP4yNuSpv7gNThE4HxmLIrUnvECYGA,3484
 cognee/api/v1/permissions/routers/__init__.py,sha256=ljE3YnrzlMcVfThmkR5GSIxkm7sQVyibaLNtYQL4HO0,59
 cognee/api/v1/permissions/routers/get_permissions_router.py,sha256=tqd-J__UBlstTWnQocesdjVM9JnYO5rtJhhFj-Zv1_o,8316
 cognee/api/v1/prune/__init__.py,sha256=FEr5tTlX7wf3X4aFff6NPlVhNrPyqx7RBoJ71bJN1cY,25

@@ -53,7 +53,6 @@ cognee/api/v1/responses/models.py,sha256=MylzSnK-QB0kXe7nS-Mu4XRKZa-uBw8qP7Ke9On
 cognee/api/v1/responses/routers/__init__.py,sha256=X2qishwGRVFXawnvkZ5bv420PuPRLvknaFO2jdfiR10,122
 cognee/api/v1/responses/routers/default_tools.py,sha256=9qqzEZhrt3_YMKzUA06ke8P-2WeLXhYpKgVW6mLHlzw,3004
 cognee/api/v1/responses/routers/get_responses_router.py,sha256=ggbLhY9IXaInCgIs5TUuOCkFW64xmTKZQsc2ENq2Ocs,5979
-cognee/api/v1/save/save.py,sha256=xRthVNANIsrVJlLa5QKrdSiwCSckr7HBLmoeVJ_gEdE,12639
 cognee/api/v1/search/__init__.py,sha256=Sqw60DcOj4Bnvt-EWFknT31sPcvROIRKCWLr5pbkFr4,39
 cognee/api/v1/search/search.py,sha256=YQicNVi9q4FteAmt_EtY75I_EuNZ9ZjGE73wg-NcDwY,8824
 cognee/api/v1/search/routers/__init__.py,sha256=6RebeLX_2NTRxIMPH_mGuLztPxnGnMJK1y_O93CtRm8,49

@@ -182,12 +181,12 @@ cognee/infrastructure/databases/relational/get_async_session.py,sha256=qfiXSsTAA
 cognee/infrastructure/databases/relational/get_migration_relational_engine.py,sha256=5RtH281iIQo3vqgwmKT0nuiJp9jNd7vw6xRUjc5xIDM,1070
 cognee/infrastructure/databases/relational/get_relational_engine.py,sha256=De51ieg9eFhRLX08k9oNc-oszvt_9J5DHebqI1qI8_U,741
 cognee/infrastructure/databases/relational/with_async_session.py,sha256=UgQeJOvgeM6yhyNDwWdGULtTjZosTnjDlr267Losnfs,803
-cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py,sha256=
+cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py,sha256=j4mnqNJAO-U-Qfveam6NgjIH5lt7WjSMLVlemBrdpYU,27540
 cognee/infrastructure/databases/relational/sqlalchemy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cognee/infrastructure/databases/utils/__init__.py,sha256=4C0ncZG-O6bOFJpKgscCHu6D5vodLWRIKpe-WT4Ijbs,75
 cognee/infrastructure/databases/utils/get_or_create_dataset_database.py,sha256=wn7pRgeX-BU0L191_6pgT9P54uhVQlGMPqxQdvIlv4Y,2101
 cognee/infrastructure/databases/vector/__init__.py,sha256=7MdGJ3Mxdh2RyDq39rcjD99liIa-yGXxDUzq--1qQZs,291
-cognee/infrastructure/databases/vector/config.py,sha256=
+cognee/infrastructure/databases/vector/config.py,sha256=4HOmqZOEfVNmAhjxRNePMU9haTVeR35R2XbhPTcMqFg,2952
 cognee/infrastructure/databases/vector/create_vector_engine.py,sha256=ECtICkIW5QM_lX9465ZTxVXC5MCRo_h219q3GyFXxpc,4716
 cognee/infrastructure/databases/vector/get_vector_engine.py,sha256=y4TMWJ6B6DxwKF9PMfjB6WqujPnVhf0oR2j35Q-KhvA,272
 cognee/infrastructure/databases/vector/supported_databases.py,sha256=0UIYcQ15p7-rq5y_2A-E9ydcXyP6frdg8T5e5ECDDMI,25

@@ -366,8 +365,8 @@ cognee/infrastructure/loaders/external/pypdf_loader.py,sha256=nFa_h3LURBPoguRIID
 cognee/infrastructure/loaders/external/unstructured_loader.py,sha256=XCRVHwpM5XmcjRmL4Pr9ELzBU_qYDPhX_Ahn5K8w0AU,4603
 cognee/infrastructure/loaders/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cognee/infrastructure/utils/calculate_backoff.py,sha256=O6h4MCe357BKaECmLZPLGYpffrMol65LwQCklBj4sh4,935
-cognee/infrastructure/utils/run_async.py,sha256=
-cognee/infrastructure/utils/run_sync.py,sha256=
+cognee/infrastructure/utils/run_async.py,sha256=gZY8ZLG_86O9YVK8hciduIoDONHaEEnGOILh3EeD9LA,510
+cognee/infrastructure/utils/run_sync.py,sha256=9pAXc-EmjtV03exnUMOVSC-IJq_KCslX05z62MHQjlQ,800
 cognee/modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cognee/modules/chunking/Chunker.py,sha256=KezN4WBiV0KNJtx6daMg4g1-a-_oJxn_l_iQT94T1lQ,343
 cognee/modules/chunking/LangchainChunker.py,sha256=Yo9Jza-t3x3V8I8PWbxUu48vlVVdvJKxwzL2gManwDc,2351

@@ -483,14 +482,15 @@ cognee/modules/metrics/operations/__init__.py,sha256=MZ3xbVdfEKqfLct8WnbyFVyZmkB
 cognee/modules/metrics/operations/get_pipeline_run_metrics.py,sha256=upIWnzKeJT1_XbL_ABdGxW-Ai7mO3AqMK35BNmItIQQ,2434
 cognee/modules/notebooks/methods/__init__.py,sha256=IhY4fUVPJbuvS83QESsWzjZRC6oC1I-kJi5gr3kPTLk,215
 cognee/modules/notebooks/methods/create_notebook.py,sha256=S41H3Rha0pj9dEKFy1nBG9atTGHhUdOmDZgr0ckUA6M,633
+cognee/modules/notebooks/methods/create_tutorial_notebook.py,sha256=8YPoDcMUZSNhEWSKxUcPOM61y0St2Z1Y-PC1HFRmlbk,4248
 cognee/modules/notebooks/methods/delete_notebook.py,sha256=BKxoRlPzkwXvTYh5WcF-zo_iVmaXqEiptS42JwB0KQU,309
-cognee/modules/notebooks/methods/get_notebook.py,sha256=
+cognee/modules/notebooks/methods/get_notebook.py,sha256=IP4imsdt9X6GYd6i6WF6PlVhotGNH0i7XZpPqbtqMwo,554
 cognee/modules/notebooks/methods/get_notebooks.py,sha256=ee40ALHvebVORuwZVkQ271qAj260rrYy6eVGxAmfo8c,483
-cognee/modules/notebooks/methods/update_notebook.py,sha256=
+cognee/modules/notebooks/methods/update_notebook.py,sha256=MnZbfh-WfEfH3ImNvyQNhDeNwpYeS7p8FPVwnmBvZVg,361
 cognee/modules/notebooks/models/Notebook.py,sha256=Jth47QxJQ2-VGPyIcS0ul3bS8bgGrk9vCGoJVagxanw,1690
 cognee/modules/notebooks/models/__init__.py,sha256=jldsDjwRvFMreGpe4wxxr5TlFXTZuU7rbsRkGQvTO5s,45
 cognee/modules/notebooks/operations/__init__.py,sha256=VR_2w_d0lEiJ5Xw7_mboo2qWUv0umrR_Bp58MaMoE6w,55
-cognee/modules/notebooks/operations/run_in_local_sandbox.py,sha256=
+cognee/modules/notebooks/operations/run_in_local_sandbox.py,sha256=17hMEQC3LZTfPvbRUrPN9SzDeJPWSTq_BAhtwRZiqT8,1338
 cognee/modules/observability/get_observe.py,sha256=chRw4jmpmrwEvDecF9sgApm23IOzVgCbwkKEAyz1_AI,264
 cognee/modules/observability/observers.py,sha256=SKQSWWyGDG0QY2_bqsFgfpLUb7OUL4WFf8tDZYe5JMM,157
 cognee/modules/ontology/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -550,15 +550,15 @@ cognee/modules/retrieval/code_retriever.py,sha256=cnOjgfCATzz0-XZGFrIIkuVZLc6HBh
 cognee/modules/retrieval/coding_rules_retriever.py,sha256=3GU259jTbGLqmp_A8sUdE4fyf0td06SKuxBJVW-npIQ,1134
 cognee/modules/retrieval/completion_retriever.py,sha256=Lw5sxN_UrtmWSOtcSS7Yj50Gw9p4nNBmW3dr2kV9JJ0,3754
 cognee/modules/retrieval/cypher_search_retriever.py,sha256=_3rZJ23hSZpDa8kVyOSWN3fwjMI_aLF2m5p-FtBek8k,2440
-cognee/modules/retrieval/graph_completion_context_extension_retriever.py,sha256
-cognee/modules/retrieval/graph_completion_cot_retriever.py,sha256=
-cognee/modules/retrieval/graph_completion_retriever.py,sha256=
+cognee/modules/retrieval/graph_completion_context_extension_retriever.py,sha256=-6yN8gpRlDue8d28rk-Ly-gq0T8BW-i1-Jgbp1x-Zsg,4532
+cognee/modules/retrieval/graph_completion_cot_retriever.py,sha256=JU-FkikaU68v8fT8VAmG6jojwhwroKYW2RUxdlJ1R-k,6140
+cognee/modules/retrieval/graph_completion_retriever.py,sha256=VnrFD4xUQewIO83mfmIUcPLA_HBGdUlDVRyA2Pm4ARo,8822
 cognee/modules/retrieval/graph_summary_completion_retriever.py,sha256=3AMisk3fObk2Vh1heY4veHkDjLsHgSSUc_ChZseJUYw,2456
 cognee/modules/retrieval/insights_retriever.py,sha256=1pcYd34EfKk85MSPFQ8b-ZbSARmnauks8TxXfNOxvOw,4953
 cognee/modules/retrieval/natural_language_retriever.py,sha256=zJz35zRmBP8-pRlkoxxSxn3-jtG2lUW0xcu58bq9Ebs,5761
 cognee/modules/retrieval/summaries_retriever.py,sha256=joXYphypACm2JiCjbC8nBS61m1q2oYkzyIt9bdgALNw,3384
-cognee/modules/retrieval/temporal_retriever.py,sha256=
-cognee/modules/retrieval/user_qa_feedback.py,sha256
+cognee/modules/retrieval/temporal_retriever.py,sha256=EUEYN94LpoWfbPjsToe_pC3rFsUUTIPA5K6wNjv8Nds,5685
+cognee/modules/retrieval/user_qa_feedback.py,sha256=-VEOsE_t0FiTy00OpOMWAYv12YSLPieAcMsu82vm7h4,3366
 cognee/modules/retrieval/context_providers/DummyContextProvider.py,sha256=9GsvINc7ekRyRWO5IefFGyytRYqsSlhpwAOw6Q691cA,419
 cognee/modules/retrieval/context_providers/SummarizedTripletSearchContextProvider.py,sha256=ypO6yWLxvmRsj_5dyYdvXTbztJmB_ioLrgyG6bF5WGA,894
 cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py,sha256=8PzksHAtRw7tZarP3nZuxhi0cd1EYEDHOT4Q74mNEvc,3656

@@ -593,7 +593,7 @@ cognee/modules/search/types/SearchResult.py,sha256=blEean6PRFKcDRQugsojZPfH-Wohx
 cognee/modules/search/types/SearchType.py,sha256=-lT4bLKKunV4cL4FfF3tjNbdN7X4AsRMLpTkReNwXZM,594
 cognee/modules/search/types/__init__.py,sha256=8k6OjVrL70W1Jh-ClTbG2ETYIhOtSk3tfqjzYgEdPzA,117
 cognee/modules/search/utils/__init__.py,sha256=86mRtCN-B5-2NNChdQoU5x8_8hqTczGZjBoKVE9O7hA,124
-cognee/modules/search/utils/prepare_search_result.py,sha256=
+cognee/modules/search/utils/prepare_search_result.py,sha256=FTM-tVlprL8EswIcwOy8jO1bRmKG61GZqFfM8FNJUJg,1336
 cognee/modules/search/utils/transform_context_to_graph.py,sha256=rUQeEH-Z-GqAzAZTCetRVpwgrOHlNe3mUBRLwRb0478,1238
 cognee/modules/settings/__init__.py,sha256=_SZQgCQnnnIHLJuKOMO9uWzXNBQxwYHHMUSBp0qa2uQ,210
 cognee/modules/settings/get_current_settings.py,sha256=R2lOusG5Q2PMa2-2vDndh3Lm7nXyZVkdzTV7vQHT81Y,1642

@@ -789,7 +789,6 @@ cognee/tests/test_remote_kuzu.py,sha256=2GG05MtGuhOo6ST82OxjdVDetBS0GWHvKKmmmEtQ
 cognee/tests/test_remote_kuzu_stress.py,sha256=5vgnu4Uz_NoKKqFZJeVceHwb2zNhvdTVBgpN3NjhfAE,5304
 cognee/tests/test_s3.py,sha256=rY2UDK15cdyywlyVrR8N2DRtVXWYIW5REaaz99gaQeE,2694
 cognee/tests/test_s3_file_storage.py,sha256=62tvIFyh_uTP0TFF9Ck4Y-sxWPW-cwJKYEJUJI1atPI,5654
-cognee/tests/test_save_export_path.py,sha256=z07oQao82INzldg2mesS3ZGt7fl7rcjKx15JwoGT5tI,3898
 cognee/tests/test_search_db.py,sha256=4GpLx8ZJoMjkp-XqQ-LCrkf3NhAM4j_rMmlOFgmDO-A,13420
 cognee/tests/test_starter_pipelines.py,sha256=X1J8RDD0bFMKnRETyi5nyaF4TYdmUIu0EuD3WQwShNs,2475
 cognee/tests/test_telemetry.py,sha256=FIneuVofSKWFYqxNC88sT_P5GPzgfjVyqDCf2TYBE2E,4130

@@ -890,9 +889,9 @@ distributed/tasks/queued_add_edges.py,sha256=kz1DHE05y-kNHORQJjYWHUi6Q1QWUp_v3Dl
 distributed/tasks/queued_add_nodes.py,sha256=aqK4Ij--ADwUWknxYpiwbYrpa6CcvFfqHWbUZW4Kh3A,452
 distributed/workers/data_point_saving_worker.py,sha256=jFmA0-P_0Ru2IUDrSug0wML-5goAKrGtlBm5BA5Ryw4,3229
 distributed/workers/graph_saving_worker.py,sha256=oUYl99CdhlrPAIsUOHbHnS3d4XhGoV0_OIbCO8wYzRg,3648
-cognee-0.3.
-cognee-0.3.
-cognee-0.3.
-cognee-0.3.
-cognee-0.3.
-cognee-0.3.
+cognee-0.3.3.dist-info/METADATA,sha256=MofBzxb-pUo59hyKjasnooG9SDbuVPsvy5UK6sjXluA,14753
+cognee-0.3.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+cognee-0.3.3.dist-info/entry_points.txt,sha256=GCCTsNg8gzOJkolq7dR7OK1VlIAO202dGDnMI8nm8oQ,55
+cognee-0.3.3.dist-info/licenses/LICENSE,sha256=pHHjSQj1DD8SDppW88MMs04TPk7eAanL1c5xj8NY7NQ,11344
+cognee-0.3.3.dist-info/licenses/NOTICE.md,sha256=6L3saP3kSpcingOxDh-SGjMS8GY79Rlh2dBNLaO0o5c,339
+cognee-0.3.3.dist-info/RECORD,,
cognee/api/v1/save/save.py
DELETED

@@ -1,335 +0,0 @@
import os
import asyncio
import json
from typing import Optional, Union, List, Dict
from uuid import UUID

from pydantic import BaseModel

from cognee.base_config import get_base_config
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_default_user
from cognee.modules.data.methods import get_authorized_existing_datasets, get_dataset_data
from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
from cognee.infrastructure.llm.LLMGateway import LLMGateway
from cognee.shared.logging_utils import get_logger
from cognee.api.v1.search import search
from cognee.modules.search.types import SearchType


logger = get_logger("save")


class QuestionsModel(BaseModel):
    questions: List[str]


def _sanitize_filename(name: str) -> str:
    safe = "".join(c if c.isalnum() or c in ("-", "_", ".", " ") else "_" for c in name)
    return safe.strip().replace(" ", "_")


def _dataset_dir_name(dataset) -> str:
    # Prefer readable dataset name when available, fallback to id
    if getattr(dataset, "name", None):
        return _sanitize_filename(str(dataset.name))
    return str(dataset.id)


def _file_markdown_name(data_item, used_names: set[str]) -> str:
    # Use original file name if present, else data.name
    name = getattr(data_item, "name", None) or "file"
    base = _sanitize_filename(str(name))
    filename = f"{base}.md"
    if filename in used_names:
        short_id = str(getattr(data_item, "id", ""))[:8]
        filename = f"{base}__{short_id}.md"
    used_names.add(filename)
    return filename


def _ascii_path_tree(path_str: str) -> str:
    if not path_str:
        return "(no path)"

    # Normalize special schemes but keep segments readable
    try:
        normalized = get_data_file_path(path_str)
    except Exception:
        normalized = path_str

    # Keep the path compact – show last 5 segments
    parts = [p for p in normalized.replace("\\", "/").split("/") if p]
    if len(parts) > 6:
        display = ["…"] + parts[-5:]
    else:
        display = parts

    # Render a single-branch tree
    lines = []
    for idx, seg in enumerate(display):
        prefix = "└── " if idx == 0 else (" " * idx + "└── ")
        lines.append(f"{prefix}{seg}")
    return "\n".join(lines)


async def _get_summary_via_summaries(query_text: str, dataset_id: UUID, top_k: int) -> str:
    try:
        results = await search(
            query_text=query_text,
            query_type=SearchType.SUMMARIES,
            dataset_ids=[dataset_id],
            top_k=top_k,
        )
        if not results:
            return ""
        texts: List[str] = []
        for r in results[:top_k]:
            texts.append(str(r))
        return "\n\n".join(texts)
    except Exception as e:
        logger.error(
            "SUMMARIES search failed for '%s' in dataset %s: %s",
            query_text,
            str(dataset_id),
            str(e),
        )
        return ""


async def _generate_questions(file_name: str, summary_text: str) -> List[str]:
    prompt = (
        "You are an expert analyst. Given a file and its summary, propose 10 diverse, high-signal "
        "questions to further explore the file's content, implications, relationships, and gaps. "
        "Avoid duplicates; vary depth and angle (overview, details, cross-references, temporal, quality).\n\n"
        f"File: {file_name}\n\nSummary:\n{summary_text[:4000]}"
    )

    model = await LLMGateway.acreate_structured_output(
        text_input=prompt,
        system_prompt="Return strictly a JSON with key 'questions' and value as an array of 10 concise strings.",
        response_model=QuestionsModel,
    )

    # model can be either pydantic model or dict-like, normalize
    try:
        questions = list(getattr(model, "questions", []))
    except Exception:
        questions = []

    # Fallback if the tool returned a dict-like
    if not questions and isinstance(model, dict):
        questions = list(model.get("questions", []) or [])

    # Enforce 10 max
    return questions[:10]


async def _run_searches_for_question(
    question: str, dataset_id: UUID, search_types: List[SearchType], top_k: int
) -> Dict[str, Union[str, List[dict], List[str]]]:
    async def run_one(st: SearchType):
        try:
            result = await search(
                query_text=question,
                query_type=st,
                dataset_ids=[dataset_id],
                top_k=top_k,
            )
            return st.value, result
        except Exception as e:
            logger.error("Search failed for type %s: %s", st.value, str(e))
            return st.value, [f"Error: {str(e)}"]

    pairs = await asyncio.gather(*[run_one(st) for st in search_types])
    return {k: v for k, v in pairs}


def _format_results_md(results: Dict[str, Union[str, List[dict], List[str]]]) -> str:
    lines: List[str] = []
    for st, payload in results.items():
        lines.append(f"#### {st}")
        if isinstance(payload, list):
            # Printed as bullet items; stringify dicts
            for item in payload[:5]:
                if isinstance(item, dict):
                    # compact representation
                    snippet = json.dumps(item, ensure_ascii=False)[:800]
                    lines.append(f"- {snippet}")
                else:
                    text = str(item)
                    lines.append(f"- {text[:800]}")
        else:
            lines.append(str(payload))
        lines.append("")
    return "\n".join(lines)


async def save(
    datasets: Optional[Union[List[str], List[UUID]]] = None,
    export_root_directory: Optional[str] = None,
    user: Optional[User] = None,
    # Configurable knobs
    max_questions: int = 10,
    search_types: Optional[List[Union[str, SearchType]]] = None,
    top_k: int = 5,
    include_summary: bool = True,
    include_ascii_tree: bool = True,
    concurrency: int = 4,
    timeout: Optional[float] = None,
) -> Dict[str, str]:
    """
    Export per-dataset markdown summaries and search insights for each ingested file.

    For every dataset the user can read:
    - Create a folder under export_root_directory (or data_root_directory/exports)
    - For each data item (file), create a .md containing:
      - Summary of the file (from existing TextSummary nodes)
      - A small ASCII path tree showing its folder position
      - Up to N LLM-generated question ideas (configurable)
      - Results of configured Cognee searches per question
    Also creates an index.md per dataset with links to files and an optional dataset summary.

    Returns a mapping of dataset_id -> export_directory path.
    """
    base_config = get_base_config()
    export_root = export_root_directory or os.path.join(
        base_config.data_root_directory, "memory_export"
    )
    os.makedirs(export_root, exist_ok=True)

    if user is None:
        user = await get_default_user()

    datasets_list = await get_authorized_existing_datasets(datasets, "read", user)
    results: Dict[str, str] = {}

    for dataset in datasets_list:
        ds_dir = os.path.join(export_root, _dataset_dir_name(dataset))
        os.makedirs(ds_dir, exist_ok=True)
        results[str(dataset.id)] = ds_dir

        data_items = await get_dataset_data(dataset.id)

        # Normalize search types
        if not search_types:
            effective_search_types = [
                SearchType.GRAPH_COMPLETION,
                SearchType.INSIGHTS,
                SearchType.CHUNKS,
            ]
        else:
            effective_search_types = []
            for st in search_types:
                if isinstance(st, SearchType):
                    effective_search_types.append(st)
                else:
                    try:
                        effective_search_types.append(SearchType[str(st)])
                    except Exception:
                        logger.warning("Unknown search type '%s', skipping", str(st))

        sem = asyncio.Semaphore(max(1, int(concurrency)))
        used_names: set[str] = set()
        index_entries: List[tuple[str, str]] = []

        async def process_one(data_item):
            async with sem:
                file_label = getattr(data_item, "name", str(data_item.id))
                original_path = getattr(data_item, "original_data_location", None)

                ascii_tree = (
                    _ascii_path_tree(original_path or file_label) if include_ascii_tree else ""
                )

                summary_text = ""
                if include_summary:
                    # Use SUMMARIES search scoped to dataset to derive file summary
                    file_query = getattr(data_item, "name", str(data_item.id)) or "file"
                    summary_text = await _get_summary_via_summaries(file_query, dataset.id, top_k)
                    if not summary_text:
                        summary_text = "Summary not available."

                if max_questions == 0:
                    questions = []
                else:
                    questions = await _generate_questions(file_label, summary_text)
                    if max_questions is not None and max_questions >= 0:
                        questions = questions[:max_questions]

                async def searches_for_question(q: str):
                    return await _run_searches_for_question(
                        q, dataset.id, effective_search_types, top_k
                    )

                # Run per-question searches concurrently
                per_q_results = await asyncio.gather(*[searches_for_question(q) for q in questions])

                # Build markdown content
                md_lines = [f"# {file_label}", ""]
                if include_ascii_tree:
                    md_lines.extend(["## Location", "", "```", ascii_tree, "```", ""])
                if include_summary:
                    md_lines.extend(["## Summary", "", summary_text, ""])

                md_lines.append("## Question ideas")
                for idx, q in enumerate(questions, start=1):
                    md_lines.append(f"- {idx}. {q}")
                md_lines.append("")

                md_lines.append("## Searches")
                md_lines.append("")
                for q, per_type in zip(questions, per_q_results):
                    md_lines.append(f"### Q: {q}")
                    md_lines.append(_format_results_md(per_type))
                    md_lines.append("")

                # Write to file (collision-safe)
                md_filename = _file_markdown_name(data_item, used_names)
                export_path = os.path.join(ds_dir, md_filename)
                tmp_path = export_path + ".tmp"
                with open(tmp_path, "w", encoding="utf-8") as f:
                    f.write("\n".join(md_lines))
                os.replace(tmp_path, export_path)

                index_entries.append((file_label, md_filename))

        tasks = [asyncio.create_task(process_one(item)) for item in data_items]

        if timeout and timeout > 0:
            try:
                await asyncio.wait_for(asyncio.gather(*tasks, return_exceptions=True), timeout)
            except asyncio.TimeoutError:
                logger.error("Save timed out for dataset %s", str(dataset.id))
        else:
            await asyncio.gather(*tasks, return_exceptions=True)

        # Build dataset index.md with TOC and optional dataset summary via SUMMARIES
        try:
            index_lines = [f"# Dataset: {_dataset_dir_name(dataset)}", "", "## Files", ""]
            for display, fname in sorted(index_entries, key=lambda x: x[0].lower()):
                index_lines.append(f"- [{display}]({fname})")

            # Dataset summary section
            try:
                summaries = await search(
                    query_text="dataset overview",
                    query_type=SearchType.SUMMARIES,
                    dataset_ids=[dataset.id],
                    top_k=top_k,
                )
            except Exception as e:
                logger.error("Dataset summary search failed: %s", str(e))
                summaries = []

            if summaries:
                index_lines.extend(["", "## Dataset summary (top summaries)", ""])
                for s in summaries[:top_k]:
                    index_lines.append(f"- {str(s)[:800]}")

            with open(os.path.join(ds_dir, "index.md"), "w", encoding="utf-8") as f:
                f.write("\n".join(index_lines))
        except Exception as e:
            logger.error("Failed to write dataset index for %s: %s", str(dataset.id), str(e))

    return results
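For reference, the removed `save()` entry point was called roughly like this in 0.3.2 (a sketch based on the signature above and on the deleted test below; the API is gone in 0.3.3):

```python
import asyncio

from cognee.api.v1.save.save import save  # removed in 0.3.3


async def main():
    exports = await save(
        datasets=None,  # all datasets readable by the resolved user
        export_root_directory="./memory_export",
        max_questions=3,
        search_types=["GRAPH_COMPLETION", "INSIGHTS", "CHUNKS"],
        top_k=2,
    )
    print(exports)  # {dataset_id: export_directory, ...}


asyncio.run(main())
```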
cognee/tests/test_save_export_path.py
DELETED

@@ -1,116 +0,0 @@
import os
import asyncio
from uuid import uuid4

import pytest


@pytest.mark.asyncio
async def test_save_uses_custom_export_path(tmp_path, monkeypatch):
    # Import target after tmp fixtures are ready
    from cognee.api.v1.save import save as save_mod

    # Prepare two mock datasets
    class Dataset:
        def __init__(self, id_, name):
            self.id = id_
            self.name = name

    ds1 = Dataset(uuid4(), "dataset_alpha")
    ds2 = Dataset(uuid4(), "dataset_beta")

    # Mock dataset discovery
    async def mock_get_authorized_existing_datasets(datasets, permission_type, user):
        return [ds1, ds2]

    monkeypatch.setattr(
        save_mod, "get_authorized_existing_datasets", mock_get_authorized_existing_datasets
    )

    # Mock data items (with filename collision in ds1)
    class DataItem:
        def __init__(self, id_, name, original_path=None):
            self.id = id_
            self.name = name
            self.original_data_location = original_path

    ds1_items = [
        DataItem(uuid4(), "report.txt", "/root/a/report.txt"),
        DataItem(uuid4(), "report.txt", "/root/b/report.txt"),  # collision
    ]
    ds2_items = [
        DataItem(uuid4(), "notes.md", "/root/x/notes.md"),
    ]

    async def mock_get_dataset_data(dataset_id):
        if dataset_id == ds1.id:
            return ds1_items
        if dataset_id == ds2.id:
            return ds2_items
        return []

    monkeypatch.setattr(save_mod, "get_dataset_data", mock_get_dataset_data)

    # Mock summary retrieval
    async def mock_get_document_summaries_text(data_id: str) -> str:
        return "This is a summary."

    monkeypatch.setattr(save_mod, "_get_document_summaries_text", mock_get_document_summaries_text)

    # Mock questions
    async def mock_generate_questions(file_name: str, summary_text: str):
        return ["Q1?", "Q2?", "Q3?"]

    monkeypatch.setattr(save_mod, "_generate_questions", mock_generate_questions)

    # Mock searches per question
    async def mock_run_searches_for_question(question, dataset_id, search_types, top_k):
        return {st.value: [f"{question} -> ok"] for st in search_types}

    monkeypatch.setattr(save_mod, "_run_searches_for_question", mock_run_searches_for_question)

    # Use custom export path
    export_dir = tmp_path / "my_exports"
    export_dir_str = str(export_dir)

    # Run
    result = await save_mod.save(
        datasets=None,
        export_root_directory=export_dir_str,
        max_questions=3,
        search_types=["GRAPH_COMPLETION", "INSIGHTS", "CHUNKS"],
        top_k=2,
        include_summary=True,
        include_ascii_tree=True,
        concurrency=2,
        timeout=None,
    )

    # Verify returned mapping points to our custom path
    assert str(ds1.id) in result and str(ds2.id) in result
    assert result[str(ds1.id)].startswith(export_dir_str)
    assert result[str(ds2.id)].startswith(export_dir_str)

    # Verify directories and files exist
    ds1_dir = result[str(ds1.id)]
    ds2_dir = result[str(ds2.id)]

    assert os.path.isdir(ds1_dir)
    assert os.path.isdir(ds2_dir)

    # index.md present
    assert os.path.isfile(os.path.join(ds1_dir, "index.md"))
    assert os.path.isfile(os.path.join(ds2_dir, "index.md"))

    # File markdowns exist; collision handling: two files with similar base
    ds1_files = [f for f in os.listdir(ds1_dir) if f.endswith(".md") and f != "index.md"]
    assert len(ds1_files) == 2
    assert any(f == "report.txt.md" for f in ds1_files)
    assert any(f.startswith("report.txt__") and f.endswith(".md") for f in ds1_files)

    # Content sanity: ensure question headers exist in one file
    sample_md_path = os.path.join(ds1_dir, ds1_files[0])
    with open(sample_md_path, "r", encoding="utf-8") as fh:
        content = fh.read()
    assert "## Question ideas" in content
    assert "## Searches" in content
{cognee-0.3.2.dist-info → cognee-0.3.3.dist-info}/WHEEL: file without changes
{cognee-0.3.2.dist-info → cognee-0.3.3.dist-info}/licenses/LICENSE: file without changes
{cognee-0.3.2.dist-info → cognee-0.3.3.dist-info}/licenses/NOTICE.md: file without changes