cognee 0.3.0.dev0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. cognee/__init__.py +1 -0
  2. cognee/api/v1/save/save.py +335 -0
  3. cognee/api/v1/search/routers/get_search_router.py +3 -3
  4. cognee/api/v1/ui/__init__.py +1 -0
  5. cognee/api/v1/ui/ui.py +624 -0
  6. cognee/cli/_cognee.py +102 -0
  7. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
  8. cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
  9. cognee/modules/retrieval/graph_completion_retriever.py +1 -1
  10. cognee/modules/retrieval/insights_retriever.py +12 -11
  11. cognee/modules/retrieval/temporal_retriever.py +1 -1
  12. cognee/modules/search/methods/search.py +31 -8
  13. cognee/tests/test_permissions.py +3 -3
  14. cognee/tests/test_relational_db_migration.py +3 -5
  15. cognee/tests/test_save_export_path.py +116 -0
  16. cognee/tests/test_search_db.py +10 -7
  17. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +12 -6
  18. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +12 -6
  19. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +2 -4
  20. {cognee-0.3.0.dev0.dist-info → cognee-0.3.2.dist-info}/METADATA +2 -2
  21. {cognee-0.3.0.dev0.dist-info → cognee-0.3.2.dist-info}/RECORD +34 -30
  22. distributed/pyproject.toml +1 -1
  23. /cognee/tests/{integration/cli → cli_tests/cli_integration_tests}/__init__.py +0 -0
  24. /cognee/tests/{integration/cli → cli_tests/cli_integration_tests}/test_cli_integration.py +0 -0
  25. /cognee/tests/{unit/cli → cli_tests/cli_unit_tests}/__init__.py +0 -0
  26. /cognee/tests/{unit/cli → cli_tests/cli_unit_tests}/test_cli_commands.py +0 -0
  27. /cognee/tests/{unit/cli → cli_tests/cli_unit_tests}/test_cli_edge_cases.py +0 -0
  28. /cognee/tests/{unit/cli → cli_tests/cli_unit_tests}/test_cli_main.py +0 -0
  29. /cognee/tests/{unit/cli → cli_tests/cli_unit_tests}/test_cli_runner.py +0 -0
  30. /cognee/tests/{unit/cli → cli_tests/cli_unit_tests}/test_cli_utils.py +0 -0
  31. {cognee-0.3.0.dev0.dist-info → cognee-0.3.2.dist-info}/WHEEL +0 -0
  32. {cognee-0.3.0.dev0.dist-info → cognee-0.3.2.dist-info}/entry_points.txt +0 -0
  33. {cognee-0.3.0.dev0.dist-info → cognee-0.3.2.dist-info}/licenses/LICENSE +0 -0
  34. {cognee-0.3.0.dev0.dist-info → cognee-0.3.2.dist-info}/licenses/NOTICE.md +0 -0
cognee/__init__.py CHANGED
@@ -27,6 +27,7 @@ from .api.v1.visualize import visualize_graph, start_visualization_server
  from cognee.modules.visualization.cognee_network_visualization import (
      cognee_network_visualization,
  )
+ from .api.v1.ui import start_ui

  # Pipelines
  from .modules import pipelines
cognee/api/v1/save/save.py ADDED
@@ -0,0 +1,335 @@
+ import os
+ import asyncio
+ import json
+ from typing import Optional, Union, List, Dict
+ from uuid import UUID
+
+ from pydantic import BaseModel
+
+ from cognee.base_config import get_base_config
+ from cognee.modules.users.models import User
+ from cognee.modules.users.methods import get_default_user
+ from cognee.modules.data.methods import get_authorized_existing_datasets, get_dataset_data
+ from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
+ from cognee.infrastructure.llm.LLMGateway import LLMGateway
+ from cognee.shared.logging_utils import get_logger
+ from cognee.api.v1.search import search
+ from cognee.modules.search.types import SearchType
+
+
+ logger = get_logger("save")
+
+
+ class QuestionsModel(BaseModel):
+     questions: List[str]
+
+
+ def _sanitize_filename(name: str) -> str:
+     safe = "".join(c if c.isalnum() or c in ("-", "_", ".", " ") else "_" for c in name)
+     return safe.strip().replace(" ", "_")
+
+
+ def _dataset_dir_name(dataset) -> str:
+     # Prefer readable dataset name when available, fallback to id
+     if getattr(dataset, "name", None):
+         return _sanitize_filename(str(dataset.name))
+     return str(dataset.id)
+
+
+ def _file_markdown_name(data_item, used_names: set[str]) -> str:
+     # Use original file name if present, else data.name
+     name = getattr(data_item, "name", None) or "file"
+     base = _sanitize_filename(str(name))
+     filename = f"{base}.md"
+     if filename in used_names:
+         short_id = str(getattr(data_item, "id", ""))[:8]
+         filename = f"{base}__{short_id}.md"
+     used_names.add(filename)
+     return filename
+
+
+ def _ascii_path_tree(path_str: str) -> str:
+     if not path_str:
+         return "(no path)"
+
+     # Normalize special schemes but keep segments readable
+     try:
+         normalized = get_data_file_path(path_str)
+     except Exception:
+         normalized = path_str
+
+     # Keep the path compact – show last 5 segments
+     parts = [p for p in normalized.replace("\\", "/").split("/") if p]
+     if len(parts) > 6:
+         display = ["…"] + parts[-5:]
+     else:
+         display = parts
+
+     # Render a single-branch tree
+     lines = []
+     for idx, seg in enumerate(display):
+         prefix = "└── " if idx == 0 else (" " * idx + "└── ")
+         lines.append(f"{prefix}{seg}")
+     return "\n".join(lines)
+
+
+ async def _get_summary_via_summaries(query_text: str, dataset_id: UUID, top_k: int) -> str:
+     try:
+         results = await search(
+             query_text=query_text,
+             query_type=SearchType.SUMMARIES,
+             dataset_ids=[dataset_id],
+             top_k=top_k,
+         )
+         if not results:
+             return ""
+         texts: List[str] = []
+         for r in results[:top_k]:
+             texts.append(str(r))
+         return "\n\n".join(texts)
+     except Exception as e:
+         logger.error(
+             "SUMMARIES search failed for '%s' in dataset %s: %s",
+             query_text,
+             str(dataset_id),
+             str(e),
+         )
+         return ""
+
+
+ async def _generate_questions(file_name: str, summary_text: str) -> List[str]:
+     prompt = (
+         "You are an expert analyst. Given a file and its summary, propose 10 diverse, high-signal "
+         "questions to further explore the file's content, implications, relationships, and gaps. "
+         "Avoid duplicates; vary depth and angle (overview, details, cross-references, temporal, quality).\n\n"
+         f"File: {file_name}\n\nSummary:\n{summary_text[:4000]}"
+     )
+
+     model = await LLMGateway.acreate_structured_output(
+         text_input=prompt,
+         system_prompt="Return strictly a JSON with key 'questions' and value as an array of 10 concise strings.",
+         response_model=QuestionsModel,
+     )
+
+     # model can be either pydantic model or dict-like, normalize
+     try:
+         questions = list(getattr(model, "questions", []))
+     except Exception:
+         questions = []
+
+     # Fallback if the tool returned a dict-like
+     if not questions and isinstance(model, dict):
+         questions = list(model.get("questions", []) or [])
+
+     # Enforce 10 max
+     return questions[:10]
+
+
+ async def _run_searches_for_question(
+     question: str, dataset_id: UUID, search_types: List[SearchType], top_k: int
+ ) -> Dict[str, Union[str, List[dict], List[str]]]:
+     async def run_one(st: SearchType):
+         try:
+             result = await search(
+                 query_text=question,
+                 query_type=st,
+                 dataset_ids=[dataset_id],
+                 top_k=top_k,
+             )
+             return st.value, result
+         except Exception as e:
+             logger.error("Search failed for type %s: %s", st.value, str(e))
+             return st.value, [f"Error: {str(e)}"]
+
+     pairs = await asyncio.gather(*[run_one(st) for st in search_types])
+     return {k: v for k, v in pairs}
+
+
+ def _format_results_md(results: Dict[str, Union[str, List[dict], List[str]]]) -> str:
+     lines: List[str] = []
+     for st, payload in results.items():
+         lines.append(f"#### {st}")
+         if isinstance(payload, list):
+             # Printed as bullet items; stringify dicts
+             for item in payload[:5]:
+                 if isinstance(item, dict):
+                     # compact representation
+                     snippet = json.dumps(item, ensure_ascii=False)[:800]
+                     lines.append(f"- {snippet}")
+                 else:
+                     text = str(item)
+                     lines.append(f"- {text[:800]}")
+         else:
+             lines.append(str(payload))
+         lines.append("")
+     return "\n".join(lines)
+
+
+ async def save(
+     datasets: Optional[Union[List[str], List[UUID]]] = None,
+     export_root_directory: Optional[str] = None,
+     user: Optional[User] = None,
+     # Configurable knobs
+     max_questions: int = 10,
+     search_types: Optional[List[Union[str, SearchType]]] = None,
+     top_k: int = 5,
+     include_summary: bool = True,
+     include_ascii_tree: bool = True,
+     concurrency: int = 4,
+     timeout: Optional[float] = None,
+ ) -> Dict[str, str]:
+     """
+     Export per-dataset markdown summaries and search insights for each ingested file.
+
+     For every dataset the user can read:
+     - Create a folder under export_root_directory (or data_root_directory/exports)
+     - For each data item (file), create a .md containing:
+       - Summary of the file (from existing TextSummary nodes)
+       - A small ASCII path tree showing its folder position
+       - Up to N LLM-generated question ideas (configurable)
+       - Results of configured Cognee searches per question
+     Also creates an index.md per dataset with links to files and an optional dataset summary.
+
+     Returns a mapping of dataset_id -> export_directory path.
+     """
+     base_config = get_base_config()
+     export_root = export_root_directory or os.path.join(
+         base_config.data_root_directory, "memory_export"
+     )
+     os.makedirs(export_root, exist_ok=True)
+
+     if user is None:
+         user = await get_default_user()
+
+     datasets_list = await get_authorized_existing_datasets(datasets, "read", user)
+     results: Dict[str, str] = {}
+
+     for dataset in datasets_list:
+         ds_dir = os.path.join(export_root, _dataset_dir_name(dataset))
+         os.makedirs(ds_dir, exist_ok=True)
+         results[str(dataset.id)] = ds_dir
+
+         data_items = await get_dataset_data(dataset.id)
+
+         # Normalize search types
+         if not search_types:
+             effective_search_types = [
+                 SearchType.GRAPH_COMPLETION,
+                 SearchType.INSIGHTS,
+                 SearchType.CHUNKS,
+             ]
+         else:
+             effective_search_types = []
+             for st in search_types:
+                 if isinstance(st, SearchType):
+                     effective_search_types.append(st)
+                 else:
+                     try:
+                         effective_search_types.append(SearchType[str(st)])
+                     except Exception:
+                         logger.warning("Unknown search type '%s', skipping", str(st))
+
+         sem = asyncio.Semaphore(max(1, int(concurrency)))
+         used_names: set[str] = set()
+         index_entries: List[tuple[str, str]] = []
+
+         async def process_one(data_item):
+             async with sem:
+                 file_label = getattr(data_item, "name", str(data_item.id))
+                 original_path = getattr(data_item, "original_data_location", None)
+
+                 ascii_tree = (
+                     _ascii_path_tree(original_path or file_label) if include_ascii_tree else ""
+                 )
+
+                 summary_text = ""
+                 if include_summary:
+                     # Use SUMMARIES search scoped to dataset to derive file summary
+                     file_query = getattr(data_item, "name", str(data_item.id)) or "file"
+                     summary_text = await _get_summary_via_summaries(file_query, dataset.id, top_k)
+                     if not summary_text:
+                         summary_text = "Summary not available."
+
+                 if max_questions == 0:
+                     questions = []
+                 else:
+                     questions = await _generate_questions(file_label, summary_text)
+                 if max_questions is not None and max_questions >= 0:
+                     questions = questions[:max_questions]
+
+                 async def searches_for_question(q: str):
+                     return await _run_searches_for_question(
+                         q, dataset.id, effective_search_types, top_k
+                     )
+
+                 # Run per-question searches concurrently
+                 per_q_results = await asyncio.gather(*[searches_for_question(q) for q in questions])
+
+                 # Build markdown content
+                 md_lines = [f"# {file_label}", ""]
+                 if include_ascii_tree:
+                     md_lines.extend(["## Location", "", "```", ascii_tree, "```", ""])
+                 if include_summary:
+                     md_lines.extend(["## Summary", "", summary_text, ""])
+
+                 md_lines.append("## Question ideas")
+                 for idx, q in enumerate(questions, start=1):
+                     md_lines.append(f"- {idx}. {q}")
+                 md_lines.append("")
+
+                 md_lines.append("## Searches")
+                 md_lines.append("")
+                 for q, per_type in zip(questions, per_q_results):
+                     md_lines.append(f"### Q: {q}")
+                     md_lines.append(_format_results_md(per_type))
+                     md_lines.append("")
+
+                 # Write to file (collision-safe)
+                 md_filename = _file_markdown_name(data_item, used_names)
+                 export_path = os.path.join(ds_dir, md_filename)
+                 tmp_path = export_path + ".tmp"
+                 with open(tmp_path, "w", encoding="utf-8") as f:
+                     f.write("\n".join(md_lines))
+                 os.replace(tmp_path, export_path)
+
+                 index_entries.append((file_label, md_filename))
+
+         tasks = [asyncio.create_task(process_one(item)) for item in data_items]
+
+         if timeout and timeout > 0:
+             try:
+                 await asyncio.wait_for(asyncio.gather(*tasks, return_exceptions=True), timeout)
+             except asyncio.TimeoutError:
+                 logger.error("Save timed out for dataset %s", str(dataset.id))
+         else:
+             await asyncio.gather(*tasks, return_exceptions=True)
+
+         # Build dataset index.md with TOC and optional dataset summary via SUMMARIES
+         try:
+             index_lines = [f"# Dataset: {_dataset_dir_name(dataset)}", "", "## Files", ""]
+             for display, fname in sorted(index_entries, key=lambda x: x[0].lower()):
+                 index_lines.append(f"- [{display}]({fname})")
+
+             # Dataset summary section
+             try:
+                 summaries = await search(
+                     query_text="dataset overview",
+                     query_type=SearchType.SUMMARIES,
+                     dataset_ids=[dataset.id],
+                     top_k=top_k,
+                 )
+             except Exception as e:
+                 logger.error("Dataset summary search failed: %s", str(e))
+                 summaries = []
+
+             if summaries:
+                 index_lines.extend(["", "## Dataset summary (top summaries)", ""])
+                 for s in summaries[:top_k]:
+                     index_lines.append(f"- {str(s)[:800]}")
+
+             with open(os.path.join(ds_dir, "index.md"), "w", encoding="utf-8") as f:
+                 f.write("\n".join(index_lines))
+         except Exception as e:
+             logger.error("Failed to write dataset index for %s: %s", str(dataset.id), str(e))
+
+     return results
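
The docstring above describes the new export flow end to end. A minimal usage sketch based only on the `save()` signature and return value shown in this file; the dataset name, export directory, and the direct module import path are illustrative assumptions, not values confirmed by this diff:

```python
import asyncio

# Illustrative sketch based on the save() signature above; the dataset name,
# export directory, and import path are assumptions for demonstration only.
from cognee.api.v1.save.save import save


async def main():
    exports = await save(
        datasets=["my_dataset"],            # dataset names or UUIDs; None = all readable datasets
        export_root_directory="./exports",  # defaults to <data_root_directory>/memory_export
        max_questions=5,                    # cap the LLM-generated question ideas per file
        search_types=["GRAPH_COMPLETION", "CHUNKS"],  # strings are resolved via SearchType[...]
        top_k=3,
        concurrency=2,
    )
    # save() returns {dataset_id: export_directory}
    for dataset_id, export_dir in exports.items():
        print(dataset_id, "->", export_dir)


asyncio.run(main())
```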
cognee/api/v1/search/routers/get_search_router.py CHANGED
@@ -1,12 +1,12 @@
  from uuid import UUID
- from typing import Optional
+ from typing import Optional, Union, List, Any
  from datetime import datetime
  from pydantic import Field
  from fastapi import Depends, APIRouter
  from fastapi.responses import JSONResponse
  from fastapi.encoders import jsonable_encoder

- from cognee.modules.search.types import SearchType
+ from cognee.modules.search.types import SearchType, SearchResult, CombinedSearchResult
  from cognee.api.DTO import InDTO, OutDTO
  from cognee.modules.users.exceptions.exceptions import PermissionDeniedError
  from cognee.modules.users.models import User
@@ -73,7 +73,7 @@ def get_search_router() -> APIRouter:
          except Exception as error:
              return JSONResponse(status_code=500, content={"error": str(error)})

-     @router.post("", response_model=list)
+     @router.post("", response_model=Union[List[SearchResult], CombinedSearchResult, List])
      async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
          """
          Search for nodes in the graph database.
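
With the widened `response_model`, this endpoint may now return either a list of `SearchResult` items or a single `CombinedSearchResult` object rather than a bare list, so callers that assumed a JSON array should branch on the shape. A hedged client-side sketch; the route URL, payload fields, and auth header below are placeholders, not taken from this diff:

```python
import requests

# Hedged sketch: only the "list or single object" response shape follows from the
# response_model change above; URL, payload fields, and auth header are placeholders.
response = requests.post(
    "http://localhost:8000/api/v1/search",        # placeholder URL
    json={"query": "example question"},           # placeholder payload; see SearchPayloadDTO
    headers={"Authorization": "Bearer <token>"},  # placeholder auth
)
body = response.json()

if isinstance(body, list):
    items = body       # list of SearchResult-shaped objects
else:
    items = [body]     # a single CombinedSearchResult-shaped object
```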
cognee/api/v1/ui/__init__.py ADDED
@@ -0,0 +1 @@
+ from .ui import start_ui, stop_ui, ui
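
The new `cognee.api.v1.ui` package (ui.py, +624 lines, not expanded in this diff) exposes `start_ui`, `stop_ui`, and `ui`, and the `cognee/__init__.py` change above re-exports `start_ui` at the package top level. A minimal sketch, assuming `start_ui` and `stop_ui` are synchronous and callable without required arguments; the real signatures live in cognee/api/v1/ui/ui.py:

```python
# Hedged sketch: assumes start_ui()/stop_ui() take no required arguments and are
# synchronous; the actual signatures are defined in cognee/api/v1/ui/ui.py,
# which this diff lists (+624 lines) but does not show.
import cognee
from cognee.api.v1.ui import stop_ui

cognee.start_ui()   # start_ui is re-exported at the top level in this release
# ... use the local UI ...
stop_ui()
```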