basic-memory 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- basic_memory/__init__.py +1 -1
- basic_memory/api/routers/directory_router.py +23 -2
- basic_memory/api/routers/project_router.py +1 -0
- basic_memory/cli/auth.py +2 -2
- basic_memory/cli/commands/cloud/__init__.py +2 -1
- basic_memory/cli/commands/cloud/bisync_commands.py +4 -57
- basic_memory/cli/commands/cloud/cloud_utils.py +100 -0
- basic_memory/cli/commands/cloud/upload.py +128 -0
- basic_memory/cli/commands/cloud/upload_command.py +93 -0
- basic_memory/cli/commands/command_utils.py +11 -28
- basic_memory/cli/commands/mcp.py +72 -67
- basic_memory/cli/commands/project.py +140 -120
- basic_memory/cli/commands/status.py +6 -15
- basic_memory/config.py +55 -9
- basic_memory/deps.py +7 -5
- basic_memory/ignore_utils.py +7 -7
- basic_memory/mcp/async_client.py +102 -4
- basic_memory/mcp/prompts/continue_conversation.py +16 -15
- basic_memory/mcp/prompts/search.py +12 -11
- basic_memory/mcp/resources/ai_assistant_guide.md +185 -453
- basic_memory/mcp/resources/project_info.py +9 -7
- basic_memory/mcp/tools/build_context.py +40 -39
- basic_memory/mcp/tools/canvas.py +21 -20
- basic_memory/mcp/tools/chatgpt_tools.py +11 -2
- basic_memory/mcp/tools/delete_note.py +22 -21
- basic_memory/mcp/tools/edit_note.py +105 -104
- basic_memory/mcp/tools/list_directory.py +98 -95
- basic_memory/mcp/tools/move_note.py +127 -125
- basic_memory/mcp/tools/project_management.py +101 -98
- basic_memory/mcp/tools/read_content.py +64 -63
- basic_memory/mcp/tools/read_note.py +88 -88
- basic_memory/mcp/tools/recent_activity.py +139 -135
- basic_memory/mcp/tools/search.py +27 -26
- basic_memory/mcp/tools/sync_status.py +133 -128
- basic_memory/mcp/tools/utils.py +0 -15
- basic_memory/mcp/tools/view_note.py +14 -28
- basic_memory/mcp/tools/write_note.py +97 -87
- basic_memory/repository/entity_repository.py +60 -0
- basic_memory/repository/repository.py +16 -3
- basic_memory/repository/search_repository.py +42 -0
- basic_memory/schemas/cloud.py +7 -3
- basic_memory/schemas/project_info.py +1 -1
- basic_memory/services/directory_service.py +124 -3
- basic_memory/services/entity_service.py +31 -9
- basic_memory/services/project_service.py +97 -10
- basic_memory/services/search_service.py +16 -8
- basic_memory/sync/sync_service.py +28 -13
- {basic_memory-0.15.0.dist-info → basic_memory-0.15.2.dist-info}/METADATA +51 -4
- {basic_memory-0.15.0.dist-info → basic_memory-0.15.2.dist-info}/RECORD +52 -50
- basic_memory/mcp/tools/headers.py +0 -44
- {basic_memory-0.15.0.dist-info → basic_memory-0.15.2.dist-info}/WHEEL +0 -0
- {basic_memory-0.15.0.dist-info → basic_memory-0.15.2.dist-info}/entry_points.txt +0 -0
- {basic_memory-0.15.0.dist-info → basic_memory-0.15.2.dist-info}/licenses/LICENSE +0 -0
basic_memory/repository/entity_repository.py
CHANGED

```diff
@@ -176,6 +176,66 @@ class EntityRepository(Repository[Entity]):
         entity = await self._handle_permalink_conflict(entity, session)
         return entity
 
+    async def get_distinct_directories(self) -> List[str]:
+        """Extract unique directory paths from file_path column.
+
+        Optimized method for getting directory structure without loading full entities
+        or relationships. Returns a sorted list of unique directory paths.
+
+        Returns:
+            List of unique directory paths (e.g., ["notes", "notes/meetings", "specs"])
+        """
+        # Query only file_path column, no entity objects or relationships
+        query = select(Entity.file_path).distinct()
+        query = self._add_project_filter(query)
+
+        # Execute with use_query_options=False to skip eager loading
+        result = await self.execute_query(query, use_query_options=False)
+        file_paths = [row for row in result.scalars().all()]
+
+        # Parse file paths to extract unique directories
+        directories = set()
+        for file_path in file_paths:
+            parts = [p for p in file_path.split("/") if p]
+            # Add all parent directories (exclude filename which is the last part)
+            for i in range(len(parts) - 1):
+                dir_path = "/".join(parts[: i + 1])
+                directories.add(dir_path)
+
+        return sorted(directories)
+
+    async def find_by_directory_prefix(self, directory_prefix: str) -> Sequence[Entity]:
+        """Find entities whose file_path starts with the given directory prefix.
+
+        Optimized method for listing directory contents without loading all entities.
+        Uses SQL LIKE pattern matching to filter entities by directory path.
+
+        Args:
+            directory_prefix: Directory path prefix (e.g., "docs", "docs/guides")
+                Empty string returns all entities (root directory)
+
+        Returns:
+            Sequence of entities in the specified directory and subdirectories
+        """
+        # Build SQL LIKE pattern
+        if directory_prefix == "" or directory_prefix == "/":
+            # Root directory - return all entities
+            return await self.find_all()
+
+        # Remove leading/trailing slashes for consistency
+        directory_prefix = directory_prefix.strip("/")
+
+        # Query entities with file_path starting with prefix
+        # Pattern matches "prefix/" to ensure we get files IN the directory,
+        # not just files whose names start with the prefix
+        pattern = f"{directory_prefix}/%"
+
+        query = self.select().where(Entity.file_path.like(pattern))
+
+        # Skip eager loading - we only need basic entity fields for directory trees
+        result = await self.execute_query(query, use_query_options=False)
+        return list(result.scalars().all())
+
     async def _handle_permalink_conflict(self, entity: Entity, session: AsyncSession) -> Entity:
         """Handle permalink conflicts by generating a unique permalink."""
         base_permalink = entity.permalink
```
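The directory-extraction loop is plain string manipulation, so it is easy to sanity-check outside the repository. A minimal sketch of the same parsing step; the sample file paths are made up for illustration:

```python
# Standalone sketch of the parsing step in get_distinct_directories().
# The sample file paths are hypothetical; the parsing mirrors the diff above.
file_paths = [
    "notes/meetings/2024-01-05.md",
    "notes/ideas.md",
    "specs/search/fts5.md",
]

directories = set()
for file_path in file_paths:
    parts = [p for p in file_path.split("/") if p]
    # Every prefix of the path except the final component (the filename)
    # is a directory that should appear in the tree.
    for i in range(len(parts) - 1):
        directories.add("/".join(parts[: i + 1]))

print(sorted(directories))
# ['notes', 'notes/meetings', 'specs', 'specs/search']
```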
basic_memory/repository/repository.py
CHANGED

```diff
@@ -152,12 +152,25 @@ class Repository[T: Base]:
         # Add project filter if applicable
         return self._add_project_filter(query)
 
-    async def find_all(
-
+    async def find_all(
+        self, skip: int = 0, limit: Optional[int] = None, use_load_options: bool = True
+    ) -> Sequence[T]:
+        """Fetch records from the database with pagination.
+
+        Args:
+            skip: Number of records to skip
+            limit: Maximum number of records to return
+            use_load_options: Whether to apply eager loading options (default: True)
+        """
         logger.debug(f"Finding all {self.Model.__name__} (skip={skip}, limit={limit})")
 
         async with db.scoped_session(self.session_maker) as session:
-            query = select(self.Model).offset(skip)
+            query = select(self.Model).offset(skip)
+
+            # Only apply load options if requested
+            if use_load_options:
+                query = query.options(*self.get_load_options())
+
             # Add project filter if applicable
             query = self._add_project_filter(query)
 
```
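`get_load_options()` supplies the repository's eager-load options; the new flag simply skips attaching them. A sketch of the pattern in SQLAlchemy 2.x with stand-in models (the `Project`/`Entity` mapping below is illustrative, not basic-memory's actual model code):

```python
# Illustrative sketch of the use_load_options toggle in SQLAlchemy 2.x.
# The Project/Entity models here are minimal stand-ins, not basic-memory's.
from sqlalchemy import ForeignKey, select
from sqlalchemy.orm import (
    DeclarativeBase, Mapped, mapped_column, relationship, selectinload
)

class Base(DeclarativeBase):
    pass

class Project(Base):
    __tablename__ = "project"
    id: Mapped[int] = mapped_column(primary_key=True)
    entities: Mapped[list["Entity"]] = relationship(back_populates="project")

class Entity(Base):
    __tablename__ = "entity"
    id: Mapped[int] = mapped_column(primary_key=True)
    project_id: Mapped[int] = mapped_column(ForeignKey("project.id"))
    project: Mapped[Project] = relationship(back_populates="entities")

def find_all_query(use_load_options: bool = True):
    query = select(Project)
    if use_load_options:
        # Eager-load the collection up front (one extra SELECT)...
        query = query.options(selectinload(Project.entities))
    # ...otherwise only the Project columns are fetched, which is what
    # list_projects() wants for large knowledge bases.
    return query
```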
basic_memory/repository/search_repository.py
CHANGED

```diff
@@ -559,6 +559,48 @@ class SearchRepository:
             logger.debug(f"indexed row {search_index_row}")
             await session.commit()
 
+    async def bulk_index_items(self, search_index_rows: List[SearchIndexRow]):
+        """Index multiple items in a single batch operation.
+
+        Note: This method assumes that any existing records for the entity_id
+        have already been deleted (typically via delete_by_entity_id).
+
+        Args:
+            search_index_rows: List of SearchIndexRow objects to index
+        """
+        if not search_index_rows:
+            return
+
+        async with db.scoped_session(self.session_maker) as session:
+            # Prepare all insert data with project_id
+            insert_data_list = []
+            for row in search_index_rows:
+                insert_data = row.to_insert()
+                insert_data["project_id"] = self.project_id
+                insert_data_list.append(insert_data)
+
+            # Batch insert all records using executemany
+            await session.execute(
+                text("""
+                    INSERT INTO search_index (
+                        id, title, content_stems, content_snippet, permalink, file_path, type, metadata,
+                        from_id, to_id, relation_type,
+                        entity_id, category,
+                        created_at, updated_at,
+                        project_id
+                    ) VALUES (
+                        :id, :title, :content_stems, :content_snippet, :permalink, :file_path, :type, :metadata,
+                        :from_id, :to_id, :relation_type,
+                        :entity_id, :category,
+                        :created_at, :updated_at,
+                        :project_id
+                    )
+                """),
+                insert_data_list,
+            )
+            logger.debug(f"Bulk indexed {len(search_index_rows)} rows")
+            await session.commit()
+
     async def delete_by_entity_id(self, entity_id: int):
         """Delete an item from the search index by entity_id."""
         async with db.scoped_session(self.session_maker) as session:
```
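Passing a list of parameter dictionaries to `session.execute(text(...), [...])` makes SQLAlchemy use the driver's executemany path: one statement, many parameter sets, one round trip instead of N. A self-contained sketch with an in-memory SQLite table (the table and columns are simplified from the diff):

```python
# Minimal executemany sketch (sync SQLAlchemy + in-memory SQLite).
# The table is a simplified stand-in for search_index.
from sqlalchemy import create_engine, text

engine = create_engine("sqlite://")
with engine.begin() as conn:
    conn.execute(text("CREATE TABLE search_index (id INTEGER, title TEXT, project_id INTEGER)"))

    rows = [
        {"id": 1, "title": "note one", "project_id": 7},
        {"id": 2, "title": "note two", "project_id": 7},
    ]
    # A list of dicts triggers the driver's executemany path.
    conn.execute(
        text("INSERT INTO search_index (id, title, project_id) VALUES (:id, :title, :project_id)"),
        rows,
    )
    count = conn.execute(text("SELECT COUNT(*) FROM search_index")).scalar_one()
    print(count)  # 2
```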
basic_memory/schemas/cloud.py
CHANGED
```diff
@@ -41,6 +41,10 @@ class CloudProjectCreateRequest(BaseModel):
 class CloudProjectCreateResponse(BaseModel):
     """Response from creating a cloud project."""
 
-
-
-
+    message: str = Field(..., description="Status message about the project creation")
+    status: str = Field(..., description="Status of the creation (success or error)")
+    default: bool = Field(..., description="True if the project was set as the default")
+    old_project: dict | None = Field(None, description="Information about the previous project")
+    new_project: dict | None = Field(
+        None, description="Information about the newly created project"
+    )
```
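The new response schema is plain Pydantic; constructing one shows the required/optional split (the field values here are made up):

```python
# Constructing the new response model; field values are illustrative.
from pydantic import BaseModel, Field

class CloudProjectCreateResponse(BaseModel):
    """Response from creating a cloud project (as defined in the diff above)."""
    message: str = Field(..., description="Status message about the project creation")
    status: str = Field(..., description="Status of the creation (success or error)")
    default: bool = Field(..., description="True if the project was set as the default")
    old_project: dict | None = Field(None, description="Information about the previous project")
    new_project: dict | None = Field(None, description="Information about the newly created project")

resp = CloudProjectCreateResponse(
    message="Project created",
    status="success",
    default=False,  # message/status/default are required; *_project default to None
)
print(resp.model_dump())
```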
basic_memory/services/directory_service.py
CHANGED

```diff
@@ -3,8 +3,9 @@
 import fnmatch
 import logging
 import os
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Sequence
 
+from basic_memory.models import Entity
 from basic_memory.repository import EntityRepository
 from basic_memory.schemas.directory import DirectoryNode
 
```
```diff
@@ -89,6 +90,49 @@ class DirectoryService:
         # Return the root node with its children
         return root_node
 
+    async def get_directory_structure(self) -> DirectoryNode:
+        """Build a hierarchical directory structure without file details.
+
+        Optimized method for folder navigation that only returns directory nodes,
+        no file metadata. Much faster than get_directory_tree() for large knowledge bases.
+
+        Returns:
+            DirectoryNode tree containing only folders (type="directory")
+        """
+        # Get unique directories without loading entities
+        directories = await self.entity_repository.get_distinct_directories()
+
+        # Create a root directory node
+        root_node = DirectoryNode(name="Root", directory_path="/", type="directory")
+
+        # Map to store directory nodes by path for easy lookup
+        dir_map: Dict[str, DirectoryNode] = {"/": root_node}
+
+        # Build tree with just folders
+        for dir_path in directories:
+            parts = [p for p in dir_path.split("/") if p]
+            current_path = "/"
+
+            for i, part in enumerate(parts):
+                parent_path = current_path
+                # Build the directory path
+                current_path = (
+                    f"{current_path}{part}" if current_path == "/" else f"{current_path}/{part}"
+                )
+
+                # Create directory node if it doesn't exist
+                if current_path not in dir_map:
+                    dir_node = DirectoryNode(
+                        name=part, directory_path=current_path, type="directory"
+                    )
+                    dir_map[current_path] = dir_node
+
+                    # Add to parent's children
+                    if parent_path in dir_map:
+                        dir_map[parent_path].children.append(dir_node)
+
+        return root_node
+
     async def list_directory(
         self,
         dir_name: str = "/",
```
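The tree construction is a standard path-to-trie pass. A runnable sketch with a stand-in `DirectoryNode` dataclass (the real model is a schema in basic_memory/schemas/directory.py; this stand-in only mirrors the fields used here):

```python
# Stand-in DirectoryNode for illustration; the real model lives in
# basic_memory.schemas.directory.
from dataclasses import dataclass, field
from typing import List

@dataclass
class DirectoryNode:
    name: str
    directory_path: str
    type: str
    children: List["DirectoryNode"] = field(default_factory=list)

def build_structure(directories: List[str]) -> DirectoryNode:
    root = DirectoryNode(name="Root", directory_path="/", type="directory")
    dir_map = {"/": root}
    for dir_path in directories:
        parts = [p for p in dir_path.split("/") if p]
        current = "/"
        for part in parts:
            parent = current
            current = f"{current}{part}" if current == "/" else f"{current}/{part}"
            if current not in dir_map:
                node = DirectoryNode(name=part, directory_path=current, type="directory")
                dir_map[current] = node
                dir_map[parent].children.append(node)
    return root

tree = build_structure(["notes", "notes/meetings", "specs"])
print([c.name for c in tree.children])              # ['notes', 'specs']
print([c.name for c in tree.children[0].children])  # ['meetings']
```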
```diff
@@ -118,8 +162,13 @@
         if dir_name != "/" and dir_name.endswith("/"):
             dir_name = dir_name.rstrip("/")
 
-        #
-
+        # Optimize: Query only entities in the target directory
+        # instead of loading the entire tree
+        dir_prefix = dir_name.lstrip("/")
+        entity_rows = await self.entity_repository.find_by_directory_prefix(dir_prefix)
+
+        # Build a partial tree from only the relevant entities
+        root_tree = self._build_directory_tree_from_entities(entity_rows, dir_name)
 
         # Find the target directory node
         target_node = self._find_directory_node(root_tree, dir_name)
```
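The `prefix/%` LIKE pattern in `find_by_directory_prefix` matters: `docs%` would also match a sibling directory such as `docs-archive`, while `docs/%` only matches entries inside `docs`. A quick sqlite3 check (sample rows invented):

```python
# Demonstrates why the pattern is "docs/%" rather than "docs%".
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE entity (file_path TEXT)")
conn.executemany(
    "INSERT INTO entity VALUES (?)",
    [("docs/guide.md",), ("docs/guides/setup.md",), ("docs-archive/old.md",)],
)

rows = conn.execute(
    "SELECT file_path FROM entity WHERE file_path LIKE ?", ("docs/%",)
).fetchall()
print([r[0] for r in rows])
# ['docs/guide.md', 'docs/guides/setup.md']  -- 'docs-archive/old.md' excluded
```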
```diff
@@ -132,6 +181,78 @@
 
         return result
 
+    def _build_directory_tree_from_entities(
+        self, entity_rows: Sequence[Entity], root_path: str
+    ) -> DirectoryNode:
+        """Build a directory tree from a subset of entities.
+
+        Args:
+            entity_rows: Sequence of entity objects to build tree from
+            root_path: Root directory path for the tree
+
+        Returns:
+            DirectoryNode representing the tree root
+        """
+        # Create a root directory node
+        root_node = DirectoryNode(name="Root", directory_path=root_path, type="directory")
+
+        # Map to store directory nodes by path for easy lookup
+        dir_map: Dict[str, DirectoryNode] = {root_path: root_node}
+
+        # First pass: create all directory nodes
+        for file in entity_rows:
+            # Process directory path components
+            parts = [p for p in file.file_path.split("/") if p]
+
+            # Create directory structure
+            current_path = "/"
+            for i, part in enumerate(parts[:-1]):  # Skip the filename
+                parent_path = current_path
+                # Build the directory path
+                current_path = (
+                    f"{current_path}{part}" if current_path == "/" else f"{current_path}/{part}"
+                )
+
+                # Create directory node if it doesn't exist
+                if current_path not in dir_map:
+                    dir_node = DirectoryNode(
+                        name=part, directory_path=current_path, type="directory"
+                    )
+                    dir_map[current_path] = dir_node
+
+                    # Add to parent's children
+                    if parent_path in dir_map:
+                        dir_map[parent_path].children.append(dir_node)
+
+        # Second pass: add file nodes to their parent directories
+        for file in entity_rows:
+            file_name = os.path.basename(file.file_path)
+            parent_dir = os.path.dirname(file.file_path)
+            directory_path = "/" if parent_dir == "" else f"/{parent_dir}"
+
+            # Create file node
+            file_node = DirectoryNode(
+                name=file_name,
+                file_path=file.file_path,
+                directory_path=f"/{file.file_path}",
+                type="file",
+                title=file.title,
+                permalink=file.permalink,
+                entity_id=file.id,
+                entity_type=file.entity_type,
+                content_type=file.content_type,
+                updated_at=file.updated_at,
+            )
+
+            # Add to parent directory's children
+            if directory_path in dir_map:
+                dir_map[directory_path].children.append(file_node)
+            elif root_path in dir_map:
+                # Fallback to root if parent not found
+                dir_map[root_path].children.append(file_node)
+
+        return root_node
+
     def _find_directory_node(
         self, root: DirectoryNode, target_path: str
     ) -> Optional[DirectoryNode]:
```
basic_memory/services/entity_service.py
CHANGED

```diff
@@ -52,7 +52,9 @@ class EntityService(BaseService[EntityModel]):
         self.link_resolver = link_resolver
         self.app_config = app_config
 
-    async def detect_file_path_conflicts(
+    async def detect_file_path_conflicts(
+        self, file_path: str, skip_check: bool = False
+    ) -> List[Entity]:
         """Detect potential file path conflicts for a given file path.
 
         This checks for entities with similar file paths that might cause conflicts:
@@ -63,10 +65,14 @@ class EntityService(BaseService[EntityModel]):
 
         Args:
             file_path: The file path to check for conflicts
+            skip_check: If True, skip the check and return empty list (optimization for bulk operations)
 
         Returns:
            List of entities that might conflict with the given file path
         """
+        if skip_check:
+            return []
+
         from basic_memory.utils import detect_potential_file_conflicts
 
         conflicts = []
@@ -86,7 +92,10 @@ class EntityService(BaseService[EntityModel]):
         return conflicts
 
     async def resolve_permalink(
-        self,
+        self,
+        file_path: Permalink | Path,
+        markdown: Optional[EntityMarkdown] = None,
+        skip_conflict_check: bool = False,
     ) -> str:
         """Get or generate unique permalink for an entity.
 
@@ -101,7 +110,9 @@ class EntityService(BaseService[EntityModel]):
         file_path_str = Path(file_path).as_posix()
 
         # Check for potential file path conflicts before resolving permalink
-        conflicts = await self.detect_file_path_conflicts(
+        conflicts = await self.detect_file_path_conflicts(
+            file_path_str, skip_check=skip_conflict_check
+        )
         if conflicts:
             logger.warning(
                 f"Detected potential file path conflicts for '{file_path_str}': "
@@ -445,6 +456,7 @@ class EntityService(BaseService[EntityModel]):
         resolved_entities = await asyncio.gather(*lookup_tasks, return_exceptions=True)
 
         # Process results and create relation records
+        relations_to_add = []
         for rel, resolved in zip(markdown.relations, resolved_entities):
             # Handle exceptions from gather and None results
             target_entity: Optional[Entity] = None
@@ -465,14 +477,24 @@ class EntityService(BaseService[EntityModel]):
                 relation_type=rel.type,
                 context=rel.context,
             )
+            relations_to_add.append(relation)
+
+        # Batch insert all relations
+        if relations_to_add:
             try:
-                await self.relation_repository.
+                await self.relation_repository.add_all(relations_to_add)
             except IntegrityError:
-                #
-                logger.debug(
-
-
-
+                # Some relations might be duplicates - fall back to individual inserts
+                logger.debug("Batch relation insert failed, trying individual inserts")
+                for relation in relations_to_add:
+                    try:
+                        await self.relation_repository.add(relation)
+                    except IntegrityError:
+                        # Unique constraint violation - relation already exists
+                        logger.debug(
+                            f"Skipping duplicate relation {relation.relation_type} from {db_entity.permalink}"
+                        )
+                        continue
 
         return await self.repository.get_by_file_path(path)
 
```
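The add_all-then-fallback pattern trades one fast batch insert for per-row inserts only when a unique constraint fires. A self-contained sqlite3 sketch of the same control flow (schema and rows invented):

```python
# Batch insert with per-row fallback on constraint violation (sqlite3 stand-in
# for the relation_repository.add_all / add calls).
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE relation (from_id INTEGER, to_id INTEGER, UNIQUE(from_id, to_id))")
conn.execute("INSERT INTO relation VALUES (1, 2)")  # pre-existing row

relations = [(1, 2), (1, 3), (2, 3)]  # (1, 2) is a duplicate
try:
    with conn:
        conn.executemany("INSERT INTO relation VALUES (?, ?)", relations)
except sqlite3.IntegrityError:
    # Batch failed as a whole; retry row by row, skipping duplicates.
    for rel in relations:
        try:
            with conn:
                conn.execute("INSERT INTO relation VALUES (?, ?)", rel)
        except sqlite3.IntegrityError:
            continue

print(conn.execute("SELECT COUNT(*) FROM relation").fetchone()[0])  # 3
```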
basic_memory/services/project_service.py
CHANGED

```diff
@@ -80,7 +80,13 @@ class ProjectService:
         return os.environ.get("BASIC_MEMORY_PROJECT", self.config_manager.default_project)
 
     async def list_projects(self) -> Sequence[Project]:
-
+        """List all projects without loading entity relationships.
+
+        Returns only basic project fields (name, path, etc.) without
+        eager loading the entities relationship which could load thousands
+        of entities for large knowledge bases.
+        """
+        return await self.repository.find_all(use_load_options=False)
 
     async def get_project(self, name: str) -> Optional[Project]:
         """Get the file path for a project by name or permalink."""
```
```diff
@@ -88,6 +94,40 @@ class ProjectService:
             name
         )
 
+    def _check_nested_paths(self, path1: str, path2: str) -> bool:
+        """Check if two paths are nested (one is a prefix of the other).
+
+        Args:
+            path1: First path to compare
+            path2: Second path to compare
+
+        Returns:
+            True if one path is nested within the other, False otherwise
+
+        Examples:
+            _check_nested_paths("/foo", "/foo/bar")  # True (child under parent)
+            _check_nested_paths("/foo/bar", "/foo")  # True (parent over child)
+            _check_nested_paths("/foo", "/bar")      # False (siblings)
+        """
+        # Normalize paths to ensure proper comparison
+        p1 = Path(path1).resolve()
+        p2 = Path(path2).resolve()
+
+        # Check if either path is a parent of the other
+        try:
+            # Check if p2 is under p1
+            p2.relative_to(p1)
+            return True
+        except ValueError:
+            # Not nested in this direction, check the other
+            try:
+                # Check if p1 is under p2
+                p1.relative_to(p2)
+                return True
+            except ValueError:
+                # Not nested in either direction
+                return False
+
     async def add_project(self, name: str, path: str, set_default: bool = False) -> None:
         """Add a new project to the configuration and database.
 
```
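`Path.relative_to` raises `ValueError` when one path is not inside the other, which is what makes it usable as a nesting test. A standalone check of the three documented cases:

```python
# Standalone version of the nesting test in _check_nested_paths().
from pathlib import Path

def check_nested(path1: str, path2: str) -> bool:
    p1, p2 = Path(path1), Path(path2)  # .resolve() omitted for a pure-string demo
    try:
        p2.relative_to(p1)
        return True
    except ValueError:
        try:
            p1.relative_to(p2)
            return True
        except ValueError:
            return False

print(check_nested("/foo", "/foo/bar"))  # True  (child under parent)
print(check_nested("/foo/bar", "/foo"))  # True  (parent over child)
print(check_nested("/foo", "/bar"))      # False (siblings)
```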
```diff
@@ -97,19 +137,66 @@
         set_default: Whether to set this project as the default
 
         Raises:
-            ValueError: If the project already exists
+            ValueError: If the project already exists or path collides with existing project
         """
-        #
-
-
-
-
-
-        #
-
+        # If project_root is set, constrain all projects to that directory
+        project_root = self.config_manager.config.project_root
+        if project_root:
+            base_path = Path(project_root)
+
+            # In cloud mode (when project_root is set), ignore user's path completely
+            # and use sanitized project name as the directory name
+            # This ensures flat structure: /app/data/test-bisync instead of /app/data/documents/test bisync
+            sanitized_name = generate_permalink(name)
+
+            # Construct path using sanitized project name only
+            resolved_path = (base_path / sanitized_name).resolve().as_posix()
+
+            # Verify the resolved path is actually under project_root
+            if not resolved_path.startswith(base_path.resolve().as_posix()):
+                raise ValueError(
+                    f"BASIC_MEMORY_PROJECT_ROOT is set to {project_root}. "
+                    f"All projects must be created under this directory. Invalid path: {path}"
+                )
+
+            # Check for case-insensitive path collisions with existing projects
+            existing_projects = await self.list_projects()
+            for existing in existing_projects:
+                if (
+                    existing.path.lower() == resolved_path.lower()
+                    and existing.path != resolved_path
+                ):
+                    raise ValueError(
+                        f"Path collision detected: '{resolved_path}' conflicts with existing project "
+                        f"'{existing.name}' at '{existing.path}'. "
+                        f"In cloud mode, paths are normalized to lowercase to prevent case-sensitivity issues."
+                    )
         else:
             resolved_path = Path(os.path.abspath(os.path.expanduser(path))).as_posix()
 
+        # Check for nested paths with existing projects
+        existing_projects = await self.list_projects()
+        for existing in existing_projects:
+            if self._check_nested_paths(resolved_path, existing.path):
+                # Determine which path is nested within which for appropriate error message
+                p_new = Path(resolved_path).resolve()
+                p_existing = Path(existing.path).resolve()
+
+                # Check if new path is nested under existing project
+                if p_new.is_relative_to(p_existing):
+                    raise ValueError(
+                        f"Cannot create project at '{resolved_path}': "
+                        f"path is nested within existing project '{existing.name}' at '{existing.path}'. "
+                        f"Projects cannot share directory trees."
+                    )
+                else:
+                    # Existing project is nested under new path
+                    raise ValueError(
+                        f"Cannot create project at '{resolved_path}': "
+                        f"existing project '{existing.name}' at '{existing.path}' is nested within this path. "
+                        f"Projects cannot share directory trees."
+                    )
+
         # First add to config file (this will validate the project doesn't exist)
         project_config = self.config_manager.add_project(name, resolved_path)
 
```
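`generate_permalink` comes from basic_memory's utils and is not shown in this diff; the comment implies it lowercases and hyphenates names ("test bisync" → "test-bisync"). A hypothetical stand-in with that behavior, for illustration only:

```python
# Hypothetical stand-in for generate_permalink(); the real implementation
# lives in basic_memory.utils and may differ. It only needs to turn a
# display name into a safe, flat directory name.
import re
from pathlib import PurePosixPath

def generate_permalink_stub(name: str) -> str:
    return re.sub(r"[^a-z0-9]+", "-", name.lower()).strip("-")

print(PurePosixPath("/app/data") / generate_permalink_stub("test bisync"))
# /app/data/test-bisync
```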
basic_memory/services/search_service.py
CHANGED

```diff
@@ -113,8 +113,10 @@ class SearchService:
         # Add word boundaries
         variants.update(w.strip() for w in text.lower().split() if w.strip())
 
-        #
-
+        # Trigrams disabled: They create massive search index bloat, increasing DB size significantly
+        # and slowing down indexing performance. FTS5 search works well without them.
+        # See: https://github.com/basicmachines-co/basic-memory/issues/351
+        # variants.update(text[i : i + 3].lower() for i in range(len(text) - 2))
 
         return variants
 
```
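The disabled line generated every 3-character window of the text, which grows with text length and dwarfs the word-level variants that were kept. A quick comparison on a sample sentence:

```python
# Why trigram variants bloat the index: roughly one variant per character
# of text, versus one per word for the boundary-based variants.
text = "Optimized method for folder navigation in large knowledge bases"

words = {w.strip() for w in text.lower().split() if w.strip()}
trigrams = {text[i : i + 3].lower() for i in range(len(text) - 2)}

print(len(words))     # 9
print(len(trigrams))  # ~60 distinct 3-char windows for one short sentence
```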
```diff
@@ -219,6 +221,9 @@ class SearchService:
         The project_id is automatically added by the repository when indexing.
         """
 
+        # Collect all search index rows to batch insert at the end
+        rows_to_index = []
+
         content_stems = []
         content_snippet = ""
         title_variants = self._generate_variants(entity.title)
@@ -241,8 +246,8 @@ class SearchService:
 
         entity_content_stems = "\n".join(p for p in content_stems if p and p.strip())
 
-        #
-
+        # Add entity row
+        rows_to_index.append(
             SearchIndexRow(
                 id=entity.id,
                 type=SearchItemType.ENTITY.value,
@@ -261,13 +266,13 @@ class SearchService:
             )
         )
 
-        #
+        # Add observation rows
         for obs in entity.observations:
             # Index with parent entity's file path since that's where it's defined
             obs_content_stems = "\n".join(
                 p for p in self._generate_variants(obs.content) if p and p.strip()
             )
-
+            rows_to_index.append(
                 SearchIndexRow(
                     id=obs.id,
                     type=SearchItemType.OBSERVATION.value,
@@ -287,7 +292,7 @@ class SearchService:
                 )
             )
 
-        #
+        # Add relation rows (only outgoing relations defined in this file)
         for rel in entity.outgoing_relations:
             # Create descriptive title showing the relationship
             relation_title = (
@@ -299,7 +304,7 @@ class SearchService:
             rel_content_stems = "\n".join(
                 p for p in self._generate_variants(relation_title) if p and p.strip()
             )
-
+            rows_to_index.append(
                 SearchIndexRow(
                     id=rel.id,
                     title=relation_title,
@@ -317,6 +322,9 @@ class SearchService:
                 )
             )
 
+        # Batch insert all rows at once
+        await self.repository.bulk_index_items(rows_to_index)
+
     async def delete_by_permalink(self, permalink: str):
         """Delete an item from the search index."""
         await self.repository.delete_by_permalink(permalink)
```