basic-memory 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of basic-memory might be problematic. Click here for more details.
- basic_memory/__init__.py +1 -1
- basic_memory/alembic/alembic.ini +119 -0
- basic_memory/alembic/env.py +23 -1
- basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
- basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
- basic_memory/api/app.py +0 -4
- basic_memory/api/routers/knowledge_router.py +1 -1
- basic_memory/api/routers/memory_router.py +16 -16
- basic_memory/api/routers/resource_router.py +105 -4
- basic_memory/cli/app.py +0 -2
- basic_memory/cli/commands/status.py +9 -21
- basic_memory/cli/commands/sync.py +12 -16
- basic_memory/cli/commands/tools.py +36 -13
- basic_memory/cli/main.py +0 -1
- basic_memory/config.py +15 -1
- basic_memory/file_utils.py +6 -4
- basic_memory/markdown/entity_parser.py +3 -3
- basic_memory/mcp/async_client.py +1 -1
- basic_memory/mcp/main.py +25 -0
- basic_memory/mcp/prompts/__init__.py +15 -0
- basic_memory/mcp/prompts/ai_assistant_guide.py +28 -0
- basic_memory/mcp/prompts/continue_conversation.py +172 -0
- basic_memory/mcp/prompts/json_canvas_spec.py +25 -0
- basic_memory/mcp/prompts/recent_activity.py +46 -0
- basic_memory/mcp/prompts/search.py +127 -0
- basic_memory/mcp/prompts/utils.py +98 -0
- basic_memory/mcp/server.py +3 -7
- basic_memory/mcp/tools/__init__.py +6 -4
- basic_memory/mcp/tools/canvas.py +99 -0
- basic_memory/mcp/tools/memory.py +12 -5
- basic_memory/mcp/tools/notes.py +1 -2
- basic_memory/mcp/tools/resource.py +192 -0
- basic_memory/mcp/tools/utils.py +2 -1
- basic_memory/models/knowledge.py +27 -11
- basic_memory/repository/repository.py +1 -1
- basic_memory/repository/search_repository.py +14 -4
- basic_memory/schemas/__init__.py +0 -11
- basic_memory/schemas/base.py +4 -1
- basic_memory/schemas/memory.py +11 -2
- basic_memory/schemas/search.py +2 -1
- basic_memory/services/entity_service.py +19 -12
- basic_memory/services/file_service.py +69 -2
- basic_memory/services/link_resolver.py +12 -9
- basic_memory/services/search_service.py +56 -12
- basic_memory/sync/__init__.py +3 -2
- basic_memory/sync/sync_service.py +294 -123
- basic_memory/sync/watch_service.py +125 -129
- basic_memory/utils.py +24 -9
- {basic_memory-0.7.0.dist-info → basic_memory-0.8.0.dist-info}/METADATA +2 -1
- basic_memory-0.8.0.dist-info/RECORD +91 -0
- basic_memory/alembic/README +0 -1
- basic_memory/schemas/discovery.py +0 -28
- basic_memory/sync/file_change_scanner.py +0 -158
- basic_memory/sync/utils.py +0 -31
- basic_memory-0.7.0.dist-info/RECORD +0 -82
- {basic_memory-0.7.0.dist-info → basic_memory-0.8.0.dist-info}/WHEEL +0 -0
- {basic_memory-0.7.0.dist-info → basic_memory-0.8.0.dist-info}/entry_points.txt +0 -0
- {basic_memory-0.7.0.dist-info → basic_memory-0.8.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,47 +1,262 @@
|
|
|
1
1
|
"""Service for syncing files between filesystem and database."""
|
|
2
2
|
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from dataclasses import field
|
|
6
|
+
from datetime import datetime
|
|
3
7
|
from pathlib import Path
|
|
4
|
-
from typing import Dict
|
|
8
|
+
from typing import Set, Dict
|
|
9
|
+
from typing import Tuple
|
|
5
10
|
|
|
6
11
|
import logfire
|
|
7
12
|
from loguru import logger
|
|
8
|
-
from sqlalchemy.exc import IntegrityError
|
|
9
13
|
|
|
10
|
-
from basic_memory import
|
|
11
|
-
from basic_memory.
|
|
14
|
+
from basic_memory.markdown import EntityParser
|
|
15
|
+
from basic_memory.models import Entity
|
|
12
16
|
from basic_memory.repository import EntityRepository, RelationRepository
|
|
13
|
-
from basic_memory.services import EntityService
|
|
17
|
+
from basic_memory.services import EntityService, FileService
|
|
14
18
|
from basic_memory.services.search_service import SearchService
|
|
15
|
-
from basic_memory.sync import FileChangeScanner
|
|
16
|
-
from basic_memory.sync.utils import SyncReport
|
|
17
19
|
|
|
18
20
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
+
@dataclass
class SyncReport:
    """Report of file changes found compared to database state.

    Attributes:
        new: Files that exist on disk but not in database
        modified: Files that exist in both but have different checksums
        deleted: Files that exist in database but not on disk
        moves: Files that have been moved from one location to another
        checksums: Current checksums for files on disk
        total: Total number of detected changes (derived property, not a field)
    """

    # We keep paths as strings in sets/dicts for easier serialization
    new: Set[str] = field(default_factory=set)
    modified: Set[str] = field(default_factory=set)
    deleted: Set[str] = field(default_factory=set)
    moves: Dict[str, str] = field(default_factory=dict)  # old_path -> new_path
    checksums: Dict[str, str] = field(default_factory=dict)  # path -> checksum

    @property
    def total(self) -> int:
        """Total number of changes."""
        return len(self.new) + len(self.modified) + len(self.deleted) + len(self.moves)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class ScanResult:
    """Result of scanning a directory."""

    files: Dict[str, str] = field(default_factory=dict)  # file_path -> checksum
    checksums: Dict[str, str] = field(default_factory=dict)  # checksum -> file_path
    errors: Dict[str, str] = field(default_factory=dict)  # file_path -> error message
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class SyncService:
|
|
62
|
+
"""Syncs documents and knowledge files with database."""
|
|
63
|
+
|
|
27
64
|
def __init__(
    self,
    entity_service: EntityService,
    entity_parser: EntityParser,
    entity_repository: EntityRepository,
    relation_repository: RelationRepository,
    search_service: SearchService,
    file_service: FileService,
):
    """Store the collaborator services used during sync.

    All dependencies are injected; this constructor performs no I/O.
    """
    self.entity_service = entity_service
    self.entity_parser = entity_parser
    self.entity_repository = entity_repository
    self.relation_repository = relation_repository
    self.search_service = search_service
    self.file_service = file_service
|
|
79
|
+
|
|
80
|
+
async def sync(self, directory: Path) -> SyncReport:
    """Sync all files with database."""

    with logfire.span(f"sync {directory}", directory=directory):  # pyright: ignore [reportGeneralTypeIssues]
        # Compare current disk state against db state (path -> checksum).
        report = await self.scan(directory)

        # Sync ordering matters so relations can be resolved effectively.

        # 1. moves
        for old_path, new_path in report.moves.items():
            # When a file was deleted and replaced by another file it shows
            # up in both the move and modified lists; defer to modified.
            # NOTE(review): the destination is removed from `modified` here
            # and the move itself is skipped, so neither pass processes it —
            # confirm this is the intended behavior.
            if new_path in report.modified:
                report.modified.remove(new_path)
            else:
                await self.handle_move(old_path, new_path)

        # 2. deletions
        for path in report.deleted:
            await self.handle_delete(path)

        # 3. new files, then modified files
        for path in report.new:
            await self.sync_file(path, new=True)
        for path in report.modified:
            await self.sync_file(path, new=False)

        await self.resolve_relations()
        return report
|
|
112
|
+
|
|
113
|
+
async def scan(self, directory) -> SyncReport:
    """Scan directory for changes compared to database state.

    Args:
        directory: Directory to scan

    Returns:
        SyncReport describing new, modified, deleted, and moved files
    """
    db_paths = await self.get_db_file_state()

    # Track potentially moved files by checksum
    scan_result = await self.scan_directory(directory)
    report = SyncReport()

    # First find potential new files and record checksums.
    # A path absent from the db is either new or the destination of a move.
    for file_path, checksum in scan_result.files.items():
        if file_path not in db_paths:
            report.new.add(file_path)
            report.checksums[file_path] = checksum

    # Now detect modifications, moves, and deletions.
    for db_path, db_checksum in db_paths.items():
        local_checksum_for_db_path = scan_result.files.get(db_path)

        # Same path, same checksum: unchanged, nothing to record.
        if db_checksum == local_checksum_for_db_path:
            continue

        # Same path, different checksum: modified.
        if local_checksum_for_db_path:
            report.modified.add(db_path)
            report.checksums[db_path] = local_checksum_for_db_path

        # Path gone from disk: moved if its checksum reappears elsewhere,
        # otherwise deleted.
        else:
            if db_checksum in scan_result.checksums:
                new_path = scan_result.checksums[db_checksum]
                report.moves[db_path] = new_path

                # The move destination was provisionally counted as new.
                if new_path in report.new:
                    report.new.remove(new_path)
            else:
                report.deleted.add(db_path)
    return report
|
|
157
|
+
|
|
158
|
+
async def get_db_file_state(self) -> Dict[str, str]:
    """Get file_path and checksum for every entity in the database.

    Returns:
        Dict mapping file paths to checksums ("" when no checksum is stored)
    """
    db_records = await self.entity_repository.find_all()
    return {r.file_path: r.checksum or "" for r in db_records}
|
|
168
|
+
|
|
169
|
+
async def sync_file(self, path: str, new: bool = True) -> Tuple[Entity, str]:
    """Sync a single file, dispatching on file type, and index the result."""
    try:
        # Markdown gets full parsing; everything else gets basic tracking.
        if self.file_service.is_markdown(path):
            handler = self.sync_markdown_file
        else:
            handler = self.sync_regular_file
        entity, checksum = await handler(path, new)
        await self.search_service.index_entity(entity)
        return entity, checksum
    except Exception as e:  # pragma: no cover
        logger.error(f"Failed to sync {path}: {e}")
        raise
|
|
183
|
+
|
|
184
|
+
async def sync_markdown_file(self, path: str, new: bool = True) -> Tuple[Entity, str]:
    """Sync a markdown file with full processing."""

    # Parse first so any permalink already in the frontmatter is visible.
    markdown = await self.entity_parser.parse_file(path)

    # Resolve the canonical permalink — handles all cases including conflicts.
    permalink = await self.entity_service.resolve_permalink(path, markdown=markdown)

    if permalink == markdown.frontmatter.permalink:
        checksum = await self.file_service.compute_checksum(path)
    else:
        # Permalink changed: rewrite the file's frontmatter to match.
        logger.info(f"Updating permalink in {path}: {permalink}")
        markdown.frontmatter.metadata["permalink"] = permalink
        checksum = await self.file_service.update_frontmatter(path, {"permalink": permalink})

    if new:
        # Brand-new file: create the entity with its final permalink.
        logger.debug(f"Creating new entity from markdown: {path}")
        await self.entity_service.create_entity_from_markdown(Path(path), markdown)
    else:
        # Existing file: refresh entity and observations.
        logger.debug(f"Updating entity from markdown: {path}")
        await self.entity_service.update_entity_and_observations(Path(path), markdown)

    # Relations are refreshed last; setting the checksum marks sync complete.
    entity = await self.entity_service.update_entity_relations(path, markdown)
    await self.entity_repository.update(entity.id, {"checksum": checksum})
    return entity, checksum
|
|
218
|
+
|
|
219
|
+
async def sync_regular_file(self, path: str, new: bool = True) -> Tuple[Entity, str]:
    """Sync a non-markdown file with basic tracking."""

    checksum = await self.file_service.compute_checksum(path)

    if not new:
        # Existing entity: just refresh its path and checksum.
        entity = await self.entity_repository.get_by_file_path(path)
        assert entity is not None, "entity should not be None for existing file"
        updated = await self.entity_repository.update(
            entity.id, {"file_path": path, "checksum": checksum}
        )
        assert updated is not None, "entity should be updated"
        return updated, checksum

    # New file: claim a permalink derived from the path.
    await self.entity_service.resolve_permalink(path)

    # Filesystem timestamps become the entity's created/updated times.
    stats = self.file_service.file_stats(path)
    created = datetime.fromtimestamp(stats.st_ctime)
    modified = datetime.fromtimestamp(stats.st_mtime)

    entity = await self.entity_repository.add(
        Entity(
            entity_type="file",
            file_path=path,
            checksum=checksum,
            title=Path(path).name,
            created_at=created,
            updated_at=modified,
            content_type=self.file_service.content_type(path),
        )
    )
    return entity, checksum
|
|
256
|
+
|
|
257
|
+
async def handle_delete(self, file_path: str):
|
|
44
258
|
"""Handle complete entity deletion including search index cleanup."""
|
|
259
|
+
|
|
45
260
|
# First get entity to get permalink before deletion
|
|
46
261
|
entity = await self.entity_repository.get_by_file_path(file_path)
|
|
47
262
|
if entity:
|
|
@@ -58,117 +273,73 @@ class SyncService:
|
|
|
58
273
|
)
|
|
59
274
|
logger.debug(f"Deleting from search index: {permalinks}")
|
|
60
275
|
for permalink in permalinks:
|
|
61
|
-
|
|
276
|
+
if permalink:
|
|
277
|
+
await self.search_service.delete_by_permalink(permalink)
|
|
278
|
+
else:
|
|
279
|
+
await self.search_service.delete_by_entity_id(entity.id)
|
|
62
280
|
|
|
63
|
-
async def
|
|
64
|
-
"
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
#
|
|
71
|
-
|
|
72
|
-
logger.debug(f"Moving entity: {old_path} -> {new_path}")
|
|
73
|
-
entity = await self.entity_repository.get_by_file_path(old_path)
|
|
74
|
-
if entity:
|
|
75
|
-
# Update file_path but keep the same permalink for link stability
|
|
76
|
-
updated = await self.entity_repository.update(
|
|
77
|
-
entity.id, {"file_path": new_path, "checksum": changes.checksums[new_path]}
|
|
78
|
-
)
|
|
79
|
-
# update search index
|
|
80
|
-
if updated:
|
|
81
|
-
await self.search_service.index_entity(updated)
|
|
82
|
-
|
|
83
|
-
# Handle deletions next
|
|
84
|
-
# remove rows from db for files no longer present
|
|
85
|
-
for path in changes.deleted:
|
|
86
|
-
await self.handle_entity_deletion(path)
|
|
87
|
-
|
|
88
|
-
# Parse files that need updating
|
|
89
|
-
parsed_entities: Dict[str, EntityMarkdown] = {}
|
|
90
|
-
|
|
91
|
-
for path in [*changes.new, *changes.modified]:
|
|
92
|
-
entity_markdown = await self.entity_parser.parse_file(directory / path)
|
|
93
|
-
parsed_entities[path] = entity_markdown
|
|
94
|
-
|
|
95
|
-
# First pass: Create/update entities
|
|
96
|
-
# entities will have a null checksum to indicate they are not complete
|
|
97
|
-
for path, entity_markdown in parsed_entities.items():
|
|
98
|
-
# Get unique permalink and update markdown if needed
|
|
99
|
-
permalink = await self.entity_service.resolve_permalink(
|
|
100
|
-
Path(path), markdown=entity_markdown
|
|
101
|
-
)
|
|
281
|
+
async def handle_move(self, old_path, new_path):
    """Record a file move, preserving the entity's permalink."""
    logger.debug(f"Moving entity: {old_path} -> {new_path}")
    entity = await self.entity_repository.get_by_file_path(old_path)
    if not entity:
        return
    # Only file_path changes; the permalink stays stable for link integrity.
    updated = await self.entity_repository.update(entity.id, {"file_path": new_path})
    assert updated is not None, "entity should be updated"
    # Re-index so search reflects the new location.
    await self.search_service.index_entity(updated)
|
|
102
290
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
await self.entity_service.create_entity_from_markdown(
|
|
123
|
-
Path(path), entity_markdown
|
|
124
|
-
)
|
|
125
|
-
# otherwise we need to update the entity and observations
|
|
126
|
-
else:
|
|
127
|
-
logger.debug(f"Updating entity_markdown: {path}")
|
|
128
|
-
await self.entity_service.update_entity_and_observations(
|
|
129
|
-
Path(path), entity_markdown
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
# Second pass
|
|
133
|
-
for path, entity_markdown in parsed_entities.items():
|
|
134
|
-
logger.debug(f"Updating relations for: {path}")
|
|
135
|
-
|
|
136
|
-
# Process relations
|
|
137
|
-
checksum = changes.checksums[path]
|
|
138
|
-
entity = await self.entity_service.update_entity_relations(
|
|
139
|
-
Path(path), entity_markdown
|
|
291
|
+
async def resolve_relations(self):
    """Try to resolve any unresolved relations"""

    pending = await self.relation_repository.find_unresolved_relations()
    logger.debug(f"Attempting to resolve {len(pending)} forward references")
    for relation in pending:
        target = await self.entity_service.link_resolver.resolve_link(relation.to_name)

        # Skip unresolvable links and references back to the source entity.
        if not target or target.id == relation.from_id:
            continue

        logger.debug(
            f"Resolved forward reference: {relation.to_name} -> {target.title}"
        )
        await self.relation_repository.update(
            relation.id,
            {
                "to_id": target.id,
                "to_name": target.title,
            },
        )

        # update search index
        await self.search_service.index_entity(target)
|
|
144
314
|
|
|
145
|
-
|
|
146
|
-
|
|
315
|
+
async def scan_directory(self, directory: Path) -> ScanResult:
    """
    Scan directory for markdown files and their checksums.

    Args:
        directory: Directory to scan

    Returns:
        ScanResult containing found files and any errors
    """

    logger.debug(f"Scanning directory: {directory}")
    result = ScanResult()

    for root, dirnames, filenames in os.walk(str(directory)):
        # Prune dot-directories in place so os.walk never descends into them.
        dirnames[:] = [d for d in dirnames if not d.startswith(".")]

        for name in filenames:
            # Dot files are ignored.
            if name.startswith("."):
                continue

            rel_path = str((Path(root) / name).relative_to(directory))
            # NOTE(review): checksum is computed from the *relative* path —
            # assumes file_service resolves paths against this directory;
            # confirm. Also, result.errors is never populated here.
            checksum = await self.file_service.compute_checksum(rel_path)
            result.files[rel_path] = checksum
            result.checksums[checksum] = rel_path
            logger.debug(f"Found file: {rel_path} with checksum: {checksum}")

    return result
|