memuron 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. memuron/__init__.py +3 -0
  2. memuron/actions/__init__.py +12 -0
  3. memuron/actions/context.py +63 -0
  4. memuron/actions/helpers.py +88 -0
  5. memuron/actions/memory.py +340 -0
  6. memuron/actions/memory_write.py +290 -0
  7. memuron/actions/nodes.py +340 -0
  8. memuron/actions/registry.py +5 -0
  9. memuron/actions/runtime.py +37 -0
  10. memuron/actions/spaces_documents.py +720 -0
  11. memuron/actions/sync.py +155 -0
  12. memuron/application/__init__.py +1 -0
  13. memuron/application/api.py +206 -0
  14. memuron/application/app.py +103 -0
  15. memuron/application/capabilities.py +82 -0
  16. memuron/application/cli.py +35 -0
  17. memuron/application/config.py +176 -0
  18. memuron/application/mcp.py +44 -0
  19. memuron/application/mcp_oauth.py +290 -0
  20. memuron/application/registry.py +52 -0
  21. memuron/context.py +532 -0
  22. memuron/documents/__init__.py +1 -0
  23. memuron/documents/link_guardian.py +192 -0
  24. memuron/documents/linking.py +292 -0
  25. memuron/documents/parser.py +1152 -0
  26. memuron/documents/storage.py +151 -0
  27. memuron/documents/url_ingest.py +375 -0
  28. memuron/domain/__init__.py +1 -0
  29. memuron/domain/decoders.py +1 -0
  30. memuron/domain/encoders.py +185 -0
  31. memuron/domain/lifecycles.py +8 -0
  32. memuron/domain/limits.py +6 -0
  33. memuron/domain/representations.py +56 -0
  34. memuron/domain/schemas.py +581 -0
  35. memuron/domain/scope_filter.py +104 -0
  36. memuron/graphfs/__init__.py +1 -0
  37. memuron/graphfs/manual.py +635 -0
  38. memuron/graphfs/projection.py +578 -0
  39. memuron/graphfs/query.py +1782 -0
  40. memuron/graphfs/read_model.py +574 -0
  41. memuron/ingest/__init__.py +1 -0
  42. memuron/ingest/guardian.py +213 -0
  43. memuron/ingest/jobs.py +424 -0
  44. memuron/ingest/prompts.py +147 -0
  45. memuron/memory/__init__.py +1 -0
  46. memuron/memory/engine.py +35 -0
  47. memuron/memory/projections.py +452 -0
  48. memuron/memory/recipes.py +3247 -0
  49. memuron/persistence/__init__.py +1 -0
  50. memuron/persistence/db_pool.py +57 -0
  51. memuron/persistence/identity_store.py +918 -0
  52. memuron/persistence/store_helpers.py +16 -0
  53. memuron/search/__init__.py +1 -0
  54. memuron/search/fulltext.py +110 -0
  55. memuron/search/hybrid.py +284 -0
  56. memuron/search/pgvector.py +252 -0
  57. memuron/security/__init__.py +1 -0
  58. memuron/security/auth.py +143 -0
  59. memuron/security/auth_provider.py +119 -0
  60. memuron/security/authorization.py +53 -0
  61. memuron/security/clerk_scopes.py +94 -0
  62. memuron/security/clerk_webhooks.py +61 -0
  63. memuron/security/jwt_tokens.py +53 -0
  64. memuron/security/passwords.py +38 -0
  65. memuron/security/tenant.py +58 -0
  66. memuron/spaces/__init__.py +1 -0
  67. memuron/spaces/model.py +35 -0
  68. memuron/spaces/service.py +155 -0
  69. memuron/sync/__init__.py +25 -0
  70. memuron/sync/folder.py +828 -0
  71. memuron-0.1.1.dist-info/METADATA +242 -0
  72. memuron-0.1.1.dist-info/RECORD +74 -0
  73. memuron-0.1.1.dist-info/WHEEL +4 -0
  74. memuron-0.1.1.dist-info/entry_points.txt +4 -0
memuron/sync/folder.py ADDED
@@ -0,0 +1,828 @@
1
+ """CLI-first one-way folder sync.
2
+
3
+ V0 intentionally imports local folders into Memuron collections and documents without
4
+ watching files, deleting remote graph nodes, or attempting a bidirectional mount.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import fnmatch
10
+ import hashlib
11
+ import json
12
+ import mimetypes
13
+ from dataclasses import dataclass, field
14
+ from datetime import UTC, datetime
15
+ from pathlib import Path
16
+ from typing import Any, Protocol
17
+
18
+ from artha_engine import ArthaEngine
19
+
20
+ from memuron.documents.parser import MAX_DOCUMENT_UPLOAD_BYTES
21
+ from memuron.memory.recipes import (
22
+ create_collection,
23
+ ensure_memory_projections,
24
+ ingest_document_source,
25
+ place_node_in_collection,
26
+ )
27
+
28
# Schema version stamped into manifests and into node metadata.
MANIFEST_VERSION = 1
# Manifest location, relative to the synced root folder.
DEFAULT_MANIFEST_RELATIVE_PATH = ".memuron/sync-manifest.json"
# Optional per-root file holding extra exclude patterns.
IGNORE_FILE_NAME = ".memuronignore"

# Patterns that are always excluded; a trailing "/" restricts a pattern to directories.
DEFAULT_EXCLUDES = [
    # Version control and sync bookkeeping.
    ".git/", ".hg/", ".svn/", ".memuron/",
    # Dependency trees and tool caches.
    "node_modules/", "__pycache__/", ".pytest_cache/", ".mypy_cache/", ".ruff_cache/",
    ".next/", ".nuxt/", ".turbo/",
    # Virtual environments and build output.
    ".venv/", "venv/", "dist/", "build/", "coverage/",
    # Operating-system droppings.
    ".DS_Store", "Thumbs.db", "desktop.ini",
    IGNORE_FILE_NAME,
]

# Lower-case file suffixes that the scanner will hand to ingestion.
SUPPORTED_EXTENSIONS = {
    # Plain text and markup.
    ".md", ".markdown", ".text", ".txt", ".rtf", ".htm", ".html", ".xml",
    # Structured data.
    ".csv", ".tsv", ".json", ".jsonl", ".yaml", ".yml",
    # Office documents.
    ".docx", ".pdf", ".ppt", ".pptx", ".xls", ".xlsm", ".xlsx",
    # Images.
    ".gif", ".jpeg", ".jpg", ".png", ".webp",
}
84
+
85
+
86
+ def _now_iso() -> str:
87
+ return datetime.now(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
88
+
89
+
90
def _rel(path: Path, root: Path) -> str:
    """Return *path* relative to *root* using ``/`` separators; the root itself maps to ``"."``.

    Raises:
        ValueError: if *path* is not located under *root* (from ``Path.relative_to``).
    """
    relative = path.relative_to(root)
    posix = relative.as_posix()
    return posix if posix else "."
93
+
94
+
95
def _normalize_root(path: str | Path) -> Path:
    """Expand ``~`` and resolve *path*, insisting that it names an existing directory.

    Raises:
        ValueError: if the resolved path is missing or is not a directory.
    """
    resolved = Path(path).expanduser().resolve()
    if resolved.is_dir():
        return resolved
    if resolved.exists():
        raise ValueError(f"Folder sync path must be a directory: {resolved}")
    raise ValueError(f"Folder sync path does not exist: {resolved}")
102
+
103
+
104
def manifest_path_for_root(root: str | Path) -> Path:
    """Return the default manifest location inside the (validated) sync root *root*."""
    base = _normalize_root(root)
    return base.joinpath(DEFAULT_MANIFEST_RELATIVE_PATH)
106
+
107
+
108
def _load_ignore_file(root: Path) -> list[str]:
    """Read the ignore file directly under *root* and return its patterns.

    Blank lines and lines starting with ``#`` are dropped; every other line is
    returned stripped of surrounding whitespace. A missing file yields ``[]``.
    """
    source = root / IGNORE_FILE_NAME
    if not source.exists():
        return []
    stripped = (raw.strip() for raw in source.read_text(encoding="utf-8").splitlines())
    return [line for line in stripped if line and not line.startswith("#")]
119
+
120
+
121
def _match_pattern(pattern: str, rel_path: str, *, is_dir: bool) -> bool:
    """Return whether a single gitignore-style *pattern* matches *rel_path*.

    A leading ``!`` is stripped and otherwise ignored here; the caller decides
    what a negated match means.

    Args:
        pattern: One pattern. A trailing ``/`` restricts it to directories (and
            therefore to everything beneath them); a leading ``/`` anchors it to
            the sync root.
        rel_path: POSIX-style path relative to the sync root.
        is_dir: Whether *rel_path* names a directory.

    Returns:
        ``True`` when the pattern applies to the path.
    """
    pattern = pattern.removeprefix("!").strip()
    if not pattern:
        return False

    rel_path = rel_path.strip("/")
    if pattern.endswith("/") and not is_dir:
        # A directory-only pattern can still cover a file through one of the
        # file's ancestor directories, so test the containing directory instead.
        rel_path, separator, _ = rel_path.rpartition("/")
        if not separator:
            return False

    # A leading slash anchors the pattern at the root. rel_path never carries
    # one, so the slash must be removed or the pattern could never match.
    anchored = pattern.startswith("/")
    pattern = pattern.strip("/")
    if not pattern:
        return False

    if anchored or "/" in pattern:
        return fnmatch.fnmatch(rel_path, pattern) or fnmatch.fnmatch(rel_path, f"{pattern}/*")

    # A bare name may match the last component, any ancestor component, or the
    # whole relative path.
    basename = rel_path.rsplit("/", 1)[-1]
    parts = rel_path.split("/") if rel_path else []
    return (
        fnmatch.fnmatch(basename, pattern)
        or any(fnmatch.fnmatch(part, pattern) for part in parts)
        or fnmatch.fnmatch(rel_path, pattern)
        or fnmatch.fnmatch(rel_path, f"{pattern}/*")
    )
144
+
145
+
146
def _matches_any(patterns: list[str], rel_path: str, *, is_dir: bool) -> bool:
    """Apply *patterns* in order and report whether *rel_path* ends up matched.

    Plain patterns switch the result on; ``!``-prefixed patterns switch it back
    off, so later entries override earlier ones.
    """
    matched = False
    for entry in patterns:
        is_negation = entry.startswith("!")
        if is_negation is not matched:
            # A negation can only clear an existing hit and a plain pattern can
            # only create one, so the entry cannot change the outcome here.
            continue
        if _match_pattern(entry, rel_path, is_dir=is_dir):
            matched = not is_negation
    return matched
156
+
157
+
158
def _sha256(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*, read in 1 MiB pieces."""
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while block := stream.read(1024 * 1024):
            hasher.update(block)
    return hasher.hexdigest()
164
+
165
+
166
def _supported(path: Path) -> bool:
    """Return whether the final suffix of *path*, compared case-insensitively, is ingestible."""
    extension = path.suffix.lower()
    return extension in SUPPORTED_EXTENSIONS
168
+
169
+
170
@dataclass(frozen=True)
class ScannedDirectory:
    """A directory kept by a folder scan."""

    # Location of the directory on disk.
    path: Path
    # POSIX path relative to the scan root ("." for the root itself).
    relative_path: str
174
+
175
+
176
@dataclass(frozen=True)
class ScannedFile:
    """A file accepted by a folder scan, together with its change fingerprint."""

    # Location of the file on disk.
    path: Path
    # POSIX path relative to the scan root.
    relative_path: str
    # Hex SHA-256 digest of the file content.
    sha256: str
    # Modification time in nanoseconds, as reported by stat().
    mtime_ns: int
    # File size in bytes, as reported by stat().
    size_bytes: int
183
+
184
+
185
@dataclass(frozen=True)
class SkippedPath:
    """A file or directory a folder scan decided not to sync, with the reason."""

    # Location of the entry on disk.
    path: Path
    # POSIX path relative to the scan root.
    relative_path: str
    # Human-readable explanation of why the entry was skipped.
    reason: str
    # Size in bytes when it was known at skip time, otherwise None.
    size_bytes: int | None = None
191
+
192
+
193
@dataclass(frozen=True)
class FolderScan:
    """Everything a single walk of a sync root produced."""

    # Resolved root folder that was walked.
    root_path: Path
    # Directories that were kept.
    directories: list[ScannedDirectory]
    # Files that passed every filter and were hashed.
    files: list[ScannedFile]
    # Entries that were left out, each with its reason.
    skipped: list[SkippedPath]
199
+
200
+
201
@dataclass
class SyncManifest:
    """Persistent state of one synced folder: its settings plus per-path sync records."""

    # Sync root folder, stored as a string.
    root_path: str
    # Reference to the target space, as supplied by the caller.
    space_ref: str
    # Include patterns; an empty list means every file is eligible.
    include: list[str] = field(default_factory=list)
    # Exclude patterns applied in addition to the built-in defaults.
    exclude: list[str] = field(default_factory=list)
    # Files larger than this many bytes are skipped by the scan.
    max_file_bytes: int = MAX_DOCUMENT_UPLOAD_BYTES
    # Manifest schema version.
    version: int = MANIFEST_VERSION
    created_at: str = field(default_factory=_now_iso)
    updated_at: str = field(default_factory=_now_iso)
    space: dict[str, Any] = field(default_factory=dict)
    # Per-directory and per-file records, keyed by relative path.
    directories: dict[str, dict[str, Any]] = field(default_factory=dict)
    files: dict[str, dict[str, Any]] = field(default_factory=dict)
    # Timestamp and summary of the most recent non-dry run.
    last_run: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the manifest as a plain dict; container values are shared, not copied."""
        ordered_keys = (
            "version",
            "root_path",
            "space_ref",
            "space",
            "include",
            "exclude",
            "max_file_bytes",
            "created_at",
            "updated_at",
            "directories",
            "files",
            "last_run",
        )
        return {key: getattr(self, key) for key in ordered_keys}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SyncManifest":
        """Build a manifest from parsed JSON, substituting defaults for missing optional keys.

        Raises:
            ValueError: if the stored version differs from ``MANIFEST_VERSION``.
            KeyError: if ``root_path`` or ``space_ref`` is absent.
        """
        found_version = int(data.get("version") or 0)
        if found_version != MANIFEST_VERSION:
            raise ValueError(f"Unsupported folder sync manifest version: {found_version}")
        read = data.get
        values: dict[str, Any] = {
            "version": found_version,
            "root_path": str(data["root_path"]),
            "space_ref": str(data["space_ref"]),
            "space": dict(read("space") or {}),
            "include": list(read("include") or []),
            "exclude": list(read("exclude") or []),
            "max_file_bytes": int(read("max_file_bytes") or MAX_DOCUMENT_UPLOAD_BYTES),
            "created_at": str(read("created_at") or _now_iso()),
            "updated_at": str(read("updated_at") or _now_iso()),
            "directories": dict(read("directories") or {}),
            "files": dict(read("files") or {}),
            "last_run": dict(read("last_run") or {}),
        }
        return cls(**values)
251
+
252
+
253
@dataclass(frozen=True)
class FilePlanItem:
    """The planned handling of one file path during a sync run."""

    # POSIX path relative to the sync root.
    relative_path: str
    # One of the planner's action names, e.g. "ingest", "reingest", "unchanged", "skipped".
    action: str
    # Human-readable justification for the action.
    reason: str
    # Scan record when the file was accepted by the scan.
    scanned: ScannedFile | None = None
    # Skip record when the file was left out by the scan.
    skipped: SkippedPath | None = None
    # Copy of the manifest entry that existed before this run (empty when none).
    previous: dict[str, Any] = field(default_factory=dict)
261
+
262
+
263
@dataclass(frozen=True)
class DirectoryPlanItem:
    """The planned handling of one directory during a sync run."""

    # POSIX path relative to the sync root ("." for the root).
    relative_path: str
    # "create" or "unchanged", as chosen by the planner.
    action: str
    # Scan record for the directory.
    scanned: ScannedDirectory | None = None
    # Copy of the manifest entry that existed before this run (empty when none).
    previous: dict[str, Any] = field(default_factory=dict)
269
+
270
+
271
@dataclass(frozen=True)
class SyncPlan:
    """The outcome of comparing a fresh scan against the manifest."""

    root_path: Path
    manifest: SyncManifest
    directories: list[DirectoryPlanItem]
    files: list[FilePlanItem]
    # Relative paths recorded in the manifest that the scan no longer found.
    deleted_files: list[str]
    deleted_directories: list[str]

    def summary(self) -> dict[str, int]:
        """Return per-action counts for directories and files plus the deletion totals."""

        def tally(items: list[Any]) -> dict[str, int]:
            counts: dict[str, int] = {}
            for entry in items:
                counts[entry.action] = counts.get(entry.action, 0) + 1
            return counts

        by_dir = tally(self.directories)
        by_file = tally(self.files)
        report = {f"directories_{action}": by_dir.get(action, 0) for action in ("create", "unchanged")}
        report.update(
            {
                f"files_{action}": by_file.get(action, 0)
                for action in ("ingest", "reingest", "unchanged", "skipped")
            }
        )
        report["deleted_files_reported"] = len(self.deleted_files)
        report["deleted_directories_reported"] = len(self.deleted_directories)
        return report
297
+
298
+
299
class FolderSyncBackend(Protocol):
    """Operations a folder sync run needs from whatever stores the graph."""

    def create_collection(
        self,
        *,
        relative_path: str,
        name: str,
        summary: str,
        metadata: dict[str, Any],
    ) -> dict[str, Any]:
        """Create a collection for a directory; the result must carry an ``"id"`` key."""

    def place_node(
        self,
        *,
        parent_id: str,
        child_id: str,
        name: str,
        metadata: dict[str, Any],
    ) -> dict[str, Any]:
        """Place ``child_id`` under ``parent_id``; the result must carry an ``"id"`` key."""

    def ingest_file(
        self,
        file: ScannedFile,
        *,
        metadata: dict[str, Any],
    ) -> dict[str, Any]:
        """Ingest one scanned file.

        The sync run reads ``["collection"]["id"]``, ``["document"]["id"]`` and
        ``["document_key"]`` from the result.
        """
327
+
328
+
329
class EngineFolderSyncBackend:
    """Folder sync backend that forwards every operation to the memory recipes."""

    def __init__(
        self,
        engine: ArthaEngine,
        *,
        scope: list[str],
        event_metadata: dict[str, object] | None = None,
    ) -> None:
        """Keep the engine and private copies of *scope* and *event_metadata*."""
        self.engine = engine
        self.scope = [*scope]
        self.event_metadata = dict(event_metadata) if event_metadata else {}

    def create_collection(
        self,
        *,
        relative_path: str,
        name: str,
        summary: str,
        metadata: dict[str, Any],
    ) -> dict[str, Any]:
        """Create a collection via the recipe of the same name; *relative_path* is not forwarded."""
        result = create_collection(
            self.engine,
            name=name,
            summary=summary,
            scope=self.scope,
            metadata=metadata,
            event_metadata=self.event_metadata,
        )
        return result

    def place_node(
        self,
        *,
        parent_id: str,
        child_id: str,
        name: str,
        metadata: dict[str, Any],
    ) -> dict[str, Any]:
        """Place a node inside a collection via ``place_node_in_collection``."""
        result = place_node_in_collection(
            self.engine,
            parent_id=parent_id,
            child_id=child_id,
            name=name,
            scope=self.scope,
            metadata=metadata,
            event_metadata=self.event_metadata,
        )
        return result

    def ingest_file(
        self,
        file: ScannedFile,
        *,
        metadata: dict[str, Any],
    ) -> dict[str, Any]:
        """Read the whole file and ingest it via ``ingest_document_source``.

        The content type is guessed from the file name and may be ``None``.
        """
        file_name = file.path.name
        guessed_type, _encoding = mimetypes.guess_type(file_name)
        raw = file.path.read_bytes()
        return ingest_document_source(
            self.engine,
            file_name=file_name,
            content_type=guessed_type,
            file_bytes=raw,
            scope=self.scope,
            metadata=metadata,
            event_metadata=self.event_metadata,
        )
392
+
393
+
394
def scan_folder(
    root: str | Path,
    *,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    max_file_bytes: int = MAX_DOCUMENT_UPLOAD_BYTES,
) -> FolderScan:
    """Walk *root* once and classify every entry as kept or skipped.

    Args:
        root: Folder to scan; it must exist and be a directory.
        include: Optional patterns; when given, only matching files are kept.
        exclude: Extra exclude patterns, applied after the built-in defaults and
            the root's ignore file.
        max_file_bytes: Files larger than this are skipped.

    Returns:
        A ``FolderScan`` whose directories are ordered by depth and then path,
        and whose files and skipped entries are ordered by relative path.

    Raises:
        ValueError: if *root* is missing or is not a directory.
    """
    root_path = _normalize_root(root)
    includes = list(include or [])
    excludes = [*DEFAULT_EXCLUDES, *_load_ignore_file(root_path), *(exclude or [])]
    directories = [ScannedDirectory(root_path, ".")]
    files: list[ScannedFile] = []
    skipped: list[SkippedPath] = []

    for current_path, dir_names, file_names in root_path.walk():
        kept_dirs: list[str] = []
        for dir_name in sorted(dir_names):
            child = current_path / dir_name
            rel_path = _rel(child, root_path)
            if _matches_any(excludes, rel_path, is_dir=True):
                skipped.append(SkippedPath(child, rel_path, "ignored directory"))
                continue
            # Include patterns never prune a directory: one of them may still
            # match a descendant, so every non-excluded directory is kept.
            kept_dirs.append(dir_name)
            directories.append(ScannedDirectory(child, rel_path))
        # Assigning in place is what stops the walk from descending into excluded directories.
        dir_names[:] = kept_dirs

        for file_name in sorted(file_names):
            path = current_path / file_name
            rel_path = _rel(path, root_path)
            try:
                stat = path.stat()
            except OSError as exc:
                skipped.append(SkippedPath(path, rel_path, f"stat failed: {exc}"))
                continue
            if _matches_any(excludes, rel_path, is_dir=False):
                skipped.append(SkippedPath(path, rel_path, "ignored file", stat.st_size))
                continue
            if includes and not _matches_any(includes, rel_path, is_dir=False):
                skipped.append(SkippedPath(path, rel_path, "not included", stat.st_size))
                continue
            if stat.st_size > max_file_bytes:
                skipped.append(
                    SkippedPath(
                        path,
                        rel_path,
                        f"file exceeds max upload size ({max_file_bytes} bytes)",
                        stat.st_size,
                    )
                )
                continue
            if not _supported(path):
                skipped.append(SkippedPath(path, rel_path, "unsupported file type", stat.st_size))
                continue
            try:
                digest = _sha256(path)
            except OSError as exc:
                # An unreadable or vanished file is reported like a failed stat
                # rather than aborting the scan of everything else.
                skipped.append(SkippedPath(path, rel_path, f"read failed: {exc}", stat.st_size))
                continue
            files.append(
                ScannedFile(
                    path=path,
                    relative_path=rel_path,
                    sha256=digest,
                    mtime_ns=stat.st_mtime_ns,
                    size_bytes=stat.st_size,
                )
            )

    # Shallow directories first, so a parent always precedes its children.
    directories.sort(key=lambda item: (item.relative_path.count("/"), item.relative_path))
    files.sort(key=lambda item: item.relative_path)
    skipped.sort(key=lambda item: item.relative_path)
    return FolderScan(root_path=root_path, directories=directories, files=files, skipped=skipped)
466
+
467
+
468
def init_manifest(
    path: str | Path,
    *,
    space_ref: str,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    max_file_bytes: int = MAX_DOCUMENT_UPLOAD_BYTES,
    manifest_path: str | Path | None = None,
    overwrite: bool = False,
) -> tuple[SyncManifest, Path]:
    """Create a fresh manifest for the folder *path* and write it to disk.

    Returns:
        The new manifest and the path it was saved to (``manifest_path`` when
        given, otherwise the default location under the root).

    Raises:
        ValueError: if *path* is not an existing directory, or if the manifest
            file already exists and *overwrite* is false.
    """
    root = _normalize_root(path)
    if manifest_path:
        target = Path(manifest_path).expanduser().resolve()
    else:
        target = manifest_path_for_root(root)
    if not overwrite and target.exists():
        raise ValueError(f"Folder sync manifest already exists: {target}")

    settings: dict[str, Any] = {
        "root_path": str(root),
        "space_ref": space_ref,
        "include": list(include or []),
        "exclude": list(exclude or []),
        "max_file_bytes": max_file_bytes,
    }
    manifest = SyncManifest(**settings)
    save_manifest(manifest, target)
    return manifest, target
491
+
492
+
493
def load_manifest(path: str | Path) -> SyncManifest:
    """Read the UTF-8 JSON manifest at *path* and turn it into a ``SyncManifest``.

    Raises:
        ValueError: if the file does not hold a JSON object.
    """
    source = Path(path).expanduser()
    payload = json.loads(source.read_text(encoding="utf-8"))
    if isinstance(payload, dict):
        return SyncManifest.from_dict(payload)
    raise ValueError("Folder sync manifest must contain a JSON object")
498
+
499
+
500
def save_manifest(manifest: SyncManifest, path: str | Path) -> None:
    """Stamp ``updated_at`` on *manifest* and write it to *path* as sorted, indented JSON.

    Parent directories are created as needed. The content is first written to a
    sibling ``<name>.tmp`` file and then renamed over the target, so an
    interrupted write cannot leave a truncated manifest behind.
    """
    manifest.updated_at = _now_iso()
    target = Path(path).expanduser()
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(manifest.to_dict(), indent=2, sort_keys=True, ensure_ascii=True) + "\n"

    staging = target.with_name(f"{target.name}.tmp")
    try:
        staging.write_text(payload, encoding="utf-8")
        staging.replace(target)
    finally:
        # After a successful rename this is a no-op; after a failure it removes
        # the partial staging file.
        staging.unlink(missing_ok=True)
508
+
509
+
510
def resolve_manifest_path(path: str | Path, manifest_path: str | Path | None = None) -> Path:
    """Work out which manifest file a command should use.

    An explicit *manifest_path* wins. Otherwise *path* is taken as the manifest
    itself when it is a file, and as a sync root (using the default manifest
    location) when it is not.
    """
    chosen = path if manifest_path is None else manifest_path
    resolved = Path(chosen).expanduser().resolve()
    if manifest_path is not None or resolved.is_file():
        return resolved
    return manifest_path_for_root(resolved)
517
+
518
+
519
def plan_folder_sync(manifest: SyncManifest) -> SyncPlan:
    """Scan the manifest's root and decide what a sync run would do with each path.

    Directories are planned as ``create`` until the manifest holds a node id for
    them. Files are ``unchanged`` only when hash, mtime, size and a ``synced``
    status all agree with the manifest; otherwise they are ``reingest`` when an
    earlier import is on record and ``ingest`` when not. Skipped files are listed
    as ``skipped``, and manifest entries the scan did not find are reported as
    deleted.
    """
    root = _normalize_root(manifest.root_path)
    scan = scan_folder(
        root,
        include=manifest.include,
        exclude=manifest.exclude,
        max_file_bytes=manifest.max_file_bytes,
    )
    known_dirs = manifest.directories
    known_files = manifest.files
    seen_dirs = {entry.relative_path for entry in scan.directories}
    seen_files = {entry.relative_path for entry in scan.files}
    # Only skipped entries that are files on disk take part in file planning.
    skipped_files = {entry.relative_path for entry in scan.skipped if entry.path.is_file()}

    def snapshot(table: dict[str, dict[str, Any]], key: str) -> dict[str, Any]:
        return dict(table.get(key) or {})

    dir_items: list[DirectoryPlanItem] = []
    for directory in scan.directories:
        before = snapshot(known_dirs, directory.relative_path)
        dir_items.append(
            DirectoryPlanItem(
                relative_path=directory.relative_path,
                action="unchanged" if before.get("node_id") else "create",
                scanned=directory,
                previous=before,
            )
        )

    file_items: list[FilePlanItem] = []
    for file in scan.files:
        before = snapshot(known_files, file.relative_path)
        recorded = (
            before.get("sha256"),
            before.get("mtime_ns"),
            before.get("size_bytes"),
            before.get("last_sync_status"),
        )
        if recorded == (file.sha256, file.mtime_ns, file.size_bytes, "synced"):
            action, reason = "unchanged", "hash, mtime, and size unchanged"
        elif before.get("collection_id") or before.get("document_id"):
            action, reason = (
                "reingest",
                "file changed; V0 imports a new document graph and keeps old nodes",
            )
        else:
            action, reason = "ingest", "new supported file"
        file_items.append(
            FilePlanItem(
                relative_path=file.relative_path,
                action=action,
                reason=reason,
                scanned=file,
                previous=before,
            )
        )

    file_items.extend(
        FilePlanItem(
            relative_path=entry.relative_path,
            action="skipped",
            reason=entry.reason,
            skipped=entry,
            previous=snapshot(known_files, entry.relative_path),
        )
        for entry in scan.skipped
        if entry.relative_path in skipped_files
    )

    deleted_files = sorted(set(known_files) - seen_files - skipped_files - {"."})
    deleted_dirs = sorted(set(known_dirs) - seen_dirs - {"."})
    file_items.sort(key=lambda item: item.relative_path)
    return SyncPlan(
        root_path=root,
        manifest=manifest,
        directories=dir_items,
        files=file_items,
        deleted_files=deleted_files,
        deleted_directories=deleted_dirs,
    )
599
+
600
+
601
def _folder_metadata(root: Path, relative_path: str) -> dict[str, Any]:
    """Build the ``system.folder_sync`` metadata attached to a directory's collection."""
    details: dict[str, Any] = {
        "version": MANIFEST_VERSION,
        "kind": "directory",
        "root_path": str(root),
        "relative_path": relative_path,
    }
    return {"system": {"folder_sync": details}}
612
+
613
+
614
def _file_metadata(root: Path, file: ScannedFile) -> dict[str, Any]:
    """Build the ``system.folder_sync`` metadata attached to an ingested file."""
    details: dict[str, Any] = {
        "version": MANIFEST_VERSION,
        "kind": "file",
        "root_path": str(root),
        "path": str(file.path),
        "relative_path": file.relative_path,
        "sha256": file.sha256,
        "mtime_ns": file.mtime_ns,
        "size_bytes": file.size_bytes,
    }
    return {"system": {"folder_sync": details}}
629
+
630
+
631
def _parent_relative_path(relative_path: str) -> str:
    """Return the relative path of the containing directory, or ``"."`` for top-level entries."""
    head, separator, _tail = relative_path.rpartition("/")
    return head if separator else "."
635
+
636
+
637
def _sync_directories(plan: SyncPlan, manifest: SyncManifest, backend: FolderSyncBackend) -> None:
    """Create collections for new directories and refresh every directory's manifest entry."""
    for item in plan.directories:
        scanned = item.scanned
        if scanned is None:
            continue
        entry = dict(manifest.directories.get(item.relative_path) or {})
        if item.action == "create":
            is_root = item.relative_path == "."
            collection = backend.create_collection(
                relative_path=item.relative_path,
                name=plan.root_path.name if is_root else scanned.path.name,
                summary=(
                    f"Folder sync root: {plan.root_path}"
                    if is_root
                    else f"Folder synced from {item.relative_path}"
                ),
                metadata=_folder_metadata(plan.root_path, item.relative_path),
            )
            entry.update(
                {
                    "path": str(scanned.path),
                    "relative_path": item.relative_path,
                    "node_id": collection["id"],
                    "last_sync_status": "synced",
                    "last_synced_at": _now_iso(),
                }
            )
            if not is_root:
                # The plan lists shallow directories first, so the parent's entry
                # (and node id) is normally already in the manifest.
                parent = manifest.directories.get(_parent_relative_path(item.relative_path)) or {}
                parent_id = parent.get("node_id")
                if parent_id:
                    placement = backend.place_node(
                        parent_id=str(parent_id),
                        child_id=str(collection["id"]),
                        name=scanned.path.name,
                        metadata={
                            "role": "folder_sync_directory",
                            "relative_path": item.relative_path,
                        },
                    )
                    entry["placement_id"] = placement["id"]
        else:
            entry.update(
                {
                    "path": str(scanned.path),
                    "relative_path": item.relative_path,
                    "last_sync_status": "synced",
                }
            )
        manifest.directories[item.relative_path] = entry


def _ingest_file(
    plan: SyncPlan,
    manifest: SyncManifest,
    backend: FolderSyncBackend,
    item: FilePlanItem,
    scanned: ScannedFile,
    entry: dict[str, Any],
) -> None:
    """Ingest one file, place it under its directory, and record the result in *entry*."""
    payload = backend.ingest_file(scanned, metadata=_file_metadata(plan.root_path, scanned))
    # Fall back to the root collection when the file's own directory has no entry.
    parent = (
        manifest.directories.get(_parent_relative_path(item.relative_path))
        or manifest.directories.get(".")
        or {}
    )
    parent_id = parent.get("node_id")
    placement_id = None
    collection_id = str(payload["collection"]["id"])
    if parent_id:
        placement = backend.place_node(
            parent_id=str(parent_id),
            child_id=collection_id,
            name=scanned.path.name,
            metadata={"role": "folder_sync_file", "relative_path": item.relative_path},
        )
        placement_id = placement["id"]
    history = list(entry.get("history") or [])
    if entry.get("collection_id") or entry.get("document_id"):
        # Remember the import being superseded; its remote nodes are kept.
        history.append(
            {
                "collection_id": entry.get("collection_id"),
                "document_id": entry.get("document_id"),
                "sha256": entry.get("sha256"),
                "replaced_at": _now_iso(),
            }
        )
    entry.update(
        {
            "path": str(scanned.path),
            "relative_path": item.relative_path,
            "sha256": scanned.sha256,
            "mtime_ns": scanned.mtime_ns,
            "size_bytes": scanned.size_bytes,
            "collection_id": collection_id,
            "document_id": str(payload["document"]["id"]),
            "document_key": str(payload["document_key"]),
            "placement_id": placement_id,
            "last_sync_status": "synced",
            "last_sync_action": item.action,
            "last_error": None,
            "last_skip_reason": None,
            "last_synced_at": _now_iso(),
            "history": history,
        }
    )


def _sync_files(plan: SyncPlan, manifest: SyncManifest, backend: FolderSyncBackend) -> None:
    """Apply every file plan item, recording per-file failures instead of raising."""
    for item in plan.files:
        entry = dict(manifest.files.get(item.relative_path) or {})
        scanned = item.scanned
        if item.action == "unchanged" and scanned is not None:
            entry.update(
                {
                    "path": str(scanned.path),
                    "relative_path": item.relative_path,
                    "sha256": scanned.sha256,
                    "mtime_ns": scanned.mtime_ns,
                    "size_bytes": scanned.size_bytes,
                    "last_sync_status": "synced",
                    "last_skip_reason": None,
                }
            )
            manifest.files[item.relative_path] = entry
            continue
        if item.action == "skipped" and item.skipped is not None:
            entry.update(
                {
                    "path": str(item.skipped.path),
                    "relative_path": item.relative_path,
                    "size_bytes": item.skipped.size_bytes,
                    "last_sync_status": "skipped",
                    "last_skip_reason": item.reason,
                    "last_synced_at": _now_iso(),
                }
            )
            manifest.files[item.relative_path] = entry
            continue
        if scanned is None:
            continue
        try:
            _ingest_file(plan, manifest, backend, item, scanned, entry)
        except Exception as exc:
            # Per-file boundary: one failing file must not stop the others. The
            # entry keeps the error status, so the next plan does not treat the
            # file as unchanged.
            entry.update(
                {
                    "path": str(scanned.path),
                    "relative_path": item.relative_path,
                    "sha256": scanned.sha256,
                    "mtime_ns": scanned.mtime_ns,
                    "size_bytes": scanned.size_bytes,
                    "last_sync_status": "error",
                    "last_error": str(exc),
                    "last_synced_at": _now_iso(),
                }
            )
        manifest.files[item.relative_path] = entry


def _mark_deleted(plan: SyncPlan, manifest: SyncManifest) -> None:
    """Flag manifest entries whose local path vanished; nothing is deleted remotely."""
    for table, missing in (
        (manifest.files, plan.deleted_files),
        (manifest.directories, plan.deleted_directories),
    ):
        for relative_path in missing:
            entry = dict(table.get(relative_path) or {})
            entry["last_sync_status"] = "deleted_local"
            entry["last_skip_reason"] = "local path missing; V0 does not delete remote nodes"
            entry["last_synced_at"] = _now_iso()
            table[relative_path] = entry


def run_folder_sync(
    manifest: SyncManifest,
    *,
    backend: FolderSyncBackend,
    manifest_path: str | Path | None = None,
    dry_run: bool = False,
) -> dict[str, Any]:
    """Plan a sync for *manifest* and, unless *dry_run*, carry it out through *backend*.

    Args:
        manifest: Manifest to sync; it is updated in place.
        backend: Performs collection creation, placement and ingestion.
        manifest_path: Where to save the updated manifest; ``None`` skips saving.
        dry_run: When true, only the plan summary is returned and nothing changes.

    Returns:
        A dict with ``status`` (``"planned"`` or ``"success"``), the plan
        ``summary`` and the manifest as a dict. File ingestion failures are
        recorded on the file's manifest entry and do not change ``status``.

    Raises:
        Exception: whatever the backend raises while creating or placing a
            directory. The manifest is still saved first, so collections
            recorded before the failure are not created again on the next run.
    """
    plan = plan_folder_sync(manifest)
    if dry_run:
        return {"status": "planned", "summary": plan.summary(), "manifest": manifest.to_dict()}

    try:
        _sync_directories(plan, manifest, backend)
        _sync_files(plan, manifest, backend)
        _mark_deleted(plan, manifest)
        manifest.last_run = {"ran_at": _now_iso(), "summary": plan.summary()}
    finally:
        # Save on failure too: remote nodes are never deleted, so losing their
        # ids would mean duplicate collections on the next run.
        if manifest_path is not None:
            save_manifest(manifest, manifest_path)
    return {"status": "success", "summary": plan.summary(), "manifest": manifest.to_dict()}
810
+
811
+
812
def run_engine_folder_sync(
    engine: ArthaEngine,
    manifest: SyncManifest,
    *,
    scope: list[str],
    event_metadata: dict[str, object] | None = None,
    manifest_path: str | Path | None = None,
    dry_run: bool = False,
) -> dict[str, Any]:
    """Run a folder sync against *engine*.

    Memory projections are ensured first (also for a dry run); the sync itself
    then goes through an ``EngineFolderSyncBackend`` built from *scope* and
    *event_metadata*. The result is that of ``run_folder_sync``.
    """
    ensure_memory_projections(engine)
    options: dict[str, Any] = {"manifest_path": manifest_path, "dry_run": dry_run}
    engine_backend = EngineFolderSyncBackend(engine, scope=scope, event_metadata=event_metadata)
    return run_folder_sync(manifest, backend=engine_backend, **options)