agmem-0.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. agmem-0.1.1.dist-info/METADATA +656 -0
  2. agmem-0.1.1.dist-info/RECORD +67 -0
  3. agmem-0.1.1.dist-info/WHEEL +5 -0
  4. agmem-0.1.1.dist-info/entry_points.txt +2 -0
  5. agmem-0.1.1.dist-info/licenses/LICENSE +21 -0
  6. agmem-0.1.1.dist-info/top_level.txt +1 -0
  7. memvcs/__init__.py +9 -0
  8. memvcs/cli.py +178 -0
  9. memvcs/commands/__init__.py +23 -0
  10. memvcs/commands/add.py +258 -0
  11. memvcs/commands/base.py +23 -0
  12. memvcs/commands/blame.py +169 -0
  13. memvcs/commands/branch.py +110 -0
  14. memvcs/commands/checkout.py +101 -0
  15. memvcs/commands/clean.py +76 -0
  16. memvcs/commands/clone.py +91 -0
  17. memvcs/commands/commit.py +174 -0
  18. memvcs/commands/daemon.py +267 -0
  19. memvcs/commands/diff.py +157 -0
  20. memvcs/commands/fsck.py +203 -0
  21. memvcs/commands/garden.py +107 -0
  22. memvcs/commands/graph.py +151 -0
  23. memvcs/commands/init.py +61 -0
  24. memvcs/commands/log.py +103 -0
  25. memvcs/commands/mcp.py +59 -0
  26. memvcs/commands/merge.py +88 -0
  27. memvcs/commands/pull.py +65 -0
  28. memvcs/commands/push.py +143 -0
  29. memvcs/commands/reflog.py +52 -0
  30. memvcs/commands/remote.py +51 -0
  31. memvcs/commands/reset.py +98 -0
  32. memvcs/commands/search.py +163 -0
  33. memvcs/commands/serve.py +54 -0
  34. memvcs/commands/show.py +125 -0
  35. memvcs/commands/stash.py +97 -0
  36. memvcs/commands/status.py +112 -0
  37. memvcs/commands/tag.py +117 -0
  38. memvcs/commands/test.py +132 -0
  39. memvcs/commands/tree.py +156 -0
  40. memvcs/core/__init__.py +21 -0
  41. memvcs/core/config_loader.py +245 -0
  42. memvcs/core/constants.py +12 -0
  43. memvcs/core/diff.py +380 -0
  44. memvcs/core/gardener.py +466 -0
  45. memvcs/core/hooks.py +151 -0
  46. memvcs/core/knowledge_graph.py +381 -0
  47. memvcs/core/merge.py +474 -0
  48. memvcs/core/objects.py +323 -0
  49. memvcs/core/pii_scanner.py +343 -0
  50. memvcs/core/refs.py +447 -0
  51. memvcs/core/remote.py +278 -0
  52. memvcs/core/repository.py +522 -0
  53. memvcs/core/schema.py +414 -0
  54. memvcs/core/staging.py +227 -0
  55. memvcs/core/storage/__init__.py +72 -0
  56. memvcs/core/storage/base.py +359 -0
  57. memvcs/core/storage/gcs.py +308 -0
  58. memvcs/core/storage/local.py +182 -0
  59. memvcs/core/storage/s3.py +369 -0
  60. memvcs/core/test_runner.py +371 -0
  61. memvcs/core/vector_store.py +313 -0
  62. memvcs/integrations/__init__.py +5 -0
  63. memvcs/integrations/mcp_server.py +267 -0
  64. memvcs/integrations/web_ui/__init__.py +1 -0
  65. memvcs/integrations/web_ui/server.py +352 -0
  66. memvcs/utils/__init__.py +9 -0
  67. memvcs/utils/helpers.py +178 -0
memvcs/core/schema.py ADDED
@@ -0,0 +1,414 @@
+"""
+Schema validation for agmem memory files.
+
+Implements YAML frontmatter parsing and validation for structured memory metadata.
+"""
+
+import re
+from datetime import datetime
+from pathlib import Path
+from typing import Optional, Dict, Any, List, Tuple
+from dataclasses import dataclass, field
+from enum import Enum
+
+try:
+    import yaml
+    YAML_AVAILABLE = True
+except ImportError:
+    YAML_AVAILABLE = False
+
+from .constants import MEMORY_TYPES
+
+
+class MemoryType(Enum):
+    """Memory types with their validation requirements."""
+    EPISODIC = "episodic"
+    SEMANTIC = "semantic"
+    PROCEDURAL = "procedural"
+    CHECKPOINTS = "checkpoints"
+    SESSION_SUMMARIES = "session-summaries"
+    UNKNOWN = "unknown"
+
+
+@dataclass
+class FrontmatterData:
+    """Parsed frontmatter data from a memory file."""
+    schema_version: str = "1.0"
+    last_updated: Optional[str] = None
+    source_agent_id: Optional[str] = None
+    confidence_score: Optional[float] = None
+    memory_type: Optional[str] = None
+    tags: List[str] = field(default_factory=list)
+    extra: Dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for serialization."""
+        result = {
+            "schema_version": self.schema_version,
+        }
+        if self.last_updated:
+            result["last_updated"] = self.last_updated
+        if self.source_agent_id:
+            result["source_agent_id"] = self.source_agent_id
+        if self.confidence_score is not None:
+            result["confidence_score"] = self.confidence_score
+        if self.memory_type:
+            result["memory_type"] = self.memory_type
+        if self.tags:
+            result["tags"] = self.tags
+        result.update(self.extra)
+        return result
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'FrontmatterData':
+        """Create from dictionary."""
+        known_fields = {
+            'schema_version', 'last_updated', 'source_agent_id',
+            'confidence_score', 'memory_type', 'tags'
+        }
+        extra = {k: v for k, v in data.items() if k not in known_fields}
+
+        return cls(
+            schema_version=data.get('schema_version', '1.0'),
+            last_updated=data.get('last_updated'),
+            source_agent_id=data.get('source_agent_id'),
+            confidence_score=data.get('confidence_score'),
+            memory_type=data.get('memory_type'),
+            tags=data.get('tags', []),
+            extra=extra
+        )
+
+
+@dataclass
+class ValidationError:
+    """A single validation error."""
+    field: str
+    message: str
+    severity: str = "error"  # "error" or "warning"
+
+
+@dataclass
+class ValidationResult:
+    """Result of validating a memory file."""
+    valid: bool
+    errors: List[ValidationError] = field(default_factory=list)
+    warnings: List[ValidationError] = field(default_factory=list)
+    frontmatter: Optional[FrontmatterData] = None
+
+    def add_error(self, field: str, message: str):
+        """Add a validation error."""
+        self.errors.append(ValidationError(field=field, message=message, severity="error"))
+        self.valid = False
+
+    def add_warning(self, field: str, message: str):
+        """Add a validation warning."""
+        self.warnings.append(ValidationError(field=field, message=message, severity="warning"))
+
+
+class FrontmatterParser:
+    """Parser for YAML frontmatter in memory files."""
+
+    # Regex to match YAML frontmatter block
+    FRONTMATTER_PATTERN = re.compile(
+        r'^---\s*\n(.*?)\n---\s*\n',
+        re.DOTALL | re.MULTILINE
+    )
+
+    @classmethod
+    def parse(cls, content: str) -> Tuple[Optional[FrontmatterData], str]:
+        """
+        Parse frontmatter from content.
+
+        Args:
+            content: Full file content
+
+        Returns:
+            Tuple of (frontmatter_data, body_content)
+            frontmatter_data is None if no frontmatter found
+        """
+        if not YAML_AVAILABLE:
+            # Without PyYAML, return None for frontmatter
+            return None, content
+
+        match = cls.FRONTMATTER_PATTERN.match(content)
+        if not match:
+            return None, content
+
+        yaml_content = match.group(1)
+        body = content[match.end():]
+
+        try:
+            data = yaml.safe_load(yaml_content)
+            if not isinstance(data, dict):
+                return None, content
+
+            frontmatter = FrontmatterData.from_dict(data)
+            return frontmatter, body
+        except yaml.YAMLError:
+            return None, content
+
+    @classmethod
+    def has_frontmatter(cls, content: str) -> bool:
+        """Check if content has YAML frontmatter."""
+        return bool(cls.FRONTMATTER_PATTERN.match(content))
+
+    @classmethod
+    def create_frontmatter(cls, data: FrontmatterData) -> str:
+        """
+        Create YAML frontmatter string from data.
+
+        Args:
+            data: FrontmatterData to serialize
+
+        Returns:
+            YAML frontmatter string with delimiters
+        """
+        if not YAML_AVAILABLE:
+            # Manual YAML generation without PyYAML
+            lines = ["---"]
+            d = data.to_dict()
+            for key, value in d.items():
+                if isinstance(value, list):
+                    lines.append(f"{key}: [{', '.join(str(v) for v in value)}]")
+                elif value is not None:
+                    lines.append(f"{key}: {value}")
+            lines.append("---")
+            return '\n'.join(lines) + '\n'
+
+        yaml_str = yaml.dump(data.to_dict(), default_flow_style=False, sort_keys=False)
+        return f"---\n{yaml_str}---\n"
+
+    @classmethod
+    def add_or_update_frontmatter(cls, content: str, data: FrontmatterData) -> str:
+        """
+        Add or update frontmatter in content.
+
+        Args:
+            content: Original file content
+            data: FrontmatterData to add/update
+
+        Returns:
+            Content with updated frontmatter
+        """
+        _, body = cls.parse(content)
+        frontmatter_str = cls.create_frontmatter(data)
+        return frontmatter_str + body
+
+
+class SchemaValidator:
+    """Validates memory files against schema requirements."""
+
+    # Required fields per memory type
+    REQUIRED_FIELDS: Dict[MemoryType, List[str]] = {
+        MemoryType.SEMANTIC: ['schema_version', 'last_updated'],
+        MemoryType.EPISODIC: ['schema_version'],
+        MemoryType.PROCEDURAL: ['schema_version', 'last_updated'],
+        MemoryType.CHECKPOINTS: ['schema_version', 'last_updated'],
+        MemoryType.SESSION_SUMMARIES: ['schema_version', 'last_updated'],
+        MemoryType.UNKNOWN: ['schema_version'],
+    }
+
+    # Recommended fields per memory type (generate warnings if missing)
+    RECOMMENDED_FIELDS: Dict[MemoryType, List[str]] = {
+        MemoryType.SEMANTIC: ['source_agent_id', 'confidence_score', 'tags'],
+        MemoryType.EPISODIC: ['source_agent_id'],
+        MemoryType.PROCEDURAL: ['source_agent_id', 'tags'],
+        MemoryType.CHECKPOINTS: ['source_agent_id'],
+        MemoryType.SESSION_SUMMARIES: ['source_agent_id'],
+        MemoryType.UNKNOWN: [],
+    }
+
+    @classmethod
+    def detect_memory_type(cls, filepath: str) -> MemoryType:
+        """
+        Detect memory type from file path.
+
+        Args:
+            filepath: Path to the file
+
+        Returns:
+            MemoryType enum value
+        """
+        path_lower = filepath.lower()
+
+        if 'episodic' in path_lower:
+            return MemoryType.EPISODIC
+        elif 'semantic' in path_lower:
+            return MemoryType.SEMANTIC
+        elif 'procedural' in path_lower:
+            return MemoryType.PROCEDURAL
+        elif 'checkpoint' in path_lower:
+            return MemoryType.CHECKPOINTS
+        elif 'session-summar' in path_lower or 'session_summar' in path_lower:
+            return MemoryType.SESSION_SUMMARIES
+
+        return MemoryType.UNKNOWN
+
+    @classmethod
+    def validate(cls, content: str, filepath: str, strict: bool = False) -> ValidationResult:
+        """
+        Validate a memory file's frontmatter.
+
+        Args:
+            content: File content
+            filepath: Path to the file (for type detection)
+            strict: If True, treat warnings as errors
+
+        Returns:
+            ValidationResult with errors and warnings
+        """
+        result = ValidationResult(valid=True)
+        memory_type = cls.detect_memory_type(filepath)
+
+        # Parse frontmatter
+        frontmatter, body = FrontmatterParser.parse(content)
+        result.frontmatter = frontmatter
+
+        # Check for missing frontmatter
+        if frontmatter is None:
+            result.add_error('frontmatter', 'Missing YAML frontmatter block')
+            return result
+
+        # Check required fields
+        required = cls.REQUIRED_FIELDS.get(memory_type, [])
+        frontmatter_dict = frontmatter.to_dict()
+
+        for field in required:
+            if field not in frontmatter_dict or frontmatter_dict[field] is None:
+                result.add_error(field, f"Required field '{field}' is missing")
+
+        # Check recommended fields
+        recommended = cls.RECOMMENDED_FIELDS.get(memory_type, [])
+        for field in recommended:
+            if field not in frontmatter_dict or frontmatter_dict[field] is None:
+                if strict:
+                    result.add_error(field, f"Recommended field '{field}' is missing (strict mode)")
+                else:
+                    result.add_warning(field, f"Recommended field '{field}' is missing")
+
+        # Validate schema_version format
+        if frontmatter.schema_version:
+            if not re.match(r'^\d+\.\d+$', frontmatter.schema_version):
+                result.add_error('schema_version',
+                                 f"Invalid schema_version format: '{frontmatter.schema_version}' (expected X.Y)")
+
+        # Validate last_updated format (ISO 8601)
+        if frontmatter.last_updated:
+            try:
+                # Try parsing ISO format
+                if frontmatter.last_updated.endswith('Z'):
+                    datetime.fromisoformat(frontmatter.last_updated.replace('Z', '+00:00'))
+                else:
+                    datetime.fromisoformat(frontmatter.last_updated)
+            except ValueError:
+                result.add_error('last_updated',
+                                 f"Invalid last_updated format: '{frontmatter.last_updated}' (expected ISO 8601)")
+
+        # Validate confidence_score range
+        if frontmatter.confidence_score is not None:
+            if not isinstance(frontmatter.confidence_score, (int, float)):
+                result.add_error('confidence_score',
+                                 f"confidence_score must be a number, got: {type(frontmatter.confidence_score).__name__}")
+            elif not (0.0 <= frontmatter.confidence_score <= 1.0):
+                result.add_error('confidence_score',
+                                 f"confidence_score must be between 0.0 and 1.0, got: {frontmatter.confidence_score}")
+
+        # Validate memory_type if specified
+        if frontmatter.memory_type:
+            valid_types = [mt.value for mt in MemoryType if mt != MemoryType.UNKNOWN]
+            if frontmatter.memory_type not in valid_types:
+                result.add_warning('memory_type',
+                                   f"Unknown memory_type: '{frontmatter.memory_type}' (expected one of: {valid_types})")
+
+        # Validate tags is a list
+        if frontmatter.tags and not isinstance(frontmatter.tags, list):
+            result.add_error('tags', f"tags must be a list, got: {type(frontmatter.tags).__name__}")
+
+        return result
+
+    @classmethod
+    def validate_batch(cls, files: Dict[str, str], strict: bool = False) -> Dict[str, ValidationResult]:
+        """
+        Validate multiple files.
+
+        Args:
+            files: Dict mapping filepath to content
+            strict: If True, treat warnings as errors
+
+        Returns:
+            Dict mapping filepath to ValidationResult
+        """
+        results = {}
+        for filepath, content in files.items():
+            results[filepath] = cls.validate(content, filepath, strict)
+        return results
+
+
+def generate_frontmatter(
+    memory_type: str = "semantic",
+    source_agent_id: Optional[str] = None,
+    confidence_score: Optional[float] = None,
+    tags: Optional[List[str]] = None
+) -> FrontmatterData:
+    """
+    Generate frontmatter data with current timestamp.
+
+    Args:
+        memory_type: Type of memory (episodic, semantic, procedural, etc.)
+        source_agent_id: ID of the agent creating this memory
+        confidence_score: Confidence score (0.0 to 1.0)
+        tags: List of tags for categorization
+
+    Returns:
+        FrontmatterData with populated fields
+    """
+    return FrontmatterData(
+        schema_version="1.0",
+        last_updated=datetime.utcnow().isoformat() + 'Z',
+        source_agent_id=source_agent_id,
+        confidence_score=confidence_score,
+        memory_type=memory_type,
+        tags=tags or []
+    )


+def compare_timestamps(timestamp1: Optional[str], timestamp2: Optional[str]) -> int:
+    """
+    Compare two ISO 8601 timestamps.
+
+    Args:
+        timestamp1: First timestamp
+        timestamp2: Second timestamp
+
+    Returns:
+        -1 if timestamp1 < timestamp2
+        0 if timestamp1 == timestamp2
+        1 if timestamp1 > timestamp2
+
+    If either timestamp is None or invalid, the other is considered newer.
+    """
+    def parse_ts(ts: Optional[str]) -> Optional[datetime]:
+        if not ts:
+            return None
+        try:
+            if ts.endswith('Z'):
+                return datetime.fromisoformat(ts.replace('Z', '+00:00'))
+            return datetime.fromisoformat(ts)
+        except ValueError:
+            return None
+
+    dt1 = parse_ts(timestamp1)
+    dt2 = parse_ts(timestamp2)
+
+    if dt1 is None and dt2 is None:
+        return 0
+    if dt1 is None:
+        return -1
+    if dt2 is None:
+        return 1
+
+    if dt1 < dt2:
+        return -1
+    elif dt1 > dt2:
+        return 1
+    return 0
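
Taken together, schema.py forms a small pipeline: generate_frontmatter() stamps new metadata, FrontmatterParser round-trips it through the YAML block, and SchemaValidator checks required and recommended fields by memory type. Below is a minimal usage sketch, not taken from the package's docs; the note text, file path, and agent id are invented, and PyYAML is assumed to be installed so parsing is active:

# Sketch only: invented note content, path, and agent id.
from memvcs.core.schema import FrontmatterParser, SchemaValidator, generate_frontmatter

note = "Paris is the capital of France.\n"
fm = generate_frontmatter(
    memory_type="semantic",
    source_agent_id="agent-007",  # hypothetical agent id
    confidence_score=0.9,
    tags=["geography"],
)
content = FrontmatterParser.add_or_update_frontmatter(note, fm)

# The validator infers the memory type from the path ("semantic" here).
result = SchemaValidator.validate(content, "semantic/facts/paris.md")
assert result.valid and not result.warnings

With every recommended field populated, validation passes cleanly; dropping confidence_score, for instance, would produce a warning, or an error when strict=True.
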
memvcs/core/staging.py ADDED
@@ -0,0 +1,227 @@
+"""
+Staging area for agmem.
+
+Manages the index of files staged for commit.
+"""
+
+import json
+import os
+import shutil
+from pathlib import Path
+from typing import Dict, List, Optional, Set, Tuple
+from dataclasses import dataclass, asdict
+
+
+@dataclass
+class StagedFile:
+    """Represents a file in the staging area."""
+    path: str  # Relative path from current/
+    blob_hash: str
+    mode: int = 0o100644  # Regular file
+
+
+def _path_under_root(relative_path: str, root: Path) -> Optional[Path]:
+    """
+    Resolve relative_path under root and ensure it stays inside root.
+    Returns the resolved Path or None if path escapes root (path traversal).
+    """
+    try:
+        resolved = (root / relative_path).resolve()
+        resolved.relative_to(root.resolve())
+        return resolved
+    except ValueError:
+        return None
+
+
+class StagingArea:
+    """Manages the staging area for memory commits."""
+
+    def __init__(self, mem_dir: Path):
+        self.mem_dir = Path(mem_dir)
+        self.staging_dir = self.mem_dir / 'staging'
+        self.index_file = self.mem_dir / 'index.json'
+        self._index: Dict[str, StagedFile] = {}
+        self._load_index()
+
+    def _load_index(self):
+        """Load the staging index from disk."""
+        if self.index_file.exists():
+            try:
+                data = json.loads(self.index_file.read_text())
+                for path, info in data.items():
+                    if _path_under_root(path, self.staging_dir) is None:
+                        continue
+                    self._index[path] = StagedFile(
+                        path=path,
+                        blob_hash=info['blob_hash'],
+                        mode=info.get('mode', 0o100644)
+                    )
+            except (json.JSONDecodeError, KeyError):
+                self._index = {}
+
+    def _save_index(self):
+        """Save the staging index to disk."""
+        data = {
+            path: {
+                'blob_hash': sf.blob_hash,
+                'mode': sf.mode
+            }
+            for path, sf in self._index.items()
+        }
+        self.index_file.write_text(json.dumps(data, indent=2))
+
+    def add(self, filepath: str, blob_hash: str, content: bytes, mode: int = 0o100644):
+        """
+        Add a file to the staging area.
+
+        Args:
+            filepath: Relative path from current/
+            blob_hash: Hash of the blob object
+            content: File content bytes
+            mode: File mode (default 0o100644 for regular file)
+
+        Raises:
+            ValueError: If filepath escapes staging directory (path traversal)
+        """
+        staging_path = _path_under_root(filepath, self.staging_dir)
+        if staging_path is None:
+            raise ValueError(f"Path escapes staging area: {filepath}")
+
+        self._index[filepath] = StagedFile(
+            path=filepath,
+            blob_hash=blob_hash,
+            mode=mode
+        )
+
+        staging_path.parent.mkdir(parents=True, exist_ok=True)
+        staging_path.write_bytes(content)
+
+        self._save_index()
+
+    def remove(self, filepath: str) -> bool:
+        """
+        Remove a file from the staging area.
+
+        Returns:
+            True if file was in staging, False otherwise
+        """
+        if filepath in self._index:
+            del self._index[filepath]
+
+            staging_path = _path_under_root(filepath, self.staging_dir)
+            if staging_path is not None and staging_path.exists():
+                staging_path.unlink()
+                # Clean up empty directories
+                self._cleanup_empty_dirs(staging_path.parent)
+
+            self._save_index()
+            return True
+        return False
+
+    def _cleanup_empty_dirs(self, dir_path: Path):
+        """Remove empty directories up to staging root."""
+        try:
+            while dir_path != self.staging_dir:
+                if dir_path.exists() and not any(dir_path.iterdir()):
+                    dir_path.rmdir()
+                    dir_path = dir_path.parent
+                else:
+                    break
+        except OSError:
+            pass
+
+    def get_staged_files(self) -> Dict[str, StagedFile]:
+        """Get all staged files."""
+        return dict(self._index)
+
+    def is_staged(self, filepath: str) -> bool:
+        """Check if a file is staged."""
+        return filepath in self._index
+
+    def get_blob_hash(self, filepath: str) -> Optional[str]:
+        """Get the blob hash for a staged file."""
+        if filepath in self._index:
+            return self._index[filepath].blob_hash
+        return None
+
+    def clear(self):
+        """Clear the entire staging area."""
+        self._index = {}
+
+        # Remove staging directory contents
+        if self.staging_dir.exists():
+            shutil.rmtree(self.staging_dir)
+        self.staging_dir.mkdir(parents=True, exist_ok=True)
+
+        # Remove index file
+        if self.index_file.exists():
+            self.index_file.unlink()
+
+    def get_status(self) -> Dict[str, List[str]]:
+        """
+        Get staging status.
+
+        Returns:
+            Dict with 'staged', 'modified', 'deleted', 'untracked' lists
+        """
+        staged = list(self._index.keys())
+
+        return {
+            'staged': staged,
+            'modified': [],  # TODO: Compare with working directory
+            'deleted': [],  # TODO: Check if files were deleted
+            'untracked': []  # TODO: Find untracked files
+        }
+
+    def get_tree_entries(self) -> List[Dict]:
+        """
+        Get tree entries for creating a tree object.
+
+        Returns:
+            List of entry dictionaries for Tree creation
+        """
+        entries = []
+        for path, sf in self._index.items():
+            entries.append({
+                'mode': oct(sf.mode)[2:],  # Convert to string like '100644'
+                'type': 'blob',
+                'hash': sf.blob_hash,
+                'name': Path(path).name,
+                'path': str(Path(path).parent) if str(Path(path).parent) != '.' else ''
+            })
+        return entries
+
+    def diff_with_head(self, repo) -> Dict[str, Dict]:
+        """
+        Compare staging area with HEAD commit.
+
+        Returns:
+            Dict mapping file paths to change info
+        """
+        changes = {}
+
+        # Get HEAD tree; default to no entries when HEAD or its tree is missing
+        head_entries = {}
+        head_commit = repo.get_head_commit()
+        if head_commit:
+            head_tree_bytes = repo.object_store.retrieve(head_commit.tree, 'tree')
+            if head_tree_bytes:
+                head_data = json.loads(head_tree_bytes.decode('utf-8'))
+                head_entries = {e['path'] + '/' + e['name'] if e['path'] else e['name']: e
+                                for e in head_data.get('entries', [])}
+
+        # Compare with staging
+        for path, sf in self._index.items():
+            if path in head_entries:
+                if head_entries[path]['hash'] != sf.blob_hash:
+                    changes[path] = {'status': 'modified', 'blob_hash': sf.blob_hash}
+            else:
+                changes[path] = {'status': 'added', 'blob_hash': sf.blob_hash}
+
+        # Check for deleted files
+        for path in head_entries:
+            if path not in self._index:
+                changes[path] = {'status': 'deleted'}
+
+        return changes
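
The staging API above is self-contained enough to exercise directly. A minimal sketch follows; the directory layout and the sha256 stand-in are invented for illustration, since in the package the blob hash would come from the object store (memvcs/core/objects.py, not shown here):

# Sketch only: illustrative paths and hashing, not the package's own workflow.
import hashlib
import tempfile
from pathlib import Path

from memvcs.core.staging import StagingArea

mem_dir = Path(tempfile.mkdtemp()) / "mem"  # hypothetical metadata directory
mem_dir.mkdir(parents=True)

staging = StagingArea(mem_dir)
content = b"favorite_editor: vim\n"
blob_hash = hashlib.sha256(content).hexdigest()  # stand-in for the real blob hash

staging.add("semantic/preferences.md", blob_hash, content)
assert staging.is_staged("semantic/preferences.md")
assert staging.get_tree_entries()[0]["mode"] == "100644"

# Traversal attempts are rejected rather than written outside staging/.
try:
    staging.add("../escape.md", blob_hash, content)
except ValueError as exc:
    print(exc)  # Path escapes staging area: ../escape.md

remove() undoes a single add() and prunes any directories it emptied, while clear() resets both the staging tree and index.json.
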