agmem 0.1.1-py3-none-any.whl → 0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/METADATA +20 -3
- agmem-0.1.2.dist-info/RECORD +86 -0
- memvcs/__init__.py +1 -1
- memvcs/cli.py +35 -31
- memvcs/commands/__init__.py +9 -9
- memvcs/commands/add.py +77 -76
- memvcs/commands/blame.py +46 -53
- memvcs/commands/branch.py +13 -33
- memvcs/commands/checkout.py +27 -32
- memvcs/commands/clean.py +18 -23
- memvcs/commands/clone.py +4 -1
- memvcs/commands/commit.py +40 -39
- memvcs/commands/daemon.py +81 -76
- memvcs/commands/decay.py +77 -0
- memvcs/commands/diff.py +56 -57
- memvcs/commands/distill.py +74 -0
- memvcs/commands/fsck.py +55 -61
- memvcs/commands/garden.py +28 -37
- memvcs/commands/graph.py +41 -48
- memvcs/commands/init.py +16 -24
- memvcs/commands/log.py +25 -40
- memvcs/commands/merge.py +16 -28
- memvcs/commands/pack.py +129 -0
- memvcs/commands/pull.py +4 -1
- memvcs/commands/push.py +4 -2
- memvcs/commands/recall.py +145 -0
- memvcs/commands/reflog.py +13 -22
- memvcs/commands/remote.py +1 -0
- memvcs/commands/repair.py +66 -0
- memvcs/commands/reset.py +23 -33
- memvcs/commands/resurrect.py +82 -0
- memvcs/commands/search.py +3 -4
- memvcs/commands/serve.py +2 -1
- memvcs/commands/show.py +66 -36
- memvcs/commands/stash.py +34 -34
- memvcs/commands/status.py +27 -35
- memvcs/commands/tag.py +23 -47
- memvcs/commands/test.py +30 -44
- memvcs/commands/timeline.py +111 -0
- memvcs/commands/tree.py +26 -27
- memvcs/commands/verify.py +59 -0
- memvcs/commands/when.py +115 -0
- memvcs/core/access_index.py +167 -0
- memvcs/core/config_loader.py +3 -1
- memvcs/core/consistency.py +214 -0
- memvcs/core/decay.py +185 -0
- memvcs/core/diff.py +158 -143
- memvcs/core/distiller.py +277 -0
- memvcs/core/gardener.py +164 -132
- memvcs/core/hooks.py +48 -14
- memvcs/core/knowledge_graph.py +134 -138
- memvcs/core/merge.py +248 -171
- memvcs/core/objects.py +95 -96
- memvcs/core/pii_scanner.py +147 -146
- memvcs/core/refs.py +132 -115
- memvcs/core/repository.py +174 -164
- memvcs/core/schema.py +155 -113
- memvcs/core/staging.py +60 -65
- memvcs/core/storage/__init__.py +20 -18
- memvcs/core/storage/base.py +74 -70
- memvcs/core/storage/gcs.py +70 -68
- memvcs/core/storage/local.py +42 -40
- memvcs/core/storage/s3.py +105 -110
- memvcs/core/temporal_index.py +112 -0
- memvcs/core/test_runner.py +101 -93
- memvcs/core/vector_store.py +41 -35
- memvcs/integrations/mcp_server.py +1 -3
- memvcs/integrations/web_ui/server.py +25 -26
- memvcs/retrieval/__init__.py +22 -0
- memvcs/retrieval/base.py +54 -0
- memvcs/retrieval/pack.py +128 -0
- memvcs/retrieval/recaller.py +105 -0
- memvcs/retrieval/strategies.py +314 -0
- memvcs/utils/__init__.py +3 -3
- memvcs/utils/helpers.py +52 -52
- agmem-0.1.1.dist-info/RECORD +0 -67
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/WHEEL +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/top_level.txt +0 -0
memvcs/core/test_runner.py
CHANGED

@@ -14,6 +14,7 @@ from datetime import datetime
 
 try:
     import yaml
+
     YAML_AVAILABLE = True
 except ImportError:
     YAML_AVAILABLE = False

@@ -22,6 +23,7 @@ except ImportError:
 @dataclass
 class TestCase:
     """A single test case for memory validation."""
+
     name: str
     query: str
     expected_fact: str

@@ -33,6 +35,7 @@ class TestCase:
 @dataclass
 class TestFailure:
     """Represents a failed test."""
+
     test_name: str
     query: str
     expected: str

@@ -44,6 +47,7 @@ class TestFailure:
 @dataclass
 class TestResult:
     """Result of running memory tests."""
+
     passed: bool
     total_count: int
     passed_count: int

@@ -55,118 +59,122 @@ class TestResult:
 class TestRunner:
     """
     Runner for memory regression tests.
-
+
     Tests are defined in YAML files in the tests/ directory of the memory repo.
     """
-
+
    def __init__(self, repo, vector_store=None):
        """
        Initialize test runner.
-
+
        Args:
            repo: Repository instance
            vector_store: Optional VectorStore for semantic search tests
        """
        self.repo = repo
        self.vector_store = vector_store
-        self.tests_dir = repo.root / 'tests'
-
+        self.tests_dir = repo.root / "tests"
+
    def load_tests(self) -> List[TestCase]:
        """
        Load all test cases from the tests/ directory.
-
+
        Returns:
            List of TestCase objects
        """
        tests = []
-
+
        if not self.tests_dir.exists():
            return tests
-
-        for test_file in self.tests_dir.glob('**/*.yaml'):
+
+        for test_file in self.tests_dir.glob("**/*.yaml"):
            tests.extend(self._load_test_file(test_file))
-
-        for test_file in self.tests_dir.glob('**/*.yml'):
+
+        for test_file in self.tests_dir.glob("**/*.yml"):
            tests.extend(self._load_test_file(test_file))
-
-        for test_file in self.tests_dir.glob('**/*.json'):
+
+        for test_file in self.tests_dir.glob("**/*.json"):
            tests.extend(self._load_json_test_file(test_file))
-
+
        return tests
-
+
    def _load_test_file(self, path: Path) -> List[TestCase]:
        """Load tests from a YAML file."""
        if not YAML_AVAILABLE:
            return []
-
+
        try:
            with open(path) as f:
                data = yaml.safe_load(f)
-
-            if not data or 'tests' not in data:
+
+            if not data or "tests" not in data:
                return []
-
+
            tests = []
            file_name = path.stem
-
-            for i, test_data in enumerate(data['tests']):
-                name = test_data.get('name', f'{file_name}_{i}')
-                tests.append(TestCase(
-                    name=name,
-                    query=test_data['query'],
-                    expected_fact=test_data['expected_fact'],
-                    confidence_threshold=test_data.get('confidence_threshold', 0.7),
-                    required=test_data.get('required', False),
-                    tags=test_data.get('tags', [])
-                ))
-
+
+            for i, test_data in enumerate(data["tests"]):
+                name = test_data.get("name", f"{file_name}_{i}")
+                tests.append(
+                    TestCase(
+                        name=name,
+                        query=test_data["query"],
+                        expected_fact=test_data["expected_fact"],
+                        confidence_threshold=test_data.get("confidence_threshold", 0.7),
+                        required=test_data.get("required", False),
+                        tags=test_data.get("tags", []),
+                    )
+                )
+
            return tests
-
+
        except Exception as e:
            print(f"Warning: Failed to load test file {path}: {e}")
            return []
-
+
    def _load_json_test_file(self, path: Path) -> List[TestCase]:
        """Load tests from a JSON file."""
        try:
            with open(path) as f:
                data = json.load(f)
-
+
            if not data:
                return []
-
+
            # Support both array of tests and object with 'tests' key
            if isinstance(data, list):
                test_list = data
-            elif 'tests' in data:
-                test_list = data['tests']
+            elif "tests" in data:
+                test_list = data["tests"]
            else:
                return []
-
+
            tests = []
            file_name = path.stem
-
+
            for i, test_data in enumerate(test_list):
-                name = test_data.get('name', f'{file_name}_{i}')
-                tests.append(TestCase(
-                    name=name,
-                    query=test_data['query'],
-                    expected_fact=test_data['expected_fact'],
-                    confidence_threshold=test_data.get('confidence_threshold', 0.7),
-                    required=test_data.get('required', False),
-                    tags=test_data.get('tags', [])
-                ))
-
+                name = test_data.get("name", f"{file_name}_{i}")
+                tests.append(
+                    TestCase(
+                        name=name,
+                        query=test_data["query"],
+                        expected_fact=test_data["expected_fact"],
+                        confidence_threshold=test_data.get("confidence_threshold", 0.7),
+                        required=test_data.get("required", False),
+                        tags=test_data.get("tags", []),
+                    )
+                )
+
            return tests
-
+
        except Exception as e:
            print(f"Warning: Failed to load test file {path}: {e}")
            return []
-
+
    def run_test(self, test: TestCase) -> Optional[TestFailure]:
        """
        Run a single test case.
-
+
        Returns:
            TestFailure if test failed, None if passed
        """

@@ -176,12 +184,12 @@ class TestRunner:
        else:
            # Fall back to simple text matching
            return self._run_text_test(test)
-
+
    def _run_semantic_test(self, test: TestCase) -> Optional[TestFailure]:
        """Run test using semantic search."""
        try:
            results = self.vector_store.search(test.query, k=5)
-
+
            if not results:
                return TestFailure(
                    test_name=test.name,

@@ -189,30 +197,30 @@ class TestRunner:
                    expected=test.expected_fact,
                    actual=None,
                    message="No results found for query",
-                    is_critical=test.required
+                    is_critical=test.required,
                )
-
+
            # Check if any result contains the expected fact
            for result in results:
-                content = result.get('content', '')
-                similarity = result.get('similarity', 0)
-
+                content = result.get("content", "")
+                similarity = result.get("similarity", 0)
+
                if similarity >= test.confidence_threshold:
                    # Use simple string matching as judge
                    if self._fact_matches(test.expected_fact, content):
                        return None  # Test passed
-
+
            # No matching result found
            best_result = results[0] if results else {}
            return TestFailure(
                test_name=test.name,
                query=test.query,
                expected=test.expected_fact,
-                actual=best_result.get('content', '')[:200],
+                actual=best_result.get("content", "")[:200],
                message=f"Expected fact not found in top results (best similarity: {best_result.get('similarity', 0):.2f})",
-                is_critical=test.required
+                is_critical=test.required,
            )
-
+
        except Exception as e:
            return TestFailure(
                test_name=test.name,

@@ -220,14 +228,14 @@ class TestRunner:
                expected=test.expected_fact,
                actual=None,
                message=f"Error running semantic test: {e}",
-                is_critical=test.required
+                is_critical=test.required,
            )
-
+
    def _run_text_test(self, test: TestCase) -> Optional[TestFailure]:
        """Run test using simple text search through memory files."""
        try:
-            current_dir = self.repo.root / 'current'
-
+            current_dir = self.repo.root / "current"
+
            if not current_dir.exists():
                return TestFailure(
                    test_name=test.name,

@@ -235,27 +243,27 @@ class TestRunner:
                    expected=test.expected_fact,
                    actual=None,
                    message="No current/ directory found",
-                    is_critical=test.required
+                    is_critical=test.required,
                )
-
+
            # Search through all memory files
-            for memory_file in current_dir.glob('**/*.md'):
+            for memory_file in current_dir.glob("**/*.md"):
                try:
                    content = memory_file.read_text()
                    if self._fact_matches(test.expected_fact, content):
                        return None  # Test passed
                except Exception:
                    continue
-
+
            return TestFailure(
                test_name=test.name,
                query=test.query,
                expected=test.expected_fact,
                actual=None,
                message="Expected fact not found in any memory file",
-                is_critical=test.required
+                is_critical=test.required,
            )
-
+
        except Exception as e:
            return TestFailure(
                test_name=test.name,

@@ -263,83 +271,83 @@ class TestRunner:
                expected=test.expected_fact,
                actual=None,
                message=f"Error running text test: {e}",
-                is_critical=test.required
+                is_critical=test.required,
            )
-
+
    def _fact_matches(self, expected: str, content: str) -> bool:
        """
        Check if expected fact is present in content.
-
+
        Uses case-insensitive substring matching.
        For more sophisticated matching, this could use an LLM judge.
        """
        expected_lower = expected.lower()
        content_lower = content.lower()
-
+
        # Direct substring match
        if expected_lower in content_lower:
            return True
-
+
        # Check if all key words are present
        key_words = expected_lower.split()
        if len(key_words) > 2:
            matches = sum(1 for word in key_words if word in content_lower)
            if matches >= len(key_words) * 0.8:  # 80% of words match
                return True
-
+
        return False
-
+
    def run_all(self, tags: Optional[List[str]] = None) -> TestResult:
        """
        Run all tests.
-
+
        Args:
            tags: Optional list of tags to filter tests
-
+
        Returns:
            TestResult with overall results
        """
        start_time = datetime.now()
        tests = self.load_tests()
-
+
        # Filter by tags if specified
        if tags:
            tests = [t for t in tests if any(tag in t.tags for tag in tags)]
-
+
        failures = []
        passed_count = 0
-
+
        for test in tests:
            failure = self.run_test(test)
            if failure:
                failures.append(failure)
            else:
                passed_count += 1
-
+
        duration = (datetime.now() - start_time).total_seconds() * 1000
-
+
        # Check if any critical tests failed
        critical_failures = [f for f in failures if f.is_critical]
        passed = len(critical_failures) == 0
-
+
        return TestResult(
            passed=passed,
            total_count=len(tests),
            passed_count=passed_count,
            failed_count=len(failures),
            failures=failures,
-            duration_ms=int(duration)
+            duration_ms=int(duration),
        )
-
+
    def run_for_branch(self, branch: str) -> TestResult:
        """
        Run tests against a specific branch.
-
+
        Creates a temporary vector store with only the branch's data.
-
+
        Args:
            branch: Branch name to test
-
+
        Returns:
            TestResult
        """
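The loaders above imply a small test-file schema: a top-level tests key whose entries must carry query and expected_fact, with name, confidence_threshold (default 0.7), required (default False), and tags (default empty) optional. Below is a minimal sketch of such a file, written from Python for convenience; the repo path and test contents are hypothetical, not part of the package.

from pathlib import Path

# Hypothetical memory repo location; agmem looks for tests under <repo>/tests/.
tests_dir = Path("my-memory-repo") / "tests"
tests_dir.mkdir(parents=True, exist_ok=True)

# Schema mirrors _load_test_file: only "query" and "expected_fact" are
# mandatory; missing fields fall back to the defaults visible in the diff.
(tests_dir / "recall.yaml").write_text(
    """\
tests:
  - name: remembers_timezone
    query: what timezone does the user work in
    expected_fact: the user works in UTC+2
    confidence_threshold: 0.7
    required: true
    tags: [profile]
  - query: preferred language          # name defaults to "recall_1"
    expected_fact: the user prefers Python
"""
)

# Running the suite (assumes an initialized agmem repository object):
#   runner = TestRunner(repo)                 # text fallback over current/**/*.md
#   runner = TestRunner(repo, vector_store)   # semantic matching if available
#   result = runner.run_all(tags=["profile"])
#   print(result.passed, result.failed_count, result.duration_ms)

Note that run_all reports overall failure only when a required test fails; optional tests that miss still count toward failed_count but do not flip passed.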
memvcs/core/vector_store.py
CHANGED

@@ -6,11 +6,11 @@ Requires: pip install agmem[vector]
 """
 
 import logging
+import struct
+from pathlib import Path
 from typing import List, Optional, Tuple
 
 from .constants import MEMORY_TYPES
-import struct
-from pathlib import Path
 
 logger = logging.getLogger("agmem.vector_store")
 

@@ -75,7 +75,8 @@ class VectorStore:
    def _ensure_tables(self):
        """Create vector and metadata tables if they don't exist."""
        conn = self._get_connection()
-        conn.execute("""
+        conn.execute(
+            """
        CREATE TABLE IF NOT EXISTS memory_meta (
            rowid INTEGER PRIMARY KEY,
            path TEXT NOT NULL,

@@ -85,18 +86,21 @@ class VectorStore:
            author TEXT,
            indexed_at TEXT
        )
-        """)
+        """
+        )
        # Try to add new columns to existing tables (for upgrades)
-        for col in ['commit_hash TEXT', 'author TEXT', 'indexed_at TEXT']:
+        for col in ["commit_hash TEXT", "author TEXT", "indexed_at TEXT"]:
            try:
                conn.execute(f"ALTER TABLE memory_meta ADD COLUMN {col}")
            except Exception:
                pass  # Column already exists
        try:
-            conn.execute(f"""
+            conn.execute(
+                f"""
            CREATE VIRTUAL TABLE IF NOT EXISTS vec_memory
            USING vec0(embedding float[{EMBEDDING_DIM}])
-            """)
+                """
+            )
        except Exception as e:
            # vec0 might already exist with different schema
            logger.debug("vec_memory creation: %s", e)

@@ -114,11 +118,11 @@ class VectorStore:
        content: str,
        blob_hash: Optional[str] = None,
        commit_hash: Optional[str] = None,
-        author: Optional[str] = None
+        author: Optional[str] = None,
    ) -> None:
        """
        Index a memory file for semantic search.
-
+
        Args:
            path: File path relative to current/
            content: File content to index

@@ -127,13 +131,13 @@ class VectorStore:
            author: Optional author string for provenance tracking
        """
        from datetime import datetime
-
+
        self._ensure_tables()
        conn = self._get_connection()
 
        embedding = self._embed(content)
        emb_bytes = _serialize_f32(embedding)
-        indexed_at = datetime.utcnow().isoformat() + 'Z'
+        indexed_at = datetime.utcnow().isoformat() + "Z"
 
        with conn:
            conn.execute(

@@ -203,13 +207,13 @@ class VectorStore:
            results.append((path, snippet, float(distance)))
 
        return results
-
+
    def search_with_provenance(
        self, query: str, limit: int = 10, min_score: Optional[float] = None
    ) -> List[dict]:
        """
        Semantic search with provenance metadata.
-
+
        Returns list of dicts with: path, content, distance, commit_hash, author, indexed_at
        """
        self._ensure_tables()

@@ -235,51 +239,53 @@ class VectorStore:
            if min_score is not None and distance > min_score:
                continue
            snippet = content[:500] + ("..." if len(content) > 500 else "")
-            results.append({
-                'path': path,
-                'content': snippet,
-                'distance': float(distance),
-                'similarity': 1.0 - float(distance),  # Convert to similarity score
-                'commit_hash': commit_hash,
-                'author': author,
-                'indexed_at': indexed_at,
-                'blob_hash': blob_hash
-            })
+            results.append(
+                {
+                    "path": path,
+                    "content": snippet,
+                    "distance": float(distance),
+                    "similarity": 1.0 - float(distance),  # Convert to similarity score
+                    "commit_hash": commit_hash,
+                    "author": author,
+                    "indexed_at": indexed_at,
+                    "blob_hash": blob_hash,
+                }
+            )
 
        return results
-
+
    def get_all_entries(self) -> List[dict]:
        """
        Get all indexed entries with their metadata.
-
+
        Used for fsck operations to check for dangling vectors.
        """
        self._ensure_tables()
        conn = self._get_connection()
-
+
        rows = conn.execute(
            """
            SELECT rowid, path, blob_hash, commit_hash, author, indexed_at
            FROM memory_meta
            """
        ).fetchall()
-
+
        return [
            {
-                'rowid': rowid,
-                'path': path,
-                'blob_hash': blob_hash,
-                'commit_hash': commit_hash,
-                'author': author,
-                'indexed_at': indexed_at
+                "rowid": rowid,
+                "path": path,
+                "blob_hash": blob_hash,
+                "commit_hash": commit_hash,
+                "author": author,
+                "indexed_at": indexed_at,
            }
            for rowid, path, blob_hash, commit_hash, author, indexed_at in rows
        ]
-
+
    def delete_entry(self, rowid: int) -> bool:
        """
        Delete an entry by rowid.
-
+
        Used by fsck to remove dangling vectors.
        """
        conn = self._get_connection()
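One detail worth noting: vec0 returns a distance (lower is better), and the similarity field added above converts it with 1.0 - distance, which is the value _run_semantic_test in test_runner.py compares against TestCase.confidence_threshold. A minimal sketch of that acceptance check, with illustrative numbers only:

def passes_threshold(distance: float, confidence_threshold: float = 0.7) -> bool:
    """Mirrors the check implied by the two diffs: similarity = 1 - distance."""
    similarity = 1.0 - distance  # same conversion as search_with_provenance
    return similarity >= confidence_threshold

# With the 0.7 default threshold from TestCase:
assert passes_threshold(0.25)      # similarity 0.75 -> candidate goes on to _fact_matches
assert not passes_threshold(0.40)  # similarity 0.60 -> result skipped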
memvcs/integrations/mcp_server.py
CHANGED

@@ -219,9 +219,7 @@ def _create_mcp_server():
                 fp = Path(root) / f
                 rel = str(fp.relative_to(repo.current_dir))
                 working_files[rel] = fp.read_bytes()
-            tree_diff = engine.diff_working_dir(
-                head_commit.store(repo.object_store), working_files
-            )
+            tree_diff = engine.diff_working_dir(head_commit.store(repo.object_store), working_files)
             return engine.format_diff(tree_diff, "HEAD", "working")
         else:
             base_ref = base or "HEAD~1"